Skip to content

Commit 0f07cac

Browse files
committed
ggml : fix q4_1 dot product types
1 parent c5d70f5 commit 0f07cac

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

ggml.c

+6 -6
Original file line number | Diff line number | Diff line change
@@ -2344,14 +2344,14 @@ static void ggml_vec_dot_q4_1(const int n, float * restrict s, const void * rest
23442344

23452345
#if defined(__ARM_FEATURE_DOTPROD)
23462346
// dot product into int32x4_t
2347-
int32x4_t p_0 = vdotq_s32(vdupq_n_s32(0), v0_0l, v1_0l);
2348-
int32x4_t p_1 = vdotq_s32(vdupq_n_s32(0), v0_1l, v1_1l);
2347+
uint32x4_t p_0 = vdotq_u32(vdupq_n_u32(0), v0_0l, v1_0l);
2348+
uint32x4_t p_1 = vdotq_u32(vdupq_n_u32(0), v0_1l, v1_1l);
23492349

2350-
p_0 = vdotq_s32(p_0, v0_0h, v1_0h);
2351-
p_1 = vdotq_s32(p_1, v0_1h, v1_1h);
2350+
p_0 = vdotq_u32(p_0, v0_0h, v1_0h);
2351+
p_1 = vdotq_u32(p_1, v0_1h, v1_1h);
23522352

2353-
sum11 += x0->d*y0->d*vaddvq_s32(p_0);
2354-
sum11 += x1->d*y1->d*vaddvq_s32(p_1);
2353+
sum11 += x0->d*y0->d*vaddvq_u32(p_0);
2354+
sum11 += x1->d*y1->d*vaddvq_u32(p_1);
23552355
#else
23562356
const uint16x8_t pl0l = vmull_u8(vget_low_u8 (v0_0l), vget_low_u8 (v1_0l));
23572357
const uint16x8_t pl0h = vmull_u8(vget_high_u8(v0_0l), vget_high_u8(v1_0l));

0 commit comments

Comments
 (0)