Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: optimize loop boundaries in rope_hex_f32 for better performance
  • Loading branch information
chraac committed Nov 28, 2025
commit 0121291d5358ee27bf4e8bd96106020f4b56857c
13 changes: 3 additions & 10 deletions ggml/src/ggml-hexagon/htp/rope-ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,23 +282,16 @@ static void rope_hex_f32(struct rope_th_ctx * rope_ctx,
freq_factors = (const float *) src2->data;
}

int ir = 0;
const int32_t half_dims = rope_ctx->n_dims / 2;
const uint32_t i0_end = MIN(ir1, ne1);
const int32_t half_dims = rope_ctx->n_dims / 2;
for (uint32_t i3 = 0; i3 < ne3; i3++) { // batch
for (uint32_t i2 = 0; i2 < ne2; i2++) { // seq-len
const int32_t p = pos[i2];

rope_cache_init(p, rope_ctx->freq_scale, freq_factors, rope_ctx->corr_dims, ne0, rope_ctx->ext_factor,
rope_ctx->attn_factor, wp0, rope_ctx->theta_scale);

for (uint32_t i1 = 0; i1 < ne1; i1++) { // attn-heads
if (ir++ < ir0) {
continue;
}
if (ir > ir1) {
break;
}

for (uint32_t i1 = ir0; i1 < i0_end; i1++) { // attn-heads
const float * src = (float *) ((char *) src0->data + i3 * nb03 + i2 * nb02 + i1 * nb01);
float * dst_data = (float *) ((char *) dst->data + i3 * nb3 + i2 * nb2 + i1 * nb1);

Expand Down