Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
0625382
model: add support for extra bufs for all devices
max-krasnyansky Jul 16, 2025
80dc8e8
hexagon: add experimental ggml-hexagon backend for the Hexagon NPU
max-krasnyansky Jun 23, 2025
ec4436f
hexagon: fix format checker errors
max-krasnyansky Oct 13, 2025
aa65f21
hexagon: update readme and cmake presets
max-krasnyansky Oct 14, 2025
647fa3d
ci: add android-ndk-build jobs that build plain ARM64 and Snapdragon …
max-krasnyansky Oct 15, 2025
da7caac
hexagon: add simple graph optimizer for stacking MUL_MAT ops with the…
max-krasnyansky Oct 14, 2025
bbbc8ea
hexagon: move ADB helper scripts into scripts/snapdragon/adb
max-krasnyansky Oct 15, 2025
cc7dbd4
hexagon: replace all f/printfs with GGML_LOG_...
max-krasnyansky Oct 16, 2025
69a8047
readme: add hexagon to the list of supported backends
max-krasnyansky Oct 16, 2025
debdb3b
hexagon: stack matmuls with quantized inputs only
max-krasnyansky Oct 16, 2025
3475e29
hexagon: add TODO for fixing issues in hexagon_graph_optimize
max-krasnyansky Oct 17, 2025
1e750df
hexagon: update to hex-sdk 6.4.0 and add scripts for running on QDC
max-krasnyansky Oct 17, 2025
8e7d8b5
scripts: fix lint errors
max-krasnyansky Oct 17, 2025
20aa689
scripts: update qdc pytest script to make linter happy
max-krasnyansky Oct 17, 2025
03e2b9c
hexagon: add reduce sum in fp32
max-krasnyansky Oct 18, 2025
384164d
hexagon: reduce number of vector stores in matmul output
max-krasnyansky Oct 18, 2025
a314eb6
hexagon: remove the need for vdelta in reduce-multiply-x8
max-krasnyansky Oct 18, 2025
7f2d00b
hexagon: consistent use of reduce_sum_fp32 for row_sums
max-krasnyansky Oct 19, 2025
5de19f8
hexagon: some more matmul optimizations and comments
max-krasnyansky Oct 19, 2025
cf0242e
hexagon: update cmake presets
max-krasnyansky Oct 21, 2025
250e3a6
hexagon: add OPMASK support for run-bench.sh wrapper
max-krasnyansky Oct 21, 2025
08a97e6
hexagon: update to use GGML_BACKEND_API
max-krasnyansky Oct 21, 2025
6d2d0bd
hexagon: remove unused logic for setting tensor flags for the views
max-krasnyansky Oct 21, 2025
18d7d20
hexagon: add asserts to set/get_tensor to make sure we handle complet…
max-krasnyansky Oct 21, 2025
26a90a0
hexagon: use cpy_tensor slow path for non-host buffers
max-krasnyansky Oct 21, 2025
a8e5ad8
hexagon: error checks in the buffer allocator
max-krasnyansky Oct 21, 2025
dc001b9
cmake: move include(extProj) under ggml-hexagon
max-krasnyansky Oct 21, 2025
c749b86
hexagon: don't forget to delete the backend on free
max-krasnyansky Oct 22, 2025
0c01229
hexagon: set/get_tensor size assert apply only to quantized tensors
max-krasnyansky Oct 22, 2025
62ef4eb
hexagon: reintroduce HEX_VERBOSE wrapper for GGML_LOG_DEBUG for now
max-krasnyansky Oct 22, 2025
19041f7
docs: typos in hexagon developer docs (libggm-...)
max-krasnyansky Oct 22, 2025
3e4ff73
hexagon: overhaul error handling in the session/device allocation
max-krasnyansky Oct 22, 2025
6acc285
hexagon: update cmake presets to enable fp16 vectors
max-krasnyansky Oct 22, 2025
dda466c
hexagon: remove unused time_usec function
max-krasnyansky Oct 22, 2025
b0e5beb
hexagon: don't forget to release buffer contexts
max-krasnyansky Oct 22, 2025
3049de5
hexagon: fixed indents in hvx-utils (missed clang-format auto-format …
max-krasnyansky Oct 22, 2025
f7d7411
hexagon: remove custom can_repeat function and use ggml_can_repeat
max-krasnyansky Oct 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
hexagon: reintroduce HEX_VERBOSE wrapper for GGML_LOG_DEBUG for now
GGML_LOG_DEBUG is always enabled for test-backend-ops and the output gets in the way.
Ideally we need somewhat finer-grained log levels.
  • Loading branch information
max-krasnyansky committed Oct 22, 2025
commit 62ef4ebadfdbfa753931b9b7a8280adeb7d8f448
71 changes: 37 additions & 34 deletions ggml/src/ggml-hexagon/ggml-hexagon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ static int opt_experimental = 0;
static int opt_opmask = HTP_OPMASK_QUEUE | HTP_OPMASK_QUANTIZE | HTP_OPMASK_COMPUTE;
static int opt_opsync = 0; // synchronous ops

#define HEX_VERBOSE(...) \
if (opt_verbose) GGML_LOG_DEBUG(__VA_ARGS__)

#define HEX_PROFILE(...) \
if (opt_profile) GGML_LOG_INFO(__VA_ARGS__)

Expand Down Expand Up @@ -306,7 +309,7 @@ struct ggml_backend_hexagon_buffer_type_context {

struct ggml_backend_hexagon_buffer_context {
bool mmap_to(ggml_hexagon_session * s) {
GGML_LOG_DEBUG("ggml-hex: %s mmaping buffer: base %p domain-id %d session-id %d size %zu fd %d repack %d\n",
HEX_VERBOSE("ggml-hex: %s mmaping buffer: base %p domain-id %d session-id %d size %zu fd %d repack %d\n",
s->name.c_str(), (void *) this->base, s->domain_id, s->session_id, this->size, this->fd,
(int) this->repack);

Expand Down Expand Up @@ -357,7 +360,7 @@ struct ggml_backend_hexagon_buffer_context {
return;
}

GGML_LOG_DEBUG("ggml-hex: %s allocated buffer: base %p size %zu fd %d repack %d\n", sess->name.c_str(),
HEX_VERBOSE("ggml-hex: %s allocated buffer: base %p size %zu fd %d repack %d\n", sess->name.c_str(),
(void *) this->base, size, this->fd, (int) repack);

this->sess = sess;
Expand Down Expand Up @@ -400,7 +403,7 @@ static enum ggml_status ggml_backend_hexagon_buffer_init_tensor(ggml_backend_buf
auto ctx = static_cast<ggml_backend_hexagon_buffer_context *>(buffer->context);
auto sess = ctx->sess;

GGML_LOG_DEBUG("ggml-hex: %s init-tensor %s : base %p data %p nbytes %zu usage %d repack %d\n", sess->name.c_str(),
HEX_VERBOSE("ggml-hex: %s init-tensor %s : base %p data %p nbytes %zu usage %d repack %d\n", sess->name.c_str(),
tensor->name, (void *) ctx->base, tensor->data, ggml_nbytes(tensor), (int) buffer->usage,
(int) ctx->repack);

Expand All @@ -425,7 +428,7 @@ static x2_q4 unpack_q4(uint8_t v) {
}

static void dump_block_q4_0(const block_q4_0 * b, int i) {
GGML_LOG_DEBUG("ggml-hex: repack q4_0 %d: %d %d %d %d ... %d %d %d %d : %.6f\n", i, unpack_q4(b->qs[0]).v[0],
HEX_VERBOSE("ggml-hex: repack q4_0 %d: %d %d %d %d ... %d %d %d %d : %.6f\n", i, unpack_q4(b->qs[0]).v[0],
unpack_q4(b->qs[1]).v[0], unpack_q4(b->qs[2]).v[0], unpack_q4(b->qs[3]).v[0], unpack_q4(b->qs[12]).v[1],
unpack_q4(b->qs[13]).v[1], unpack_q4(b->qs[14]).v[1], unpack_q4(b->qs[15]).v[1],
GGML_FP16_TO_FP32(b->d));
Expand All @@ -443,13 +446,13 @@ static void dump_packed_block_q4x4x2(const uint8_t * v, unsigned int i, size_t k
const uint8_t * q = v_q + i * qblk_size;
const ggml_half * d = (const ggml_half *) (v_d + i * dblk_size);

GGML_LOG_DEBUG("ggml-hex: repack q4x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n", i,
HEX_VERBOSE("ggml-hex: repack q4x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n", i,
unpack_q4(q[0]).v[0], unpack_q4(q[1]).v[0], unpack_q4(q[2]).v[0], unpack_q4(q[3]).v[0],
unpack_q4(q[60]).v[0], unpack_q4(q[61]).v[0], unpack_q4(q[62]).v[0], unpack_q4(q[63]).v[0],
unpack_q4(q[124]).v[0], unpack_q4(q[125]).v[0], unpack_q4(q[126]).v[0], unpack_q4(q[127]).v[0],
GGML_FP16_TO_FP32(d[0]), GGML_FP16_TO_FP32(d[1]), GGML_FP16_TO_FP32(d[2]), GGML_FP16_TO_FP32(d[3]));

GGML_LOG_DEBUG("ggml-hex: repack q4x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n",
HEX_VERBOSE("ggml-hex: repack q4x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n",
i + 1, unpack_q4(q[0]).v[1], unpack_q4(q[1]).v[1], unpack_q4(q[2]).v[1], unpack_q4(q[3]).v[1],
unpack_q4(q[60]).v[1], unpack_q4(q[61]).v[1], unpack_q4(q[62]).v[1], unpack_q4(q[63]).v[1],
unpack_q4(q[124]).v[1], unpack_q4(q[125]).v[1], unpack_q4(q[126]).v[1], unpack_q4(q[127]).v[1],
Expand Down Expand Up @@ -658,7 +661,7 @@ static void repack_q4_0_q4x4x2(ggml_tensor * t, const void * data, size_t size)
void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);

GGML_LOG_DEBUG("ggml-hex: repack-q4_0-q4x4x2 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data, size,
HEX_VERBOSE("ggml-hex: repack-q4_0-q4x4x2 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data, size,
t->ne[0], nrows, row_size);

init_row_q4x4x2((block_q4_0 *) buf_pd, t->ne[0]); // init padded buffer to make sure the tail is all zeros
Expand Down Expand Up @@ -690,7 +693,7 @@ static void repack_q4x4x2_q4_0(void * data, const ggml_tensor * t, size_t size)
void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);

GGML_LOG_DEBUG("ggml-hex: repack-q4x4x2-q4_0 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data, size,
HEX_VERBOSE("ggml-hex: repack-q4x4x2-q4_0 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data, size,
t->ne[0], nrows, row_size);

memset(buf_pd, 0, row_size_pd); // clear-out padded buffer to make sure the tail is all zeros
Expand All @@ -710,7 +713,7 @@ static void repack_q4x4x2_q4_0(void * data, const ggml_tensor * t, size_t size)

// ======== Q8x4x2 ====================
static void dump_block_q8_0(const block_q8_0 * b, int i) {
GGML_LOG_DEBUG("ggml-hex: repack q8_0 %d: %d %d %d %d ... %d %d %d %d : %.6f\n", i, b->qs[0], b->qs[1], b->qs[2],
HEX_VERBOSE("ggml-hex: repack q8_0 %d: %d %d %d %d ... %d %d %d %d : %.6f\n", i, b->qs[0], b->qs[1], b->qs[2],
b->qs[3], b->qs[28], b->qs[29], b->qs[30], b->qs[31], GGML_FP16_TO_FP32(b->d));
}

Expand All @@ -726,11 +729,11 @@ static void dump_packed_block_q8x4x2(const uint8_t * v, unsigned int i, size_t k
const uint8_t * q = v_q + i * qblk_size;
const ggml_half * d = (const ggml_half *) (v_d + i * dblk_size);

GGML_LOG_DEBUG("ggml-hex: repack q8x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n", i,
HEX_VERBOSE("ggml-hex: repack q8x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n", i,
q[0], q[1], q[2], q[3], q[60], q[61], q[62], q[63], q[124], q[125], q[126], q[127],
GGML_FP16_TO_FP32(d[0]), GGML_FP16_TO_FP32(d[1]), GGML_FP16_TO_FP32(d[2]), GGML_FP16_TO_FP32(d[3]));

GGML_LOG_DEBUG("ggml-hex: repack q8x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n",
HEX_VERBOSE("ggml-hex: repack q8x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n",
i + 1, q[128], q[129], q[130], q[131], q[192], q[193], q[194], q[195], q[252], q[253], q[254], q[255],
GGML_FP16_TO_FP32(d[4]), GGML_FP16_TO_FP32(d[5]), GGML_FP16_TO_FP32(d[6]), GGML_FP16_TO_FP32(d[7]));
}
Expand Down Expand Up @@ -932,7 +935,7 @@ static void repack_q8_0_q8x4x2(ggml_tensor * t, const void * data, size_t size)
void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);

GGML_LOG_DEBUG("ggml-hex: repack-q8_0-q8x4x2 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data, size,
HEX_VERBOSE("ggml-hex: repack-q8_0-q8x4x2 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data, size,
t->ne[0], nrows, row_size);

init_row_q8x4x2((block_q8_0 *) buf_pd, t->ne[0]); // init padded buffer to make sure the tail is all zeros
Expand Down Expand Up @@ -964,7 +967,7 @@ static void repack_q8x4x2_q8_0(void * data, const ggml_tensor * t, size_t size)
void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);

GGML_LOG_DEBUG("ggml-hex: repack-q8x4x2-q8_0 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data, size,
HEX_VERBOSE("ggml-hex: repack-q8x4x2-q8_0 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data, size,
t->ne[0], nrows, row_size);

memset(buf_pd, 0, row_size_pd); // clear-out padded buffer to make sure the tail is all zeros
Expand Down Expand Up @@ -995,7 +998,7 @@ static x2_mxfp4 unpack_mxfp4(uint8_t v) {
}

static void dump_block_mxfp4(const block_mxfp4 * b, int i) {
GGML_LOG_DEBUG("ggml-hex: repack mxfp4 %d: %d %d %d %d ... %d %d %d %d : %.6f\n", i, unpack_mxfp4(b->qs[0]).v[0],
HEX_VERBOSE("ggml-hex: repack mxfp4 %d: %d %d %d %d ... %d %d %d %d : %.6f\n", i, unpack_mxfp4(b->qs[0]).v[0],
unpack_mxfp4(b->qs[1]).v[0], unpack_mxfp4(b->qs[2]).v[0], unpack_mxfp4(b->qs[3]).v[0],
unpack_mxfp4(b->qs[12]).v[1], unpack_mxfp4(b->qs[13]).v[1], unpack_mxfp4(b->qs[14]).v[1],
unpack_mxfp4(b->qs[15]).v[1], GGML_E8M0_TO_FP32_HALF(b->e));
Expand All @@ -1013,14 +1016,14 @@ static void dump_packed_block_mxfp4x4x2(const uint8_t * v, unsigned int i, size_
const uint8_t * q = v_q + i * qblk_size;
const uint8_t * e = (const uint8_t *) (v_e + i * eblk_size);

GGML_LOG_DEBUG("ggml-hex: repack mxfp4x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n", i,
HEX_VERBOSE("ggml-hex: repack mxfp4x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n", i,
unpack_mxfp4(q[0]).v[0], unpack_mxfp4(q[1]).v[0], unpack_mxfp4(q[2]).v[0], unpack_mxfp4(q[3]).v[0],
unpack_mxfp4(q[60]).v[0], unpack_mxfp4(q[61]).v[0], unpack_mxfp4(q[62]).v[0], unpack_mxfp4(q[63]).v[0],
unpack_mxfp4(q[124]).v[0], unpack_mxfp4(q[125]).v[0], unpack_mxfp4(q[126]).v[0],
unpack_mxfp4(q[127]).v[0], GGML_E8M0_TO_FP32_HALF(e[0]), GGML_E8M0_TO_FP32_HALF(e[1]),
GGML_E8M0_TO_FP32_HALF(e[2]), GGML_E8M0_TO_FP32_HALF(e[3]));

GGML_LOG_DEBUG("ggml-hex: repack mxfp4x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n",
HEX_VERBOSE("ggml-hex: repack mxfp4x4x2-%d: %d %d %d %d ... %d %d %d %d ... %d %d %d %d : %.6f %.6f %.6f %.6f\n",
i + 1, unpack_mxfp4(q[0]).v[1], unpack_mxfp4(q[1]).v[1], unpack_mxfp4(q[2]).v[1],
unpack_mxfp4(q[3]).v[1], unpack_mxfp4(q[60]).v[1], unpack_mxfp4(q[61]).v[1], unpack_mxfp4(q[62]).v[1],
unpack_mxfp4(q[63]).v[1], unpack_mxfp4(q[124]).v[1], unpack_mxfp4(q[125]).v[1],
Expand Down Expand Up @@ -1231,7 +1234,7 @@ static void repack_mxfp4_mxfp4x4x2(ggml_tensor * t, const void * data, size_t si
void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);

GGML_LOG_DEBUG("ggml-hex: repack-mxfp4-mxfp4x4x2 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data,
HEX_VERBOSE("ggml-hex: repack-mxfp4-mxfp4x4x2 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data,
size, t->ne[0], nrows, row_size);

init_row_mxfp4x4x2((block_mxfp4 *) buf_pd, t->ne[0]); // init padded buffer to make sure the tail is all zeros
Expand Down Expand Up @@ -1263,7 +1266,7 @@ static void repack_mxfp4x4x2_mxfp4(void * data, const ggml_tensor * t, size_t si
void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);

GGML_LOG_DEBUG("ggml-hex: repack-mxfp4x4x2-mxfp4 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data,
HEX_VERBOSE("ggml-hex: repack-mxfp4x4x2-mxfp4 %s : data %p size %zu dims %ldx%ld row-size %zu\n", t->name, data,
size, t->ne[0], nrows, row_size);

memset(buf_pd, 0, row_size_pd); // clear-out padded buffer to make sure the tail is all zeros
Expand All @@ -1289,7 +1292,7 @@ static void ggml_backend_hexagon_buffer_set_tensor(ggml_backend_buffer_t buffer,
auto ctx = (ggml_backend_hexagon_buffer_context *) buffer->context;
auto sess = ctx->sess;

GGML_LOG_DEBUG("ggml-hex: %s set-tensor %s : data %p offset %zu size %zu\n", sess->name.c_str(), tensor->name, data,
HEX_VERBOSE("ggml-hex: %s set-tensor %s : data %p offset %zu size %zu\n", sess->name.c_str(), tensor->name, data,
offset, size);

switch (tensor->type) {
Expand Down Expand Up @@ -1325,7 +1328,7 @@ static void ggml_backend_hexagon_buffer_get_tensor(ggml_backend_buffer_t buffer,
auto ctx = (ggml_backend_hexagon_buffer_context *) buffer->context;
auto sess = ctx->sess;

GGML_LOG_DEBUG("ggml-hex: %s get-tensor %s : data %p offset %zu size %zu\n", sess->name.c_str(), tensor->name, data,
HEX_VERBOSE("ggml-hex: %s get-tensor %s : data %p offset %zu size %zu\n", sess->name.c_str(), tensor->name, data,
offset, size);

switch (tensor->type) {
Expand Down Expand Up @@ -1366,7 +1369,7 @@ static bool ggml_backend_hexagon_buffer_cpy_tensor(ggml_backend_buffer_t bu
static void ggml_backend_hexagon_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
auto ctx = (ggml_backend_hexagon_buffer_context *) buffer->context;
auto sess = ctx->sess;
GGML_LOG_DEBUG("ggml-hex: %s clear-buff base %p size %zu\n", sess->name.c_str(), (void *) ctx->base, ctx->size);
HEX_VERBOSE("ggml-hex: %s clear-buff base %p size %zu\n", sess->name.c_str(), (void *) ctx->base, ctx->size);
memset(ctx->base, value, ctx->size);
}

Expand Down Expand Up @@ -2128,7 +2131,7 @@ static void hex_dump_dspbuf(const struct ggml_tensor * t, const dspqueue_buffer
auto buf = static_cast<ggml_backend_hexagon_buffer_context *>(t->buffer->context);
auto sess = buf->sess;

GGML_LOG_DEBUG("ggml-hex: %s dspqbuf : %s base-addr %p base-size %zu data %p offset %u size %u\n", sess->name.c_str(),
HEX_VERBOSE("ggml-hex: %s dspqbuf : %s base-addr %p base-size %zu data %p offset %u size %u\n", sess->name.c_str(),
t->name, (void *) buf->base, buf->size, (void *) d->ptr, (unsigned int) d->offset,
(unsigned int) d->size);
}
Expand Down Expand Up @@ -2211,7 +2214,7 @@ static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags)
hex_format_op_buffs(buffs, op);
hex_format_op_names(names, op);

GGML_LOG_DEBUG("ggml-hex: %s %s: %s : %s : %s : %s : %s: flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
HEX_VERBOSE("ggml-hex: %s %s: %s : %s : %s : %s : %s: flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
names, dims, types, strides, buffs, req.flags);
if (opt_verbose > 1) {
hex_dump_dspbuf(src0, &bufs[0]);
Expand Down Expand Up @@ -2348,7 +2351,7 @@ static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flag
hex_format_op_buffs(buffs, op);
hex_format_op_names(names, op);

GGML_LOG_DEBUG("ggml-hex: %s %s: %s : %s : %s : %s : %s: flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
HEX_VERBOSE("ggml-hex: %s %s: %s : %s : %s : %s : %s: flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
names, dims, types, strides, buffs, req.flags);

if (opt_verbose > 1) {
Expand Down Expand Up @@ -2496,7 +2499,7 @@ static void ggml_hexagon_binary(const struct ggml_tensor * op, uint32_t flags) {
hex_format_op_buffs(buffs, op);
hex_format_op_names(names, op);

GGML_LOG_DEBUG("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(),
HEX_VERBOSE("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(),
ggml_op_name(node->op), names, dims, types, strides, buffs, req.flags);
if (opt_verbose > 1) {
hex_dump_dspbuf(src0, &bufs[0]);
Expand Down Expand Up @@ -2635,7 +2638,7 @@ static void ggml_hexagon_add_id(const struct ggml_tensor * op, uint32_t flags) {
hex_format_op_buffs(buffs, op);
hex_format_op_names(names, op);

GGML_LOG_DEBUG("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(),
HEX_VERBOSE("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(),
ggml_op_name(node->op), names, dims, types, strides, buffs, req.flags);

if (opt_verbose > 1) {
Expand Down Expand Up @@ -2815,7 +2818,7 @@ static void ggml_hexagon_unary(const struct ggml_tensor * op, uint32_t flags) {
hex_format_op_buffs(buffs, op);
hex_format_op_names(names, op);

GGML_LOG_DEBUG("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
HEX_VERBOSE("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
names, dims, types, strides, buffs, req.flags);
if (opt_verbose > 1) {
hex_dump_dspbuf(src0, &bufs[0]);
Expand Down Expand Up @@ -2988,7 +2991,7 @@ static void ggml_hexagon_rope(const struct ggml_tensor * op, uint32_t flags) {
hex_format_op_buffs(buffs, op);
hex_format_op_names(names, op);

GGML_LOG_DEBUG("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
HEX_VERBOSE("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
names, dims, types, strides, buffs, req.flags);
if (opt_verbose > 1) {
hex_dump_dspbuf(src0, &bufs[0]);
Expand Down Expand Up @@ -3092,7 +3095,7 @@ static inline int last_compute_op(ggml_cgraph * graph) {
static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
auto sess = static_cast<ggml_hexagon_session *>(backend->context);

GGML_LOG_DEBUG("ggml-hex: %s graph-compute n_nodes %d\n", sess->name.c_str(), graph->n_nodes);
HEX_VERBOSE("ggml-hex: %s graph-compute n_nodes %d\n", sess->name.c_str(), graph->n_nodes);

const int last = last_compute_op(graph);

Expand Down Expand Up @@ -3176,7 +3179,7 @@ static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, gg
static void ggml_backend_hexagon_synchronize(ggml_backend_t backend) {
auto sess = static_cast<ggml_hexagon_session *>(backend->context);

GGML_LOG_DEBUG("ggml-hex: %s synchronize\n", sess->name.c_str());
HEX_VERBOSE("ggml-hex: %s synchronize\n", sess->name.c_str());

// Wait until all pending ops complete
while (sess->op_pending) {
Expand Down Expand Up @@ -3515,7 +3518,7 @@ static bool ggml_backend_hexagon_device_supports_op(ggml_backend_dev_t dev, cons
hex_format_op_buffs(buffs, op);
hex_format_op_names(names, op);

GGML_LOG_DEBUG("ggml-hex: %s device-supports-op %s : %s : %s : %s : %s : %s : (%d)\n", sess->name.c_str(),
HEX_VERBOSE("ggml-hex: %s device-supports-op %s : %s : %s : %s : %s : %s : (%d)\n", sess->name.c_str(),
ggml_op_name(op->op), names, dims, types, strides, buffs, (int) supp);
}

Expand All @@ -3535,14 +3538,14 @@ static bool ggml_backend_hexagon_device_supports_buft(ggml_backend_dev_t dev, gg
// Need session/domain-id for buffers to be compatible
bool supp = (s0->session_id == s1->session_id);

GGML_LOG_DEBUG("ggml-hex: %s device-supports-buft %s (%d)\n", s0->name.c_str(), s1->name.c_str(), (int) supp);
HEX_VERBOSE("ggml-hex: %s device-supports-buft %s (%d)\n", s0->name.c_str(), s1->name.c_str(), (int) supp);

return supp;
}

static ggml_backend_buffer_type_t * ggml_backend_hexagon_device_get_extra_buffers_type(ggml_backend_dev_t dev) {
auto s0 = static_cast<ggml_hexagon_session *>(dev->context);
GGML_LOG_DEBUG("ggml-hex: device-get-extra-buft : %s \n", s0->name.c_str());
HEX_VERBOSE("ggml-hex: device-get-extra-buft : %s \n", s0->name.c_str());

static ggml_backend_buffer_type_t bufts[2];
bufts[0] = ggml_backend_hexagon_device_get_repack_buffer_type(dev);
Expand Down Expand Up @@ -3687,7 +3690,7 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) {

reg->context = new ggml_hexagon_registry(reg);

GGML_LOG_DEBUG("ggml-hex: size-of-general-req %zu size-of-general-rsp %zu\n", sizeof(struct htp_general_req),
HEX_VERBOSE("ggml-hex: size-of-general-req %zu size-of-general-rsp %zu\n", sizeof(struct htp_general_req),
sizeof(struct htp_general_rsp));
}

Expand Down