Skip to content

Commit ded446b

Browse files
authored
opencl: allow large buffer for adreno (#20997)
1 parent f8d4aba commit ded446b

1 file changed

Lines changed: 26 additions & 0 deletions

File tree

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -394,6 +394,9 @@ struct ggml_backend_opencl_context {
394394
bool fp16_support;
395395
bool has_vector_subgroup_broadcast;
396396
bool disable_fusion;
397+
398+
bool adreno_has_large_buffer;
399+
bool adreno_use_large_buffer;
397400
ggml_cl_compiler_version adreno_cl_compiler_version;
398401

399402
int adreno_wave_size;
@@ -787,6 +790,10 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
787790
" -cl-mad-enable -cl-unsafe-math-optimizations"
788791
" -cl-finite-math-only -cl-fast-relaxed-math";
789792

793+
if (backend_ctx->adreno_use_large_buffer) {
794+
compile_opts += " -qcom-enable-large-buffer ";
795+
}
796+
790797
GGML_LOG_INFO("ggml_opencl: loading OpenCL kernels");
791798

792799
// add
@@ -3020,6 +3027,8 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
30203027
// Check if ext_buffer contains cl_khr_fp16
30213028
backend_ctx->fp16_support = strstr(ext_buffer, "cl_khr_fp16") != NULL;
30223029
GGML_LOG_INFO("ggml_opencl: device FP16 support: %s\n", backend_ctx->fp16_support ? "true" : "false");
3030+
// check Adreno large buffer support
3031+
backend_ctx->adreno_has_large_buffer = strstr(ext_buffer, "cl_qcom_large_buffer") != NULL;
30233032

30243033
// fp16 is required
30253034
if (!backend_ctx->fp16_support) {
@@ -3086,6 +3095,18 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
30863095
GGML_LOG_INFO("ggml_opencl: using kernels optimized for Adreno (GGML_OPENCL_USE_ADRENO_KERNELS)\n");
30873096
#endif // GGML_OPENCL_USE_ADRENO_KERNELS
30883097

3098+
// determine whether to use large buffer for Adreno
3099+
backend_ctx->adreno_use_large_buffer = getenv("GGML_OPENCL_ADRENO_USE_LARGE_BUFFER") != nullptr &&
3100+
backend_ctx->gpu_family == GPU_FAMILY::ADRENO;
3101+
if (backend_ctx->adreno_use_large_buffer) {
3102+
if (!backend_ctx->adreno_has_large_buffer) {
3103+
GGML_LOG_INFO("ggml_opencl: Adreno large buffer requested but not supported by driver, will use regular buffer\n");
3104+
backend_ctx->adreno_use_large_buffer = false;
3105+
} else {
3106+
GGML_LOG_INFO("ggml_opencl: Adreno large buffer enabled\n");
3107+
}
3108+
}
3109+
30893110
cl_int err;
30903111

30913112
// A local ref of cl_context for convenience
@@ -5660,6 +5681,11 @@ static ggml_backend_buffer_t ggml_backend_opencl_buffer_type_alloc_buffer(ggml_b
56605681

56615682
cl_int err;
56625683
cl_mem mem = clCreateBuffer(backend_ctx->context, CL_MEM_READ_WRITE, size, NULL, &err);
5684+
if (err != CL_SUCCESS && backend_ctx->adreno_use_large_buffer) {
5685+
cl_mem_properties props[] = { 0x41A6 /* CL_LARGE_BUFFER_QCOM */, 1, 0 };
5686+
mem = clCreateBufferWithProperties(backend_ctx->context, props, CL_MEM_READ_WRITE, size, NULL, &err);
5687+
}
5688+
56635689
if (err != CL_SUCCESS) {
56645690
GGML_LOG_INFO("%s: failed to allocate %.2f MiB\n", __func__, size / 1024.0 / 1024.0);
56655691
return nullptr;

0 commit comments

Comments
 (0)