opencl: move backend info printing into its own function (#23702)

* opencl: move backend info print into its own function

* opencl: move new log line

* opencl: fix for non adreno path
This commit is contained in:
lhez 2026-05-28 11:05:42 -07:00 committed by GitHub
parent 3ef2369551
commit 408ae2b9e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -379,6 +379,8 @@ struct ggml_backend_opencl_device_context {
GPU_FAMILY gpu_family = GPU_FAMILY::UNKNOWN;
ADRENO_GPU_GEN adreno_gen = ADRENO_GPU_GEN::ADRENO_UNKNOWN;
std::regex *opfilter = nullptr; // regex of ops to not claim
std::string opfilter_str; // regex string for opfilter
size_t global_mem_size = 0;
};
@ -415,8 +417,6 @@ struct ggml_backend_opencl_context {
bool has_qcom_subgroup_shuffle = false; // cl_qcom_subgroup_shuffle
bool disable_fusion;
std::regex *opfilter = nullptr; // regex of ops to not claim
bool adreno_has_large_buffer;
bool adreno_use_large_buffer;
ggml_cl_compiler_version adreno_cl_compiler_version;
@ -428,6 +428,8 @@ struct ggml_backend_opencl_context {
size_t image2d_max_width;
size_t image2d_max_height;
cl_device_svm_capabilities svm_caps;
cl_context context;
cl_command_queue queue;
@ -3731,6 +3733,68 @@ static std::vector<ggml_backend_device> ggml_opencl_probe_devices(ggml_backend_r
return found_devices;
}
static void ggml_opencl_print_backend_info(ggml_backend_opencl_device_context * dev_ctx) {
GGML_ASSERT(dev_ctx);
GGML_ASSERT(dev_ctx->backend_ctx);
auto * backend_ctx = dev_ctx->backend_ctx;
GGML_LOG_INFO("ggml_opencl: OpenCL driver: %s\n",
backend_ctx->driver_version.c_str());
GGML_LOG_INFO("ggml_opencl: vector subgroup broadcast support: %s\n",
backend_ctx->has_vector_subgroup_broadcast ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: device FP16 support: %s\n",
backend_ctx->fp16_support ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: mem base addr align: %u\n",
backend_ctx->alignment);
GGML_LOG_INFO("ggml_opencl: global mem size: %zu MB\n",
backend_ctx->global_mem_size/1024/1024);
GGML_LOG_INFO("ggml_opencl: max mem alloc size: %zu MB\n",
backend_ctx->max_alloc_size/1024/1024);
GGML_LOG_INFO("ggml_opencl: device max image buffer size (pixels): %lu\n",
backend_ctx->image_max_buffer_size);
GGML_LOG_INFO("ggml_opencl: device max image2d size: %lu x %lu\n",
backend_ctx->image2d_max_width, backend_ctx->image2d_max_height);
GGML_LOG_INFO("ggml_opencl: device max workgroup size: %lu\n",
backend_ctx->max_workgroup_size);
GGML_LOG_INFO("ggml_opencl: SVM coarse grain buffer support: %s\n",
backend_ctx->svm_caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM fine grain buffer support: %s\n",
backend_ctx->svm_caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM fine grain system support: %s\n",
backend_ctx->svm_caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM atomics support: %s\n",
backend_ctx->svm_caps & CL_DEVICE_SVM_ATOMICS ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: cl_qcom_subgroup_shuffle support: %s\n",
backend_ctx->has_qcom_subgroup_shuffle ? "true" : "false");
// Print out configurations
#ifdef GGML_OPENCL_SOA_Q
GGML_LOG_INFO("ggml_opencl: flattening quantized weights representation as struct of arrays (GGML_OPENCL_SOA_Q)\n");
#endif // GGML_OPENCL_SOA_Q
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
GGML_LOG_INFO("ggml_opencl: using kernels optimized for Adreno (GGML_OPENCL_USE_ADRENO_KERNELS)\n");
if (backend_ctx->adreno_xmem_gemm_enabled) {
GGML_LOG_INFO("ggml_opencl: Adreno xmem F16xF32 GEMM enabled (temporary weight prepack)\n");
}
#endif // GGML_OPENCL_USE_ADRENO_KERNELS
if (backend_ctx->adreno_use_large_buffer) {
if (!backend_ctx->adreno_has_large_buffer) {
GGML_LOG_INFO("ggml_opencl: Adreno large buffer requested but not supported by driver, will use regular buffer\n");
backend_ctx->adreno_use_large_buffer = false;
} else {
GGML_LOG_INFO("ggml_opencl: Adreno large buffer enabled\n");
}
}
if (dev_ctx->opfilter) {
// for information only, the actual regex object is created in ggml_opencl_is_device_supported
GGML_LOG_INFO("ggml_opencl: opfilter regex = \"%s\"\n", dev_ctx->opfilter_str.c_str());
}
}
// check if device should be accepted
static bool ggml_opencl_is_device_supported(ggml_backend_dev_t dev) {
GGML_ASSERT(dev);
@ -3799,6 +3863,13 @@ static bool ggml_opencl_is_device_supported(ggml_backend_dev_t dev) {
}
clGetDeviceInfo(dev_ctx->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &dev_ctx->global_mem_size, NULL);
const char * str_opfilter = getenv("GGML_OPENCL_OPFILTER");
if (str_opfilter) {
dev_ctx->opfilter_str = str_opfilter;
dev_ctx->opfilter = new std::regex(str_opfilter, std::regex_constants::icase);
}
return true;
}
@ -3850,15 +3921,12 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
char *driver_version = (char *)alloca(driver_version_str_size + 1);
clGetDeviceInfo(device, CL_DRIVER_VERSION, driver_version_str_size, driver_version, NULL);
driver_version[driver_version_str_size] = '\0';
GGML_LOG_INFO("ggml_opencl: OpenCL driver: %s\n", driver_version);
backend_ctx->driver_version = driver_version;
backend_ctx->adreno_cl_compiler_version = get_adreno_cl_compiler_version(driver_version);
backend_ctx->has_vector_subgroup_broadcast =
(backend_ctx->adreno_cl_compiler_version.type == E031 && backend_ctx->adreno_cl_compiler_version.major >= 47) ||
(backend_ctx->adreno_cl_compiler_version.type == DX && backend_ctx->adreno_cl_compiler_version.major >= 17);
GGML_LOG_INFO("ggml_opencl: vector subgroup broadcast support: %s\n",
backend_ctx->has_vector_subgroup_broadcast ? "true" : "false");
size_t ext_str_size;
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &ext_str_size);
@ -3867,18 +3935,12 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
ext_buffer[ext_str_size] = '\0'; // ensure it is null terminated
// check support for qcom_subgroup_shuffle
if (opencl_c_version.major == 3 && strstr(ext_buffer, "cl_khr_subgroups") != NULL) {
GGML_LOG_INFO("ggml_opencl: cl_khr_subgroups support: true\n");
if (strstr(ext_buffer, "cl_qcom_subgroup_shuffle") != NULL) {
backend_ctx->has_qcom_subgroup_shuffle = true;
}
if (strstr(ext_buffer, "cl_qcom_subgroup_shuffle") != NULL) {
backend_ctx->has_qcom_subgroup_shuffle = true;
}
GGML_LOG_INFO("ggml_opencl: cl_qcom_subgroup_shuffle support: %s\n",
backend_ctx->has_qcom_subgroup_shuffle ? "true" : "false");
// Check if ext_buffer contains cl_khr_fp16
backend_ctx->fp16_support = strstr(ext_buffer, "cl_khr_fp16") != NULL;
GGML_LOG_INFO("ggml_opencl: device FP16 support: %s\n", backend_ctx->fp16_support ? "true" : "false");
// check Adreno large buffer support
backend_ctx->adreno_has_large_buffer = strstr(ext_buffer, "cl_qcom_large_buffer") != NULL;
@ -3887,35 +3949,15 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &base_align_in_bits, NULL));
GGML_ASSERT(base_align_in_bits % 8u == 0);
backend_ctx->alignment = base_align_in_bits / 8u;
GGML_LOG_INFO("ggml_opencl: mem base addr align: %u\n", backend_ctx->alignment);
backend_ctx->global_mem_size = dev_ctx->global_mem_size;
GGML_LOG_INFO("ggml_opencl: global mem size: %zu MB\n", backend_ctx->global_mem_size/1024/1024);
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &backend_ctx->max_alloc_size, NULL);
GGML_LOG_INFO("ggml_opencl: max mem alloc size: %zu MB\n", backend_ctx->max_alloc_size/1024/1024);
clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t), &backend_ctx->image_max_buffer_size, NULL);
GGML_LOG_INFO("ggml_opencl: device max image buffer size (pixels): %lu\n", backend_ctx->image_max_buffer_size);
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &backend_ctx->image2d_max_width, NULL);
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &backend_ctx->image2d_max_height, NULL);
GGML_LOG_INFO("ggml_opencl: device max image2d size: %lu x %lu\n", backend_ctx->image2d_max_width, backend_ctx->image2d_max_height);
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &backend_ctx->max_workgroup_size, NULL);
GGML_LOG_INFO("ggml_opencl: device max workgroup size: %lu\n", backend_ctx->max_workgroup_size);
// Check SVM.
cl_device_svm_capabilities svm_caps;
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &svm_caps, 0));
GGML_LOG_INFO("ggml_opencl: SVM coarse grain buffer support: %s\n",
svm_caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM fine grain buffer support: %s\n",
svm_caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM fine grain system support: %s\n",
svm_caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM atomics support: %s\n",
svm_caps & CL_DEVICE_SVM_ATOMICS ? "true" : "false");
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &backend_ctx->max_alloc_size, NULL));
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t), &backend_ctx->image_max_buffer_size, NULL));
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &backend_ctx->image2d_max_width, NULL));
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &backend_ctx->image2d_max_height, NULL));
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &backend_ctx->max_workgroup_size, NULL));
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &backend_ctx->svm_caps, 0));
if (opencl_c_version.major >= 3) {
// Assume it is not available for 3.0, since it is optional in 3.0.
@ -3931,36 +3973,15 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
backend_ctx->non_uniform_workgroups = true;
}
// Print out configurations
#ifdef GGML_OPENCL_SOA_Q
GGML_LOG_INFO("ggml_opencl: flattening quantized weights representation as struct of arrays (GGML_OPENCL_SOA_Q)\n");
#endif // GGML_OPENCL_SOA_Q
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
GGML_LOG_INFO("ggml_opencl: using kernels optimized for Adreno (GGML_OPENCL_USE_ADRENO_KERNELS)\n");
#endif // GGML_OPENCL_USE_ADRENO_KERNELS
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
// determine whether to use Adreno xmem GEMM
backend_ctx->adreno_xmem_gemm_enabled = getenv("GGML_OPENCL_ADRENO_XMEM_GEMM") != nullptr &&
backend_ctx->gpu_family == GPU_FAMILY::ADRENO;
if (getenv("GGML_OPENCL_ADRENO_XMEM_GEMM") != nullptr) {
GGML_LOG_INFO("ggml_opencl: Adreno xmem F16xF32 GEMM %s\n",
backend_ctx->adreno_xmem_gemm_enabled ?
"enabled (temporary weight prepack)" : "requested but unsupported by this driver");
}
#endif // GGML_OPENCL_USE_ADRENO_KERNELS
#endif
// determine whether to use large buffer for Adreno
backend_ctx->adreno_use_large_buffer = getenv("GGML_OPENCL_ADRENO_USE_LARGE_BUFFER") != nullptr &&
backend_ctx->gpu_family == GPU_FAMILY::ADRENO;
if (backend_ctx->adreno_use_large_buffer) {
if (!backend_ctx->adreno_has_large_buffer) {
GGML_LOG_INFO("ggml_opencl: Adreno large buffer requested but not supported by driver, will use regular buffer\n");
backend_ctx->adreno_use_large_buffer = false;
} else {
GGML_LOG_INFO("ggml_opencl: Adreno large buffer enabled\n");
}
}
cl_int err;
@ -4010,12 +4031,6 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
backend_ctx->disable_fusion = getenv("GGML_OPENCL_DISABLE_FUSION") != nullptr;
const char * str_opfilter = getenv("GGML_OPENCL_OPFILTER");
if (str_opfilter) {
backend_ctx->opfilter = new std::regex(str_opfilter, std::regex_constants::icase);
GGML_LOG_INFO("ggml_opencl: opfilter regex = \"%s\"\n", str_opfilter);
}
dev_ctx->backend_ctx = backend_ctx.release();
return dev_ctx->backend_ctx;
}
@ -4825,7 +4840,7 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te
ggml_backend_opencl_context * backend_ctx = dev_ctx->backend_ctx;
// reject ops that match the opfilter regex
if (backend_ctx->opfilter && std::regex_match(std::string(ggml_op_desc(op)), *backend_ctx->opfilter)) {
if (dev_ctx->opfilter && std::regex_match(std::string(ggml_op_desc(op)), *dev_ctx->opfilter)) {
return false;
}
@ -7823,6 +7838,8 @@ static ggml_backend_t ggml_backend_opencl_device_init(ggml_backend_dev_t dev, co
/* .context = */ backend_ctx,
};
ggml_backend_opencl_device_context * dev_ctx = (ggml_backend_opencl_device_context *) dev->context;
ggml_opencl_print_backend_info(dev_ctx);
return backend;
GGML_UNUSED(params);