mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-31 05:03:44 +00:00
opencl: move backend info printing into its own function (#23702)
* opencl: move backend info print into its own function * opencl: move new log line * opencl: fix for non adreno path
This commit is contained in:
parent
3ef2369551
commit
408ae2b9e5
1 changed files with 86 additions and 69 deletions
|
|
@ -379,6 +379,8 @@ struct ggml_backend_opencl_device_context {
|
|||
GPU_FAMILY gpu_family = GPU_FAMILY::UNKNOWN;
|
||||
ADRENO_GPU_GEN adreno_gen = ADRENO_GPU_GEN::ADRENO_UNKNOWN;
|
||||
|
||||
std::regex *opfilter = nullptr; // regex of ops to not claim
|
||||
std::string opfilter_str; // regex string for opfilter
|
||||
size_t global_mem_size = 0;
|
||||
};
|
||||
|
||||
|
|
@ -415,8 +417,6 @@ struct ggml_backend_opencl_context {
|
|||
bool has_qcom_subgroup_shuffle = false; // cl_qcom_subgroup_shuffle
|
||||
bool disable_fusion;
|
||||
|
||||
std::regex *opfilter = nullptr; // regex of ops to not claim
|
||||
|
||||
bool adreno_has_large_buffer;
|
||||
bool adreno_use_large_buffer;
|
||||
ggml_cl_compiler_version adreno_cl_compiler_version;
|
||||
|
|
@ -428,6 +428,8 @@ struct ggml_backend_opencl_context {
|
|||
size_t image2d_max_width;
|
||||
size_t image2d_max_height;
|
||||
|
||||
cl_device_svm_capabilities svm_caps;
|
||||
|
||||
cl_context context;
|
||||
cl_command_queue queue;
|
||||
|
||||
|
|
@ -3731,6 +3733,68 @@ static std::vector<ggml_backend_device> ggml_opencl_probe_devices(ggml_backend_r
|
|||
return found_devices;
|
||||
}
|
||||
|
||||
static void ggml_opencl_print_backend_info(ggml_backend_opencl_device_context * dev_ctx) {
|
||||
GGML_ASSERT(dev_ctx);
|
||||
GGML_ASSERT(dev_ctx->backend_ctx);
|
||||
|
||||
auto * backend_ctx = dev_ctx->backend_ctx;
|
||||
|
||||
GGML_LOG_INFO("ggml_opencl: OpenCL driver: %s\n",
|
||||
backend_ctx->driver_version.c_str());
|
||||
GGML_LOG_INFO("ggml_opencl: vector subgroup broadcast support: %s\n",
|
||||
backend_ctx->has_vector_subgroup_broadcast ? "true" : "false");
|
||||
GGML_LOG_INFO("ggml_opencl: device FP16 support: %s\n",
|
||||
backend_ctx->fp16_support ? "true" : "false");
|
||||
GGML_LOG_INFO("ggml_opencl: mem base addr align: %u\n",
|
||||
backend_ctx->alignment);
|
||||
GGML_LOG_INFO("ggml_opencl: global mem size: %zu MB\n",
|
||||
backend_ctx->global_mem_size/1024/1024);
|
||||
GGML_LOG_INFO("ggml_opencl: max mem alloc size: %zu MB\n",
|
||||
backend_ctx->max_alloc_size/1024/1024);
|
||||
GGML_LOG_INFO("ggml_opencl: device max image buffer size (pixels): %lu\n",
|
||||
backend_ctx->image_max_buffer_size);
|
||||
GGML_LOG_INFO("ggml_opencl: device max image2d size: %lu x %lu\n",
|
||||
backend_ctx->image2d_max_width, backend_ctx->image2d_max_height);
|
||||
GGML_LOG_INFO("ggml_opencl: device max workgroup size: %lu\n",
|
||||
backend_ctx->max_workgroup_size);
|
||||
GGML_LOG_INFO("ggml_opencl: SVM coarse grain buffer support: %s\n",
|
||||
backend_ctx->svm_caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER ? "true" : "false");
|
||||
GGML_LOG_INFO("ggml_opencl: SVM fine grain buffer support: %s\n",
|
||||
backend_ctx->svm_caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER ? "true" : "false");
|
||||
GGML_LOG_INFO("ggml_opencl: SVM fine grain system support: %s\n",
|
||||
backend_ctx->svm_caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM ? "true" : "false");
|
||||
GGML_LOG_INFO("ggml_opencl: SVM atomics support: %s\n",
|
||||
backend_ctx->svm_caps & CL_DEVICE_SVM_ATOMICS ? "true" : "false");
|
||||
GGML_LOG_INFO("ggml_opencl: cl_qcom_subgroup_shuffle support: %s\n",
|
||||
backend_ctx->has_qcom_subgroup_shuffle ? "true" : "false");
|
||||
|
||||
// Print out configurations
|
||||
#ifdef GGML_OPENCL_SOA_Q
|
||||
GGML_LOG_INFO("ggml_opencl: flattening quantized weights representation as struct of arrays (GGML_OPENCL_SOA_Q)\n");
|
||||
#endif // GGML_OPENCL_SOA_Q
|
||||
|
||||
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
|
||||
GGML_LOG_INFO("ggml_opencl: using kernels optimized for Adreno (GGML_OPENCL_USE_ADRENO_KERNELS)\n");
|
||||
if (backend_ctx->adreno_xmem_gemm_enabled) {
|
||||
GGML_LOG_INFO("ggml_opencl: Adreno xmem F16xF32 GEMM enabled (temporary weight prepack)\n");
|
||||
}
|
||||
#endif // GGML_OPENCL_USE_ADRENO_KERNELS
|
||||
|
||||
if (backend_ctx->adreno_use_large_buffer) {
|
||||
if (!backend_ctx->adreno_has_large_buffer) {
|
||||
GGML_LOG_INFO("ggml_opencl: Adreno large buffer requested but not supported by driver, will use regular buffer\n");
|
||||
backend_ctx->adreno_use_large_buffer = false;
|
||||
} else {
|
||||
GGML_LOG_INFO("ggml_opencl: Adreno large buffer enabled\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (dev_ctx->opfilter) {
|
||||
// for information only, the actual regex object is created in ggml_opencl_is_device_supported
|
||||
GGML_LOG_INFO("ggml_opencl: opfilter regex = \"%s\"\n", dev_ctx->opfilter_str.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// check if device should be accepted
|
||||
static bool ggml_opencl_is_device_supported(ggml_backend_dev_t dev) {
|
||||
GGML_ASSERT(dev);
|
||||
|
|
@ -3799,6 +3863,13 @@ static bool ggml_opencl_is_device_supported(ggml_backend_dev_t dev) {
|
|||
}
|
||||
|
||||
clGetDeviceInfo(dev_ctx->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &dev_ctx->global_mem_size, NULL);
|
||||
|
||||
const char * str_opfilter = getenv("GGML_OPENCL_OPFILTER");
|
||||
if (str_opfilter) {
|
||||
dev_ctx->opfilter_str = str_opfilter;
|
||||
dev_ctx->opfilter = new std::regex(str_opfilter, std::regex_constants::icase);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -3850,15 +3921,12 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
|
|||
char *driver_version = (char *)alloca(driver_version_str_size + 1);
|
||||
clGetDeviceInfo(device, CL_DRIVER_VERSION, driver_version_str_size, driver_version, NULL);
|
||||
driver_version[driver_version_str_size] = '\0';
|
||||
GGML_LOG_INFO("ggml_opencl: OpenCL driver: %s\n", driver_version);
|
||||
backend_ctx->driver_version = driver_version;
|
||||
|
||||
backend_ctx->adreno_cl_compiler_version = get_adreno_cl_compiler_version(driver_version);
|
||||
backend_ctx->has_vector_subgroup_broadcast =
|
||||
(backend_ctx->adreno_cl_compiler_version.type == E031 && backend_ctx->adreno_cl_compiler_version.major >= 47) ||
|
||||
(backend_ctx->adreno_cl_compiler_version.type == DX && backend_ctx->adreno_cl_compiler_version.major >= 17);
|
||||
GGML_LOG_INFO("ggml_opencl: vector subgroup broadcast support: %s\n",
|
||||
backend_ctx->has_vector_subgroup_broadcast ? "true" : "false");
|
||||
|
||||
size_t ext_str_size;
|
||||
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &ext_str_size);
|
||||
|
|
@ -3867,18 +3935,12 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
|
|||
ext_buffer[ext_str_size] = '\0'; // ensure it is null terminated
|
||||
|
||||
// check support for qcom_subgroup_shuffle
|
||||
if (opencl_c_version.major == 3 && strstr(ext_buffer, "cl_khr_subgroups") != NULL) {
|
||||
GGML_LOG_INFO("ggml_opencl: cl_khr_subgroups support: true\n");
|
||||
if (strstr(ext_buffer, "cl_qcom_subgroup_shuffle") != NULL) {
|
||||
backend_ctx->has_qcom_subgroup_shuffle = true;
|
||||
}
|
||||
if (strstr(ext_buffer, "cl_qcom_subgroup_shuffle") != NULL) {
|
||||
backend_ctx->has_qcom_subgroup_shuffle = true;
|
||||
}
|
||||
GGML_LOG_INFO("ggml_opencl: cl_qcom_subgroup_shuffle support: %s\n",
|
||||
backend_ctx->has_qcom_subgroup_shuffle ? "true" : "false");
|
||||
|
||||
// Check if ext_buffer contains cl_khr_fp16
|
||||
backend_ctx->fp16_support = strstr(ext_buffer, "cl_khr_fp16") != NULL;
|
||||
GGML_LOG_INFO("ggml_opencl: device FP16 support: %s\n", backend_ctx->fp16_support ? "true" : "false");
|
||||
|
||||
// check Adreno large buffer support
|
||||
backend_ctx->adreno_has_large_buffer = strstr(ext_buffer, "cl_qcom_large_buffer") != NULL;
|
||||
|
|
@ -3887,35 +3949,15 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
|
|||
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &base_align_in_bits, NULL));
|
||||
GGML_ASSERT(base_align_in_bits % 8u == 0);
|
||||
backend_ctx->alignment = base_align_in_bits / 8u;
|
||||
GGML_LOG_INFO("ggml_opencl: mem base addr align: %u\n", backend_ctx->alignment);
|
||||
|
||||
backend_ctx->global_mem_size = dev_ctx->global_mem_size;
|
||||
GGML_LOG_INFO("ggml_opencl: global mem size: %zu MB\n", backend_ctx->global_mem_size/1024/1024);
|
||||
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &backend_ctx->max_alloc_size, NULL);
|
||||
GGML_LOG_INFO("ggml_opencl: max mem alloc size: %zu MB\n", backend_ctx->max_alloc_size/1024/1024);
|
||||
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t), &backend_ctx->image_max_buffer_size, NULL);
|
||||
GGML_LOG_INFO("ggml_opencl: device max image buffer size (pixels): %lu\n", backend_ctx->image_max_buffer_size);
|
||||
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &backend_ctx->image2d_max_width, NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &backend_ctx->image2d_max_height, NULL);
|
||||
GGML_LOG_INFO("ggml_opencl: device max image2d size: %lu x %lu\n", backend_ctx->image2d_max_width, backend_ctx->image2d_max_height);
|
||||
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &backend_ctx->max_workgroup_size, NULL);
|
||||
GGML_LOG_INFO("ggml_opencl: device max workgroup size: %lu\n", backend_ctx->max_workgroup_size);
|
||||
|
||||
// Check SVM.
|
||||
cl_device_svm_capabilities svm_caps;
|
||||
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &svm_caps, 0));
|
||||
GGML_LOG_INFO("ggml_opencl: SVM coarse grain buffer support: %s\n",
|
||||
svm_caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER ? "true" : "false");
|
||||
GGML_LOG_INFO("ggml_opencl: SVM fine grain buffer support: %s\n",
|
||||
svm_caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER ? "true" : "false");
|
||||
GGML_LOG_INFO("ggml_opencl: SVM fine grain system support: %s\n",
|
||||
svm_caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM ? "true" : "false");
|
||||
GGML_LOG_INFO("ggml_opencl: SVM atomics support: %s\n",
|
||||
svm_caps & CL_DEVICE_SVM_ATOMICS ? "true" : "false");
|
||||
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &backend_ctx->max_alloc_size, NULL));
|
||||
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t), &backend_ctx->image_max_buffer_size, NULL));
|
||||
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &backend_ctx->image2d_max_width, NULL));
|
||||
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &backend_ctx->image2d_max_height, NULL));
|
||||
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &backend_ctx->max_workgroup_size, NULL));
|
||||
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &backend_ctx->svm_caps, 0));
|
||||
|
||||
if (opencl_c_version.major >= 3) {
|
||||
// Assume it is not available for 3.0, since it is optional in 3.0.
|
||||
|
|
@ -3931,36 +3973,15 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
|
|||
backend_ctx->non_uniform_workgroups = true;
|
||||
}
|
||||
|
||||
// Print out configurations
|
||||
#ifdef GGML_OPENCL_SOA_Q
|
||||
GGML_LOG_INFO("ggml_opencl: flattening quantized weights representation as struct of arrays (GGML_OPENCL_SOA_Q)\n");
|
||||
#endif // GGML_OPENCL_SOA_Q
|
||||
|
||||
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
|
||||
GGML_LOG_INFO("ggml_opencl: using kernels optimized for Adreno (GGML_OPENCL_USE_ADRENO_KERNELS)\n");
|
||||
#endif // GGML_OPENCL_USE_ADRENO_KERNELS
|
||||
|
||||
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
|
||||
// determine whether to use Adreno xmem GEMM
|
||||
backend_ctx->adreno_xmem_gemm_enabled = getenv("GGML_OPENCL_ADRENO_XMEM_GEMM") != nullptr &&
|
||||
backend_ctx->gpu_family == GPU_FAMILY::ADRENO;
|
||||
if (getenv("GGML_OPENCL_ADRENO_XMEM_GEMM") != nullptr) {
|
||||
GGML_LOG_INFO("ggml_opencl: Adreno xmem F16xF32 GEMM %s\n",
|
||||
backend_ctx->adreno_xmem_gemm_enabled ?
|
||||
"enabled (temporary weight prepack)" : "requested but unsupported by this driver");
|
||||
}
|
||||
#endif // GGML_OPENCL_USE_ADRENO_KERNELS
|
||||
#endif
|
||||
|
||||
// determine whether to use large buffer for Adreno
|
||||
backend_ctx->adreno_use_large_buffer = getenv("GGML_OPENCL_ADRENO_USE_LARGE_BUFFER") != nullptr &&
|
||||
backend_ctx->gpu_family == GPU_FAMILY::ADRENO;
|
||||
if (backend_ctx->adreno_use_large_buffer) {
|
||||
if (!backend_ctx->adreno_has_large_buffer) {
|
||||
GGML_LOG_INFO("ggml_opencl: Adreno large buffer requested but not supported by driver, will use regular buffer\n");
|
||||
backend_ctx->adreno_use_large_buffer = false;
|
||||
} else {
|
||||
GGML_LOG_INFO("ggml_opencl: Adreno large buffer enabled\n");
|
||||
}
|
||||
}
|
||||
|
||||
cl_int err;
|
||||
|
||||
|
|
@ -4010,12 +4031,6 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
|
|||
|
||||
backend_ctx->disable_fusion = getenv("GGML_OPENCL_DISABLE_FUSION") != nullptr;
|
||||
|
||||
const char * str_opfilter = getenv("GGML_OPENCL_OPFILTER");
|
||||
if (str_opfilter) {
|
||||
backend_ctx->opfilter = new std::regex(str_opfilter, std::regex_constants::icase);
|
||||
GGML_LOG_INFO("ggml_opencl: opfilter regex = \"%s\"\n", str_opfilter);
|
||||
}
|
||||
|
||||
dev_ctx->backend_ctx = backend_ctx.release();
|
||||
return dev_ctx->backend_ctx;
|
||||
}
|
||||
|
|
@ -4825,7 +4840,7 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te
|
|||
ggml_backend_opencl_context * backend_ctx = dev_ctx->backend_ctx;
|
||||
|
||||
// reject ops that match the opfilter regex
|
||||
if (backend_ctx->opfilter && std::regex_match(std::string(ggml_op_desc(op)), *backend_ctx->opfilter)) {
|
||||
if (dev_ctx->opfilter && std::regex_match(std::string(ggml_op_desc(op)), *dev_ctx->opfilter)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -7823,6 +7838,8 @@ static ggml_backend_t ggml_backend_opencl_device_init(ggml_backend_dev_t dev, co
|
|||
/* .context = */ backend_ctx,
|
||||
};
|
||||
|
||||
ggml_backend_opencl_device_context * dev_ctx = (ggml_backend_opencl_device_context *) dev->context;
|
||||
ggml_opencl_print_backend_info(dev_ctx);
|
||||
return backend;
|
||||
|
||||
GGML_UNUSED(params);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue