mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Merge remote-tracking branch 'origin/master' into opencl-dev
This commit is contained in:
commit
49aaf08387
17 changed files with 766 additions and 90 deletions
|
@ -497,16 +497,11 @@ void ggml_cl_init(void) {
|
|||
|
||||
size_t ext_str_size;
|
||||
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &ext_str_size);
|
||||
char* ext_buffer = (char*) malloc(sizeof(char) * ext_str_size);
|
||||
char *ext_buffer = (char *)alloca(ext_str_size + 1);
|
||||
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, ext_str_size, ext_buffer, NULL);
|
||||
ext_buffer[ext_str_size] = '\0'; // ensure it is null terminated
|
||||
// Check if ext_buffer contains cl_khr_fp16
|
||||
for (size_t i = 0; i < ext_str_size - 12; i++) {
|
||||
if (memcmp(ext_buffer + i, "cl_khr_fp16", 11) == 0) {
|
||||
fp16_support = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
free(ext_buffer);
|
||||
fp16_support = strstr(ext_buffer, "cl_khr_fp16") != NULL;
|
||||
fprintf(stderr, "ggml_opencl: device FP16 support: %s\n", fp16_support ? "true" : "false");
|
||||
|
||||
cl_context_properties properties[] = {
|
||||
|
@ -795,7 +790,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
|
|||
size_t d_size;
|
||||
cl_mem d_X;
|
||||
if (src0->backend == GGML_BACKEND_CL) {
|
||||
d_X = *(cl_mem*) src0->data;
|
||||
d_X = (cl_mem) src0->data;
|
||||
} else {
|
||||
d_X = ggml_cl_pool_malloc(sizeof(ggml_fp16_t) * x_ne, &x_size, CL_MEM_READ_ONLY);
|
||||
}
|
||||
|
@ -871,7 +866,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
|
|||
size_t d_size;
|
||||
cl_mem d_X;
|
||||
if (src0->backend == GGML_BACKEND_CL) {
|
||||
d_X = *(cl_mem*) src0->data;
|
||||
d_X = (cl_mem) src0->data;
|
||||
} else {
|
||||
d_X = ggml_cl_pool_malloc(sizeof(ggml_fp16_t) * x_ne, &x_size, CL_MEM_READ_ONLY);
|
||||
}
|
||||
|
@ -998,7 +993,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
|
|||
events.emplace_back();
|
||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Q, 0, src0, i03, i02, events.data() + ev_idx++));
|
||||
} else if (src0->backend == GGML_BACKEND_CL) {
|
||||
d_Q = *(cl_mem*) src0->data;
|
||||
d_Q = (cl_mem) src0->data;
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
}
|
||||
|
@ -1145,14 +1140,13 @@ void ggml_cl_transform_tensor(ggml_tensor * tensor) {
|
|||
const size_t q_sz = ggml_type_size(type) * ne0 * ne1 * ne2 * ne3 / ggml_blck_size(type);
|
||||
|
||||
size_t q_size;
|
||||
cl_mem* dst = (cl_mem*) malloc(sizeof(cl_mem));
|
||||
*dst = ggml_cl_pool_malloc(q_sz, &q_size, CL_MEM_READ_ONLY);
|
||||
cl_mem dst = ggml_cl_pool_malloc(q_sz, &q_size, CL_MEM_READ_ONLY);
|
||||
|
||||
// copy tensor to device
|
||||
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
||||
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
||||
int i = i3*ne2 + i2;
|
||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, *dst, i*ne0*ne1, tensor, i3, i2, NULL));
|
||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, dst, i*ne0*ne1, tensor, i3, i2, NULL));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue