mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # tests/test-backend-ops.cpp
This commit is contained in:
commit
d383c03554
4 changed files with 182 additions and 88 deletions
|
@ -321,7 +321,7 @@ static vk_device_architecture get_device_architecture(const vk::PhysicalDevice&
|
|||
}
|
||||
|
||||
struct vk_device_struct {
|
||||
std::mutex mutex;
|
||||
std::recursive_mutex mutex;
|
||||
|
||||
vk::PhysicalDevice physical_device;
|
||||
vk::PhysicalDeviceProperties properties;
|
||||
|
@ -1213,7 +1213,7 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
|
|||
}
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(device->mutex);
|
||||
std::lock_guard<std::recursive_mutex> guard(device->mutex);
|
||||
device->pipelines.insert({ pipeline->name, pipeline });
|
||||
}
|
||||
|
||||
|
@ -1427,7 +1427,7 @@ static uint32_t ggml_vk_find_queue_family_index(std::vector<vk::QueueFamilyPrope
|
|||
|
||||
static void ggml_vk_create_queue(vk_device& device, vk_queue& q, uint32_t queue_family_index, uint32_t queue_index, vk::PipelineStageFlags&& stage_flags, bool transfer_only) {
|
||||
VK_LOG_DEBUG("ggml_vk_create_queue()");
|
||||
std::lock_guard<std::mutex> guard(device->mutex);
|
||||
std::lock_guard<std::recursive_mutex> guard(device->mutex);
|
||||
|
||||
q.queue_family_index = queue_family_index;
|
||||
q.transfer_only = transfer_only;
|
||||
|
@ -4148,6 +4148,7 @@ static void * ggml_vk_host_malloc(vk_device& device, size_t size) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
std::lock_guard<std::recursive_mutex> guard(device->mutex);
|
||||
device->pinned_memory.push_back(std::make_tuple(buf->ptr, size, buf));
|
||||
|
||||
return buf->ptr;
|
||||
|
@ -4158,6 +4159,8 @@ static void ggml_vk_host_free(vk_device& device, void* ptr) {
|
|||
return;
|
||||
}
|
||||
VK_LOG_MEMORY("ggml_vk_host_free(" << ptr << ")");
|
||||
std::lock_guard<std::recursive_mutex> guard(device->mutex);
|
||||
|
||||
vk_buffer buf;
|
||||
size_t index;
|
||||
for (size_t i = 0; i < device->pinned_memory.size(); i++) {
|
||||
|
@ -4180,6 +4183,7 @@ static void ggml_vk_host_free(vk_device& device, void* ptr) {
|
|||
}
|
||||
|
||||
static void ggml_vk_host_get(vk_device& device, const void * ptr, vk_buffer& buf, size_t& buf_offset) {
|
||||
std::lock_guard<std::recursive_mutex> guard(device->mutex);
|
||||
buf = nullptr;
|
||||
buf_offset = 0;
|
||||
for (size_t i = 0; i < device->pinned_memory.size(); i++) {
|
||||
|
@ -4481,7 +4485,7 @@ static void ggml_vk_buffer_write_2d(vk_buffer& dst, size_t offset, const void *
|
|||
memcpy((uint8_t *)dst->ptr + offset + i * width, (const uint8_t *) src + i * spitch, width);
|
||||
}
|
||||
} else {
|
||||
std::lock_guard<std::mutex> guard(dst->device->mutex);
|
||||
std::lock_guard<std::recursive_mutex> guard(dst->device->mutex);
|
||||
|
||||
vk_context subctx = ggml_vk_create_temporary_context(dst->device->transfer_queue.cmd_pool);
|
||||
ggml_vk_ctx_begin(dst->device, subctx);
|
||||
|
@ -4572,7 +4576,7 @@ static void ggml_vk_buffer_read(vk_buffer& src, size_t offset, void * dst, size_
|
|||
|
||||
memcpy(dst, (uint8_t *) src->ptr + offset, size);
|
||||
} else {
|
||||
std::lock_guard<std::mutex> guard(src->device->mutex);
|
||||
std::lock_guard<std::recursive_mutex> guard(src->device->mutex);
|
||||
|
||||
vk_context subctx = ggml_vk_create_temporary_context(src->device->transfer_queue.cmd_pool);
|
||||
ggml_vk_ctx_begin(src->device, subctx);
|
||||
|
@ -4602,7 +4606,7 @@ static void ggml_vk_buffer_copy_async(vk_context& ctx, vk_buffer& dst, size_t ds
|
|||
|
||||
static void ggml_vk_buffer_copy(vk_buffer& dst, size_t dst_offset, vk_buffer& src, size_t src_offset, size_t size) {
|
||||
if (src->device == dst->device) {
|
||||
std::lock_guard<std::mutex> guard(src->device->mutex);
|
||||
std::lock_guard<std::recursive_mutex> guard(src->device->mutex);
|
||||
VK_LOG_DEBUG("ggml_vk_buffer_copy(SINGLE_DEVICE, " << size << ")");
|
||||
// Copy within the device
|
||||
vk_context subctx = ggml_vk_create_temporary_context(src->device->transfer_queue.cmd_pool);
|
||||
|
@ -4637,7 +4641,7 @@ static void ggml_vk_buffer_memset_async(vk_context& ctx, vk_buffer& dst, size_t
|
|||
static void ggml_vk_buffer_memset(vk_buffer& dst, size_t offset, uint32_t c, size_t size) {
|
||||
VK_LOG_DEBUG("ggml_vk_buffer_memset(" << offset << ", " << c << ", " << size << ")");
|
||||
|
||||
std::lock_guard<std::mutex> guard(dst->device->mutex);
|
||||
std::lock_guard<std::recursive_mutex> guard(dst->device->mutex);
|
||||
vk_context subctx = ggml_vk_create_temporary_context(dst->device->transfer_queue.cmd_pool);
|
||||
ggml_vk_ctx_begin(dst->device, subctx);
|
||||
subctx->s->buffer.fillBuffer(dst->buffer, offset, size, c);
|
||||
|
@ -4864,9 +4868,17 @@ static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const
|
|||
// type size must be exactly 2 or 4.
|
||||
GGML_ASSERT(ggml_is_quantized(to) || ggml_type_size(src->type) == 2 || ggml_type_size(src->type) == 4);
|
||||
if ((ggml_type_size(src->type) % 4) == 0) {
|
||||
return ctx->device->pipeline_contig_cpy_f32_f32;
|
||||
if (contig) {
|
||||
return ctx->device->pipeline_contig_cpy_f32_f32;
|
||||
} else {
|
||||
return ctx->device->pipeline_cpy_f32_f32;
|
||||
}
|
||||
} else {
|
||||
return ctx->device->pipeline_contig_cpy_f16_f16;
|
||||
if (contig) {
|
||||
return ctx->device->pipeline_contig_cpy_f16_f16;
|
||||
} else {
|
||||
return ctx->device->pipeline_cpy_f16_f16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4927,7 +4939,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
|
|||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3];
|
||||
std::cerr << "), " << (dryrun ? "dryrun" : "") << ")");
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || src0->type == GGML_TYPE_BF16); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT
|
||||
|
||||
const uint64_t ne00 = src0->ne[0];
|
||||
|
@ -5155,7 +5167,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
|
|||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3];
|
||||
std::cerr << "), " << (dryrun ? "dryrun" : "") << "),)");
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || src0->type == GGML_TYPE_BF16); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT
|
||||
|
||||
const uint64_t ne00 = src0->ne[0];
|
||||
|
@ -5756,7 +5768,7 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
|
|||
std::cerr << "), (" << ids << ", name=" << ids->name << ", type=" << ids->type << ", ne0=" << ids->ne[0] << ", ne1=" << ids->ne[1] << ", ne2=" << ids->ne[2] << ", ne3=" << ids->ne[3] << ", nb0=" << ids->nb[0] << ", nb1=" << ids->nb[1] << ", nb2=" << ids->nb[2] << ", nb3=" << ids->nb[3];
|
||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3];
|
||||
std::cerr << "), " << (dryrun ? "dryrun" : "") << ")");
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || src0->type == GGML_TYPE_BF16); // NOLINT
|
||||
GGML_ASSERT(ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT
|
||||
GGML_ASSERT(ids->type == GGML_TYPE_I32);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue