mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .github/workflows/docker.yml # CMakeLists.txt # CONTRIBUTING.md # docs/android.md # docs/docker.md # examples/embedding/embedding.cpp # examples/imatrix/imatrix.cpp # examples/infill/infill.cpp # examples/llama-bench/llama-bench.cpp # examples/main/README.md # examples/parallel/parallel.cpp # examples/perplexity/perplexity.cpp # examples/quantize-stats/quantize-stats.cpp # examples/save-load-state/save-load-state.cpp # examples/server/README.md # examples/simple/CMakeLists.txt # examples/speculative/speculative.cpp # flake.lock # ggml/src/CMakeLists.txt # ggml/src/ggml-blas.cpp # pocs/vdot/q8dot.cpp # pocs/vdot/vdot.cpp # scripts/debug-test.sh # scripts/sync-ggml.last # src/llama.cpp # tests/test-backend-ops.cpp # tests/test-chat-template.cpp # tests/test-quantize-fns.cpp # tests/test-quantize-perf.cpp # tests/test-tokenizer-0.cpp # tests/test-tokenizer-1-bpe.cpp # tests/test-tokenizer-1-spm.cpp
This commit is contained in:
commit
e692a79aab
61 changed files with 2579 additions and 1949 deletions
|
@ -463,6 +463,7 @@ enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device) {
|
|||
}
|
||||
|
||||
void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props) {
|
||||
memset(props, 0, sizeof(*props));
|
||||
device->iface.get_props(device, props);
|
||||
}
|
||||
|
||||
|
@ -479,6 +480,10 @@ ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t devic
|
|||
}
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device) {
|
||||
if (device->iface.get_host_buffer_type == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return device->iface.get_host_buffer_type(device);
|
||||
}
|
||||
|
||||
|
@ -495,7 +500,11 @@ bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buff
|
|||
}
|
||||
|
||||
bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op) {
|
||||
return device->iface.offload_op(device, op);
|
||||
if (device->iface.offload_op != NULL) {
|
||||
return device->iface.offload_op(device, op);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Backend (reg)
|
||||
|
@ -525,6 +534,18 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
|
|||
#include "ggml-cuda.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_METAL
|
||||
#include "ggml-metal.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_BLAS
|
||||
#include "ggml-blas.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_RPC
|
||||
#include "ggml-rpc.h"
|
||||
#endif
|
||||
|
||||
struct ggml_backend_registry {
|
||||
std::vector<ggml_backend_reg_t> backends;
|
||||
std::vector<ggml_backend_dev_t> devices;
|
||||
|
@ -533,10 +554,19 @@ struct ggml_backend_registry {
|
|||
#ifdef GGML_USE_CUDA
|
||||
register_backend(ggml_backend_cuda_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_METAL
|
||||
register_backend(ggml_backend_metal_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_BLAS
|
||||
register_backend(ggml_backend_blas_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_RPC
|
||||
register_backend(ggml_backend_rpc_reg());
|
||||
#endif
|
||||
|
||||
// TODO: sycl, vulkan, kompute, cann
|
||||
|
||||
register_backend(ggml_backend_cpu_reg());
|
||||
|
||||
// TODO: sycl, metal, vulkan, kompute, cann
|
||||
}
|
||||
|
||||
void register_backend(ggml_backend_reg_t reg) {
|
||||
|
@ -1118,9 +1148,10 @@ static void ggml_backend_cpu_device_get_props(ggml_backend_dev_t dev, struct ggm
|
|||
props->type = ggml_backend_cpu_device_get_type(dev);
|
||||
ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
||||
props->caps = {
|
||||
/* async */ false,
|
||||
/* host_buffer */ false,
|
||||
/* events */ false,
|
||||
/* .async = */ false,
|
||||
/* .host_buffer = */ false,
|
||||
/* .buffer_from_host_ptr = */ true,
|
||||
/* .events = */ false,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -1153,7 +1184,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
|
|||
op->type != GGML_TYPE_IQ1_S &&
|
||||
op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
|
||||
case GGML_OP_MUL_MAT:
|
||||
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type).vec_dot_type;
|
||||
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_get_type_traits(op->src[0]->type)->vec_dot_type;
|
||||
case GGML_OP_ROPE_BACK:
|
||||
return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
|
||||
case GGML_OP_IM2COL_BACK:
|
||||
|
@ -1216,16 +1247,22 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg
|
|||
};
|
||||
|
||||
return &ggml_backend_cpu_device;
|
||||
}
|
||||
|
||||
static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
|
||||
if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
|
||||
return (void *)ggml_backend_cpu_set_n_threads;
|
||||
}
|
||||
return NULL;
|
||||
|
||||
GGML_UNUSED(reg);
|
||||
GGML_UNUSED(index);
|
||||
}
|
||||
|
||||
static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
|
||||
/* .get_name = */ ggml_backend_cpu_reg_get_name,
|
||||
/* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
|
||||
/* .get_device = */ ggml_backend_cpu_reg_get_device,
|
||||
/* .get_proc_address = */ NULL,
|
||||
/* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
|
||||
};
|
||||
|
||||
ggml_backend_reg_t ggml_backend_cpu_reg(void) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue