Mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-14 02:49:41 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	common/CMakeLists.txt
#	docs/backend/SYCL.md
#	ggml/CMakeLists.txt
#	ggml/src/ggml-sycl/CMakeLists.txt
#	ggml/src/ggml-sycl/binbcast.cpp
#	ggml/src/ggml-sycl/convert.cpp
#	ggml/src/ggml-sycl/dequantize.hpp
#	ggml/src/ggml-sycl/dmmv.cpp
#	ggml/src/ggml-sycl/gemm.hpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	ggml/src/ggml-sycl/mmvq.cpp
#	ggml/src/ggml-sycl/vecdotq.hpp
#	ggml/src/ggml-vulkan/CMakeLists.txt
#	ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt
#	ggml/src/gguf.cpp
#	scripts/compare-llama-bench.py
#	tests/CMakeLists.txt
#	tests/test-chat.cpp
#	tools/llama-bench/llama-bench.cpp
#	tools/server/README.md
Commit e5d26a2356: 47 changed files with 2671 additions and 504 deletions
@@ -14,6 +14,12 @@
 #include <thread>
 #include <unordered_map>
 
+// Quantization types. Changes to this struct must be replicated in quantize.cpp
+struct tensor_quantization {
+    std::string name;
+    ggml_type quant = GGML_TYPE_COUNT;
+};
+
 static void zeros(std::ofstream & file, size_t n) {
     char zero = 0;
     for (size_t i = 0; i < n; ++i) {
@@ -48,12 +54,6 @@ struct quantize_state_impl {
     {}
 };
 
-// changes to this struct must be replicated in quantize.cpp
-struct tensor_quantization {
-    std::string name;
-    ggml_type quant = GGML_TYPE_COUNT;
-};
-
 static void llama_tensor_dequantize_impl(
     ggml_tensor * tensor, std::vector<no_init<float>> & output, std::vector<std::thread> & workers,
     const size_t nelements, const int nthread
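The two hunks above move tensor_quantization earlier in the file (with a reworded comment); the override logic itself changes in the next hunk. For context, here is a minimal sketch of how a caller might hand such overrides to the quantizer through the opaque tensor_types pointer that the next hunk casts back. The llama.h entry points and the tensor_types field are taken from the code visible in this diff; the override pattern and type are illustrative, not part of this commit.

#include <string>
#include <vector>

#include "llama.h"  // llama_model_quantize_params, llama_model_quantize_default_params

// Same layout as the struct added above; quantize.cpp keeps its own copy in sync.
struct tensor_quantization {
    std::string name;                    // regex pattern matched against tensor names
    ggml_type   quant = GGML_TYPE_COUNT;
};

int main() {
    // Illustrative override: quantize tensors whose name matches "attn_v" as Q6_K.
    std::vector<tensor_quantization> overrides = {
        { "attn_v", GGML_TYPE_Q6_K },
    };

    llama_model_quantize_params params = llama_model_quantize_default_params();
    // tensor_types is an opaque pointer; the implementation recovers
    // const std::vector<tensor_quantization> * with a static_cast (next hunk).
    params.tensor_types = &overrides;

    // params would then be passed on to llama_model_quantize(...).
    (void) params;
    return 0;
}

Keeping tensor_types as a plain void * means the C API never has to expose a C++ std::vector across the ABI; the implementation recovers the typed vector with a single cast.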
@@ -799,17 +799,19 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         // unless the user specifies a type
         if (params->tensor_types) {
             const std::vector<tensor_quantization> & tensor_types = *static_cast<const std::vector<tensor_quantization> *>(params->tensor_types);
+            const std::string tensor_name(tensor->name);
             for (const auto & [tname, qtype] : tensor_types) {
-                if (std::regex pattern(tname); std::regex_search(tensor->name, pattern)) {
-                    if (qtype != new_type) {
-                        LLAMA_LOG_DEBUG("(overriding %s -> %s), ", ggml_type_name(new_type), ggml_type_name(qtype));
+                if (std::regex pattern(tname); std::regex_search(tensor_name, pattern)) {
+                    if (qtype != new_type) {
+                        LLAMA_LOG_DEBUG("(overriding %s) ", ggml_type_name(new_type));
+                        new_type = qtype;
+                        break; // if two or more types are specified for the tensor, first match wins
                     }
-                    new_type = qtype;
-                    break;
                 }
             }
         }
 
         if (params->token_embedding_type < GGML_TYPE_COUNT && strcmp(tensor->name, "token_embd.weight") == 0) {
             new_type = params->token_embedding_type;
         }
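After this hunk, the break sits inside the qtype != new_type check, so patterns are tried in order and the first one that actually changes the type ends the search. A tiny standalone sketch of that first-match-wins behavior, with hypothetical patterns and plain strings standing in for ggml_type:

#include <cstdio>
#include <regex>
#include <string>
#include <utility>
#include <vector>

int main() {
    using override_t = std::pair<std::string, std::string>; // {pattern, type name}
    const std::vector<override_t> tensor_types = {
        { "attn_v",   "q6_k" }, // matches blk.*.attn_v.weight first
        { "blk\\..*", "q4_k" }, // would also match, but is never reached
    };

    const std::string tensor_name = "blk.0.attn_v.weight";
    std::string new_type = "q4_0"; // default chosen before the override pass

    for (const auto & [tname, qtype] : tensor_types) {
        // C++17 init-statement in if: build the regex, then search the name
        if (std::regex pattern(tname); std::regex_search(tensor_name, pattern)) {
            if (qtype != new_type) {
                new_type = qtype;
                break; // first match wins, as in the commit
            }
        }
    }
    printf("%s -> %s\n", tensor_name.c_str(), new_type.c_str());
    return 0;
}

Running this prints blk.0.attn_v.weight -> q6_k; the broader blk\..* pattern never gets a chance to apply q4_k.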