mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.github/workflows/docker.yml
#	CMakeLists.txt
#	CONTRIBUTING.md
#	docs/android.md
#	docs/docker.md
#	examples/embedding/embedding.cpp
#	examples/imatrix/imatrix.cpp
#	examples/infill/infill.cpp
#	examples/llama-bench/llama-bench.cpp
#	examples/main/README.md
#	examples/parallel/parallel.cpp
#	examples/perplexity/perplexity.cpp
#	examples/quantize-stats/quantize-stats.cpp
#	examples/save-load-state/save-load-state.cpp
#	examples/server/README.md
#	examples/simple/CMakeLists.txt
#	examples/speculative/speculative.cpp
#	flake.lock
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-blas.cpp
#	pocs/vdot/q8dot.cpp
#	pocs/vdot/vdot.cpp
#	scripts/debug-test.sh
#	scripts/sync-ggml.last
#	src/llama.cpp
#	tests/test-backend-ops.cpp
#	tests/test-chat-template.cpp
#	tests/test-quantize-fns.cpp
#	tests/test-quantize-perf.cpp
#	tests/test-tokenizer-0.cpp
#	tests/test-tokenizer-1-bpe.cpp
#	tests/test-tokenizer-1-spm.cpp
This commit is contained in:
commit
e692a79aab
61 changed files with 2579 additions and 1949 deletions
|
@@ -580,25 +580,25 @@ void convert_tensor(void* src,
|
|||
if (src_type == GGML_TYPE_F16) {
|
||||
ggml_fp16_to_fp32_row((ggml_fp16_t*)src, (float*)dst, n);
|
||||
} else {
|
||||
auto qtype = ggml_internal_get_type_traits(src_type);
|
||||
if (qtype.to_float == NULL) {
|
||||
auto qtype = ggml_get_type_traits(src_type);
|
||||
if (qtype->to_float == NULL) {
|
||||
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
|
||||
ggml_type_name(src_type)));
|
||||
}
|
||||
qtype.to_float(src, (float*)dst, n);
|
||||
qtype->to_float(src, (float*)dst, n);
|
||||
}
|
||||
} else {
|
||||
// src_type == GGML_TYPE_F16 => dst_type is quantized
|
||||
// src_type is quantized => dst_type == GGML_TYPE_F16 or dst_type is quantized
|
||||
auto qtype = ggml_internal_get_type_traits(src_type);
|
||||
if (qtype.to_float == NULL) {
|
||||
auto qtype = ggml_get_type_traits(src_type);
|
||||
if (qtype->to_float == NULL) {
|
||||
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
|
||||
ggml_type_name(src_type)));
|
||||
}
|
||||
std::vector<char> buf;
|
||||
buf.resize(sizeof(float) * n);
|
||||
char* src_data_f32 = buf.data();
|
||||
qtype.to_float(src, (float*)src_data_f32, n);
|
||||
qtype->to_float(src, (float*)src_data_f32, n);
|
||||
if (dst_type == GGML_TYPE_F16) {
|
||||
ggml_fp32_to_fp16_row((float*)src_data_f32, (ggml_fp16_t*)dst, n);
|
||||
} else {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue