Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/docker.yml
#	CMakeLists.txt
#	CONTRIBUTING.md
#	docs/android.md
#	docs/docker.md
#	examples/embedding/embedding.cpp
#	examples/imatrix/imatrix.cpp
#	examples/infill/infill.cpp
#	examples/llama-bench/llama-bench.cpp
#	examples/main/README.md
#	examples/parallel/parallel.cpp
#	examples/perplexity/perplexity.cpp
#	examples/quantize-stats/quantize-stats.cpp
#	examples/save-load-state/save-load-state.cpp
#	examples/server/README.md
#	examples/simple/CMakeLists.txt
#	examples/speculative/speculative.cpp
#	flake.lock
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-blas.cpp
#	pocs/vdot/q8dot.cpp
#	pocs/vdot/vdot.cpp
#	scripts/debug-test.sh
#	scripts/sync-ggml.last
#	src/llama.cpp
#	tests/test-backend-ops.cpp
#	tests/test-chat-template.cpp
#	tests/test-quantize-fns.cpp
#	tests/test-quantize-perf.cpp
#	tests/test-tokenizer-0.cpp
#	tests/test-tokenizer-1-bpe.cpp
#	tests/test-tokenizer-1-spm.cpp
This commit is contained in:
Concedo 2024-10-11 11:59:59 +08:00
commit e692a79aab
61 changed files with 2579 additions and 1949 deletions

View file

@@ -580,25 +580,25 @@ void convert_tensor(void* src,
if (src_type == GGML_TYPE_F16) {
ggml_fp16_to_fp32_row((ggml_fp16_t*)src, (float*)dst, n);
} else {
auto qtype = ggml_internal_get_type_traits(src_type);
if (qtype.to_float == NULL) {
auto qtype = ggml_get_type_traits(src_type);
if (qtype->to_float == NULL) {
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
ggml_type_name(src_type)));
}
qtype.to_float(src, (float*)dst, n);
qtype->to_float(src, (float*)dst, n);
}
} else {
// src_type == GGML_TYPE_F16 => dst_type is quantized
// src_type is quantized => dst_type == GGML_TYPE_F16 or dst_type is quantized
auto qtype = ggml_internal_get_type_traits(src_type);
if (qtype.to_float == NULL) {
auto qtype = ggml_get_type_traits(src_type);
if (qtype->to_float == NULL) {
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
ggml_type_name(src_type)));
}
std::vector<char> buf;
buf.resize(sizeof(float) * n);
char* src_data_f32 = buf.data();
qtype.to_float(src, (float*)src_data_f32, n);
qtype->to_float(src, (float*)src_data_f32, n);
if (dst_type == GGML_TYPE_F16) {
ggml_fp32_to_fp16_row((float*)src_data_f32, (ggml_fp16_t*)dst, n);
} else {