Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/docker.yml
#	CMakeLists.txt
#	CONTRIBUTING.md
#	docs/android.md
#	docs/docker.md
#	examples/embedding/embedding.cpp
#	examples/imatrix/imatrix.cpp
#	examples/infill/infill.cpp
#	examples/llama-bench/llama-bench.cpp
#	examples/main/README.md
#	examples/parallel/parallel.cpp
#	examples/perplexity/perplexity.cpp
#	examples/quantize-stats/quantize-stats.cpp
#	examples/save-load-state/save-load-state.cpp
#	examples/server/README.md
#	examples/simple/CMakeLists.txt
#	examples/speculative/speculative.cpp
#	flake.lock
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-blas.cpp
#	pocs/vdot/q8dot.cpp
#	pocs/vdot/vdot.cpp
#	scripts/debug-test.sh
#	scripts/sync-ggml.last
#	src/llama.cpp
#	tests/test-backend-ops.cpp
#	tests/test-chat-template.cpp
#	tests/test-quantize-fns.cpp
#	tests/test-quantize-perf.cpp
#	tests/test-tokenizer-0.cpp
#	tests/test-tokenizer-1-bpe.cpp
#	tests/test-tokenizer-1-spm.cpp
2025-09-10 17:14:36 +00:00 · 2024-10-11 11:59:59 +08:00 · 2024-10-11 11:59:59 +08:00 · e692a79aab
commit e692a79aab
parent 5ad826b82a 7eee341bee
61 changed files with 2579 additions and 1949 deletions
--- a/otherarch/sdcpp/model.cpp
+++ b/otherarch/sdcpp/model.cpp
@@ -580,25 +580,25 @@ void convert_tensor(void* src,
        if (src_type == GGML_TYPE_F16) {
            ggml_fp16_to_fp32_row((ggml_fp16_t*)src, (float*)dst, n);
        } else {
-            auto qtype = ggml_internal_get_type_traits(src_type);
-            if (qtype.to_float == NULL) {
+            auto qtype = ggml_get_type_traits(src_type);
+            if (qtype->to_float == NULL) {
                throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
                                                ggml_type_name(src_type)));
            }
-            qtype.to_float(src, (float*)dst, n);
+            qtype->to_float(src, (float*)dst, n);
        }
    } else {
        // src_type == GGML_TYPE_F16 => dst_type is quantized
        // src_type is quantized => dst_type == GGML_TYPE_F16 or dst_type is quantized
-        auto qtype = ggml_internal_get_type_traits(src_type);
-        if (qtype.to_float == NULL) {
+        auto qtype = ggml_get_type_traits(src_type);
+        if (qtype->to_float == NULL) {
            throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
                                            ggml_type_name(src_type)));
        }
        std::vector<char> buf;
        buf.resize(sizeof(float) * n);
        char* src_data_f32 = buf.data();
-        qtype.to_float(src, (float*)src_data_f32, n);
+        qtype->to_float(src, (float*)src_data_f32, n);
        if (dst_type == GGML_TYPE_F16) {
            ggml_fp32_to_fp16_row((float*)src_data_f32, (ggml_fp16_t*)dst, n);
        } else {