Merge branch 'upstream' into concedo_experimental

# Conflicts: # ci/run.sh # ggml/src/CMakeLists.txt # ggml/src/ggml-cpu/CMakeLists.txt # ggml/src/ggml-cpu/repack.cpp # ggml/src/ggml-sycl/binbcast.cpp # ggml/src/ggml-sycl/concat.cpp # ggml/src/ggml-sycl/conv.cpp # ggml/src/ggml-sycl/convert.cpp # ggml/src/ggml-sycl/cpy.cpp # ggml/src/ggml-sycl/dmmv.cpp # ggml/src/ggml-sycl/dpct/helper.hpp # ggml/src/ggml-sycl/element_wise.cpp # ggml/src/ggml-sycl/getrows.cpp # ggml/src/ggml-sycl/ggml-sycl.cpp # ggml/src/ggml-sycl/gla.cpp # ggml/src/ggml-sycl/im2col.cpp # ggml/src/ggml-sycl/mmq.cpp # ggml/src/ggml-sycl/mmvq.cpp # ggml/src/ggml-sycl/norm.cpp # ggml/src/ggml-sycl/rope.cpp # ggml/src/ggml-sycl/softmax.cpp # ggml/src/ggml-sycl/tsembd.cpp # ggml/src/ggml-sycl/wkv.cpp # tests/test-backend-ops.cpp
2025-09-10 09:04:36 +00:00 · 2025-06-21 00:32:22 +08:00 · 2025-06-21 00:32:22 +08:00 · 4f2fcaa2ef
commit 4f2fcaa2ef
parent c16d672ce4 c959f462a0
21 changed files with 443 additions and 81 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@ -1298,6 +1298,9 @@ std::vector<llama_token> common_tokenize(
    int n_tokens = text.length() + 2 * add_special;
    std::vector<llama_token> result(n_tokens);
    n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+    if (n_tokens == std::numeric_limits<int32_t>::min()) {
+        throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
+    }
    if (n_tokens < 0) {
        result.resize(-n_tokens);
        int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);