Merge commit 'c917b67f06' into concedo_experimental

# Conflicts:
# .devops/tools.sh
# Makefile
# ggml/src/ggml-cuda/mmq.cuh
# tests/test-double-float.cpp
# tests/test-quantize-fns.cpp
# tests/test-quantize-perf.cpp
2025-09-11 09:34:37 +00:00 · 2024-07-14 11:38:20 +08:00 · 2024-07-14 11:38:20 +08:00 · 602661ba49
commit 602661ba49
parent c08309e773 c917b67f06
25 changed files with 1339 additions and 1504 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5955,13 +5955,6 @@ static bool llm_load_tensors(

    auto & hparams = model.hparams;

-#ifdef GGML_USE_SYCL
-    // disable MoE with SYCL until mul_mat_id is updated
-    if (hparams.n_expert > 0) {
-        n_gpu_layers = 0;
-    }
-#endif
-
    model.split_mode   = split_mode;
    model.main_gpu     = main_gpu;
    model.n_gpu_layers = n_gpu_layers;
@@ -21500,7 +21493,7 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
            size--;
        }
        if (length < (int32_t)size) {
-            return (int32_t) -size;
+            return -(int32_t) size;
        }
        memcpy(buf, token, size);
        return (int32_t) size;