Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	common/CMakeLists.txt
#	docs/backend/SYCL.md
#	ggml/CMakeLists.txt
#	ggml/src/ggml-sycl/CMakeLists.txt
#	ggml/src/ggml-sycl/binbcast.cpp
#	ggml/src/ggml-sycl/convert.cpp
#	ggml/src/ggml-sycl/dequantize.hpp
#	ggml/src/ggml-sycl/dmmv.cpp
#	ggml/src/ggml-sycl/gemm.hpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	ggml/src/ggml-sycl/mmvq.cpp
#	ggml/src/ggml-sycl/vecdotq.hpp
#	ggml/src/ggml-vulkan/CMakeLists.txt
#	ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt
#	ggml/src/gguf.cpp
#	scripts/compare-llama-bench.py
#	tests/CMakeLists.txt
#	tests/test-chat.cpp
#	tools/llama-bench/llama-bench.cpp
#	tools/server/README.md
Concedo 2025-05-16 15:30:31 +08:00
commit e5d26a2356
47 changed files with 2671 additions and 504 deletions


@@ -12322,6 +12322,9 @@ struct llm_build_granite : public llm_graph_context {
        // inp_pos - built only if rope enabled
        ggml_tensor * inp_pos = nullptr;
        if (use_rope) {
            inp_pos = build_inp_pos();
        }
        auto * inp_attn = build_attn_inp_kv_unified();
@@ -12364,10 +12367,6 @@ struct llm_build_granite : public llm_graph_context {
                Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
                if (use_rope) {
                    if (!inp_pos) {
                        inp_pos = build_inp_pos();
                    }
                    ggml_tensor * rope_factors = model.get_rope_factors(n_ctx_per_seq, il);
                    Qcur = ggml_rope_ext(
                        ctx0, Qcur, inp_pos, rope_factors,
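
For context, a minimal standalone sketch of the pattern the two hunks apply to inp_pos in llm_build_granite: the optional position input is now built once before the per-layer loop when use_rope is set, instead of lazily on first use inside the loop. The Tensor type, the build_inp_pos() stub, and the loop below are simplified stand-ins for illustration, not the llama.cpp APIs.

    // sketch of the "build once up front" refactor, assuming stand-in types
    #include <cstdio>

    struct Tensor { int id; };              // stand-in for ggml_tensor

    static Tensor * build_inp_pos() {       // stand-in for llm_graph_context::build_inp_pos()
        static Tensor pos{0};
        return &pos;
    }

    int main() {
        const bool use_rope = true;         // mirrors llm_build_granite's use_rope flag
        const int  n_layer  = 4;

        // after the change: built once, only if rope is enabled
        Tensor * inp_pos = nullptr;
        if (use_rope) {
            inp_pos = build_inp_pos();
        }

        for (int il = 0; il < n_layer; ++il) {
            if (use_rope) {
                // before the change this branch also carried the lazy build:
                //     if (!inp_pos) { inp_pos = build_inp_pos(); }
                std::printf("layer %d: applying rope with inp_pos %p\n", il, (void *) inp_pos);
            }
        }
        return 0;
    }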