Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	common/CMakeLists.txt
#	docs/backend/SYCL.md
#	ggml/CMakeLists.txt
#	ggml/src/ggml-sycl/CMakeLists.txt
#	ggml/src/ggml-sycl/binbcast.cpp
#	ggml/src/ggml-sycl/convert.cpp
#	ggml/src/ggml-sycl/dequantize.hpp
#	ggml/src/ggml-sycl/dmmv.cpp
#	ggml/src/ggml-sycl/gemm.hpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	ggml/src/ggml-sycl/mmvq.cpp
#	ggml/src/ggml-sycl/vecdotq.hpp
#	ggml/src/ggml-vulkan/CMakeLists.txt
#	ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt
#	ggml/src/gguf.cpp
#	scripts/compare-llama-bench.py
#	tests/CMakeLists.txt
#	tests/test-chat.cpp
#	tools/llama-bench/llama-bench.cpp
#	tools/server/README.md
Concedo 2025-05-16 15:30:31 +08:00
commit e5d26a2356
47 changed files with 2671 additions and 504 deletions


@@ -12322,6 +12322,9 @@ struct llm_build_granite : public llm_graph_context {
        // inp_pos - built only if rope enabled
        ggml_tensor * inp_pos = nullptr;
        if (use_rope) {
            inp_pos = build_inp_pos();
        }
        auto * inp_attn = build_attn_inp_kv_unified();
@@ -12364,10 +12367,6 @@ struct llm_build_granite : public llm_graph_context {
                Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
                if (use_rope) {
                    if (!inp_pos) {
                        inp_pos = build_inp_pos();
                    }
                    ggml_tensor * rope_factors = model.get_rope_factors(n_ctx_per_seq, il);
                    Qcur = ggml_rope_ext(
                        ctx0, Qcur, inp_pos, rope_factors,
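
For context, a minimal standalone sketch of the pattern the two hunks apply to inp_pos in llm_build_granite: the optional position input is now built once before the per-layer loop when use_rope is set, instead of lazily on first use inside the loop. The Tensor type, the build_inp_pos() stub, and the loop below are simplified stand-ins for illustration, not the llama.cpp APIs.

    // sketch of the "build once up front" refactor, assuming stand-in types
    #include <cstdio>

    struct Tensor { int id; };              // stand-in for ggml_tensor

    static Tensor * build_inp_pos() {       // stand-in for llm_graph_context::build_inp_pos()
        static Tensor pos{0};
        return &pos;
    }

    int main() {
        const bool use_rope = true;         // mirrors llm_build_granite's use_rope flag
        const int  n_layer  = 4;

        // after the change: built once, only if rope is enabled
        Tensor * inp_pos = nullptr;
        if (use_rope) {
            inp_pos = build_inp_pos();
        }

        for (int il = 0; il < n_layer; ++il) {
            if (use_rope) {
                // before the change this branch also carried the lazy build:
                //     if (!inp_pos) { inp_pos = build_inp_pos(); }
                std::printf("layer %d: applying rope with inp_pos %p\n", il, (void *) inp_pos);
            }
        }
        return 0;
    }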