Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/tools.sh
#	.devops/vulkan.Dockerfile
#	.github/workflows/build.yml
#	.github/workflows/docker.yml
#	.github/workflows/server.yml
#	Makefile
#	README.md
#	cmake/llama-config.cmake.in
#	common/CMakeLists.txt
#	examples/gbnf-validator/gbnf-validator.cpp
#	examples/run/run.cpp
#	examples/server/README.md
#	examples/server/tests/README.md
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-hip/CMakeLists.txt
#	scripts/sync-ggml.last
#	tests/CMakeLists.txt
#	tests/test-backend-ops.cpp
#	tests/test-chat-template.cpp
#	tests/test-grammar-integration.cpp
This commit is contained in:
Concedo 2025-02-01 17:14:59 +08:00
commit f13498df13
75 changed files with 1257924 additions and 701572 deletions

View file

@@ -7736,17 +7736,13 @@ struct llm_build_context {
1
);
struct ggml_tensor * last_norm_att = ggml_view_3d(ctx0, x_norm_att, n_embd, 1, n_seqs, x_norm_att->nb[1], x_norm_att->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(x_norm_att));
ggml_build_forward_expand(
gf,
ggml_cpy(
ctx0,
wkv_states,
ggml_view_1d(
ctx0,
kv_self.v_l[il],
hparams.n_embd_v_s() * n_seqs,
hparams.n_embd_v_s() * kv_head * ggml_element_size(kv_self.v_l[il])
)
ggml_view_1d(ctx0, last_norm_att, n_embd * n_seqs, 0),
ggml_view_1d(ctx0, kv_self.k_l[il], hparams.n_embd_k_s() * n_seqs, hparams.n_embd_k_s() * kv_head * ggml_element_size(kv_self.k_l[il]))
)
);