Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2026-05-09 11:00:40 +00:00)
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.devops/vulkan.Dockerfile
#	.github/workflows/build-cache.yml
#	.github/workflows/build-cmake-pkg.yml
#	.github/workflows/build-linux-cross.yml
#	.github/workflows/build.yml
#	.github/workflows/release.yml
#	.github/workflows/server-self-hosted.yml
#	.github/workflows/server-webui.yml
#	ggml/src/ggml-hexagon/ggml-hexagon.cpp
#	ggml/src/ggml-hexagon/htp/matmul-ops.c
#	tests/test-backend-ops.cpp
commit 893b8abc21
6 changed files with 82 additions and 25 deletions
@@ -7620,6 +7620,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
        if (!layer.wo_s && layer.wo) {
            layer.wo_s = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "scale", i), {1}, TENSOR_NOT_REQUIRED);
        }
        if (!layer.wqkv_s && layer.wqkv) {
            layer.wqkv_s = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "scale", i), {1}, TENSOR_NOT_REQUIRED);
        }
        if (!layer.wqkv_gate_s && layer.wqkv_gate) {
            layer.wqkv_gate_s = create_tensor(tn(LLM_TENSOR_ATTN_GATE, "scale", i), {1}, TENSOR_NOT_REQUIRED);
        }

        // dense FFN weight scales (per-tensor, shape {1})
        if (!layer.ffn_gate_s && layer.ffn_gate) {
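The added blocks, here and in the hunks that follow, all use the same guard-and-create idiom: a "<name>.scale" tensor is looked up only when the matching weight exists and nothing (for example architecture-specific loading code) has already filled the scale slot, and the TENSOR_NOT_REQUIRED flag keeps the lookup optional. Below is a minimal standalone sketch of that idiom, assuming an optional lookup returns a null pointer when the tensor is absent; the tensor table, names, and the find_optional helper are hypothetical stand-ins for illustration, not llama.cpp API.

// optional_scale_sketch.cpp -- illustration only; not part of the commit.
#include <cstdio>
#include <map>
#include <string>

// Stand-in for the loaded tensor table: name -> "tensor" (just a float here).
static std::map<std::string, float> g_tensors = {
    {"blk.0.attn_output.weight", 1.0f},
    {"blk.0.attn_output.scale",  0.5f},  // optional per-tensor scale, shape {1}
    {"blk.0.ffn_up.weight",      1.0f},  // deliberately has no ".scale" entry
};

// Mimics create_tensor(..., TENSOR_NOT_REQUIRED): nullptr when the name is absent.
static const float * find_optional(const std::string & name) {
    auto it = g_tensors.find(name);
    return it == g_tensors.end() ? nullptr : &it->second;
}

int main() {
    const float * wo       = find_optional("blk.0.attn_output.weight");
    const float * wo_s     = nullptr;
    const float * ffn_up   = find_optional("blk.0.ffn_up.weight");
    const float * ffn_up_s = nullptr;

    // The guard from the diff: only look for a scale when the weight exists and
    // the scale slot was not already filled earlier.
    if (!wo_s && wo)         { wo_s     = find_optional("blk.0.attn_output.scale"); }
    if (!ffn_up_s && ffn_up) { ffn_up_s = find_optional("blk.0.ffn_up.scale"); }

    std::printf("wo_s:     %s\n", wo_s     ? "loaded" : "absent (fine, optional)");
    std::printf("ffn_up_s: %s\n", ffn_up_s ? "loaded" : "absent (fine, optional)");
    return 0;
}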
@@ -7631,6 +7637,15 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
        if (!layer.ffn_up_s && layer.ffn_up) {
            layer.ffn_up_s = create_tensor(tn(LLM_TENSOR_FFN_UP, "scale", i), {1}, TENSOR_NOT_REQUIRED);
        }
        if (!layer.ffn_gate_shexp_s && layer.ffn_gate_shexp) {
            layer.ffn_gate_shexp_s = create_tensor(tn(LLM_TENSOR_FFN_GATE_SHEXP, "scale", i), {1}, TENSOR_NOT_REQUIRED);
        }
        if (!layer.ffn_down_shexp_s && layer.ffn_down_shexp) {
            layer.ffn_down_shexp_s = create_tensor(tn(LLM_TENSOR_FFN_DOWN_SHEXP, "scale", i), {1}, TENSOR_NOT_REQUIRED);
        }
        if (!layer.ffn_up_shexp_s && layer.ffn_up_shexp) {
            layer.ffn_up_shexp_s = create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP, "scale", i), {1}, TENSOR_NOT_REQUIRED);
        }

        // MoE expert weight scales (per-expert, shape {n_expert})
        if (!layer.ffn_gate_exps_s && layer.ffn_gate_exps) {
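The comments in the diff distinguish two shapes: dense and shared-expert scales are per-tensor ({1}, one value for the whole weight), while the expert scales in the next hunk are per-expert ({n_expert}, one value per routed expert). A tiny plain-C++ sketch of what that difference means numerically, with made-up values:

// per_expert_scale_sketch.cpp -- illustration only; values are invented.
#include <cstdio>
#include <vector>

int main() {
    const int n_expert = 4;

    const float dense_scale = 0.5f;                                    // shape {1}
    const std::vector<float> expert_scale = {0.5f, 1.0f, 0.25f, 2.0f}; // shape {n_expert}

    const float raw = 3.0f; // pretend matmul output

    // Per-tensor: every output of this weight is scaled by the same value.
    std::printf("dense:    %.2f\n", raw * dense_scale);

    // Per-expert: the output of expert e is scaled by its own entry.
    for (int e = 0; e < n_expert; ++e) {
        std::printf("expert %d: %.2f\n", e, raw * expert_scale[e]);
    }
    return 0;
}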
@@ -7642,6 +7657,17 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
        if (!layer.ffn_up_exps_s && layer.ffn_up_exps) {
            layer.ffn_up_exps_s = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "scale", i), {n_expert}, TENSOR_NOT_REQUIRED);
        }

        // recurrent / linear-attention weight scales (per-tensor, shape {1})
        if (!layer.ssm_out_s && layer.ssm_out) {
            layer.ssm_out_s = create_tensor(tn(LLM_TENSOR_SSM_OUT, "scale", i), {1}, TENSOR_NOT_REQUIRED);
        }
        if (!layer.ssm_alpha_s && layer.ssm_alpha) {
            layer.ssm_alpha_s = create_tensor(tn(LLM_TENSOR_SSM_ALPHA, "scale", i), {1}, TENSOR_NOT_REQUIRED);
        }
        if (!layer.ssm_beta_s && layer.ssm_beta) {
            layer.ssm_beta_s = create_tensor(tn(LLM_TENSOR_SSM_BETA, "scale", i), {1}, TENSOR_NOT_REQUIRED);
        }
    }
}
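These hunks only load the scale tensors; they do not show how the scales are consumed. A plausible reading, and it is an assumption rather than something this commit confirms, is that a present {1}-shaped scale is broadcast-multiplied into the output of its matmul at graph-build time, in the same spirit as existing per-tensor weight scales in llama.cpp. A compile-only ggml sketch of that wiring (tensor sizes and data are placeholders):

// scale_apply_sketch.cpp -- assumption about downstream use, not shown by the commit.
#include "ggml.h"
#include <cstdio>

int main() {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    const int64_t n_embd = 8;
    struct ggml_tensor * wo   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd);
    struct ggml_tensor * wo_s = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1); // would be nullptr if absent
    struct ggml_tensor * x    = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);

    // Matmul first, then apply the optional per-tensor scale only if it was loaded.
    struct ggml_tensor * cur = ggml_mul_mat(ctx, wo, x);
    if (wo_s) {
        cur = ggml_mul(ctx, cur, wo_s); // a {1} tensor broadcasts over the whole output
    }

    // Build (but do not compute) the graph, just to show the wiring.
    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, cur);

    std::printf("graph output has %lld elements\n", (long long) ggml_nelements(cur));
    ggml_free(ctx);
    return 0;
}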