Mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-12 01:54:37 +00:00
Rewrite history to fix bad vulkan shader commits without increasing repo size
added dpe colab (+8 squashed commit)

Squashed commit:
[b8362da4] updated lite
[ed6c037d] move nsigma into the regular sampler stack
[ac5f61c6] relative filepath fixed
[05fe96ab] export template
[ed0a5a3e] nix_example.md: refactor (#1401)
  * nix_example.md: add override example
  * nix_example.md: drop graphics example, already basic nixos knowledge
  * nix_example.md: format
  * nix_example.md: Vulkan is disabled on macOS. Disabled in: 1ccd253acc
  * nix_examples.md: nixpkgs.config.cuda{Arches -> Capabilities}. Fixes: https://github.com/LostRuins/koboldcpp/issues/1367
[675c62f7] AutoGuess: Phi 4 (mini) (#1402)
[4bf56982] phrasing
[b8c0df04] Add Rep Pen to Top N Sigma sampler chain (#1397) - place after nsigma and before xtc (+3 squashed commit)

Squashed commit:
[87c52b97] disable VMM from HIP
[ee8906f3] edit description
[e85c0e69] Remove Unnecessary Rep Counting (#1394)
  * stop counting reps
  * fix range-based initializer
  * strike that - reverse it
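Two of the squashed entries concern sampler ordering: nsigma moves into the regular sampler stack, and repetition penalty is applied after Top-N-Sigma and before XTC (#1397). A minimal sketch of that ordering over a plain logits vector; the sampler names below are hypothetical placeholders, not koboldcpp's actual implementations:

#include <functional>
#include <vector>

using Logits  = std::vector<float>;
using Sampler = std::function<void(Logits &)>;

int main() {
    // Hypothetical stand-ins for the real samplers; only the ordering matters here.
    Sampler top_n_sigma = [](Logits &) { /* keep tokens within n sigma of the max logit */ };
    Sampler rep_pen     = [](Logits &) { /* penalize recently generated tokens */ };
    Sampler xtc         = [](Logits &) { /* "exclude top choices" truncation */ };

    // Ordering described in the commit: rep pen runs after nsigma and before xtc.
    std::vector<Sampler> stack = { top_n_sigma, rep_pen, xtc };

    Logits logits = {1.0f, 2.0f, 3.0f};
    for (auto & s : stack) s(logits);
    return 0;
}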
parent 50eae1ffeb
commit 6b7d2349a7

114 changed files with 6666 additions and 2642 deletions
@@ -2297,13 +2297,16 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 } break;
             case LLM_ARCH_PHI3:
                 {
                     const int64_t n_embd_head = n_embd / n_head;

                     tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), { n_embd, n_vocab }, 0);

                     // output
                     output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), { n_embd }, 0);
-                    output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), { n_embd, n_vocab }, 0);
+                    output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                    // if output is NULL, init from the input tok embed
+                    if (output == NULL) {
+                        output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }

                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];
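The hunk above makes the dedicated output head optional for LLM_ARCH_PHI3 and falls back to duplicating the token embedding (weight tying), so checkpoints that ship no separate output.weight still load. A standalone sketch of that fallback idea, with hypothetical types and illustrative sizes rather than the loader's actual API:

#include <cstdio>
#include <vector>

// Hypothetical stand-in for a loaded weight matrix [rows, cols]; not ggml.
struct Weight {
    int rows = 0, cols = 0;
    std::vector<float> data;
    bool present = false;
};

// Mirrors the TENSOR_NOT_REQUIRED / TENSOR_DUPLICATED pattern: if the file
// provides no output head, reuse the token-embedding matrix in its place.
const Weight & pick_output_head(const Weight & output, const Weight & tok_embd) {
    return output.present ? output : tok_embd;
}

int main() {
    Weight tok_embd{ 8, 16, std::vector<float>(8 * 16, 0.0f), true }; // illustrative sizes
    Weight output; // absent: a tied-embedding checkpoint

    const Weight & head = pick_output_head(output, tok_embd);
    std::printf("output head is %s (%d x %d)\n",
                output.present ? "dedicated" : "tied to tok_embd", head.rows, head.cols);
    return 0;
}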
@@ -2318,8 +2321,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd }, 0);
                         layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), { n_embd, 2 * n_ff }, 0);

-                        layer.rope_long = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_LONG, "weight", i), { n_embd_head/2 }, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
-                        layer.rope_short = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_SHORT, "weight", i), { n_embd_head/2 }, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
+                        layer.rope_long = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_LONG, "weight", i), { n_rot/2 }, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
+                        layer.rope_short = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_SHORT, "weight", i), { n_rot/2 }, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
                     }
                 } break;
             case LLM_ARCH_PHIMOE:
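The hunk above sizes the long/short RoPE factor tensors by n_rot/2 instead of n_embd_head/2; the two differ only when a model rotates just part of each head (partial RoPE), as some Phi variants do. A small arithmetic sketch with illustrative numbers, not any specific model's hyperparameters, showing why the shapes diverge:

#include <cstdio>

int main() {
    // Illustrative values only: a head dimension with partial rotary coverage.
    const int   n_embd      = 3072;
    const int   n_head      = 24;
    const int   n_embd_head = n_embd / n_head;                     // 128
    const float rope_frac   = 0.75f;                               // fraction of each head that is rotated
    const int   n_rot       = (int)(n_embd_head * rope_frac);      // 96

    // One scaling factor per rotated dimension pair, hence the /2.
    std::printf("rope factors with n_embd_head/2: %d\n", n_embd_head / 2); // 64
    std::printf("rope factors with n_rot/2:       %d\n", n_rot / 2);       // 48
    return 0;
}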
@@ -3937,6 +3940,10 @@ int32_t llama_model_n_head(const struct llama_model * model) {
     return model->hparams.n_head();
 }

+int32_t llama_model_n_head_kv(const struct llama_model * model) {
+    return model->hparams.n_head_kv();
+}
+
 // deprecated
 int32_t llama_n_ctx_train(const struct llama_model * model) {
     return llama_model_n_ctx_train(model);
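The last hunk exposes the KV head count through a public accessor next to the existing llama_model_n_head. A hedged usage sketch, assuming the llama.cpp C API bundled around this point (llama_backend_init, llama_model_load_from_file, llama_model_free); older bundles use llama_load_model_from_file / llama_free_model instead:

#include <cstdio>
#include "llama.h"

int main(int argc, char ** argv) {
    if (argc < 2) { std::fprintf(stderr, "usage: %s model.gguf\n", argv[0]); return 1; }

    llama_backend_init();
    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file(argv[1], mparams);
    if (!model) { std::fprintf(stderr, "failed to load %s\n", argv[1]); return 1; }

    // GQA/MQA models report fewer KV heads than attention heads.
    std::printf("n_head    = %d\n", llama_model_n_head(model));
    std::printf("n_head_kv = %d\n", llama_model_n_head_kv(model)); // accessor added in this diff

    llama_model_free(model);
    llama_backend_free();
    return 0;
}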