mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-18 06:19:19 +00:00
model : NvFP4 quantized LM head support (#23046)
* NvFP4 quantized LM head support Signed-off-by: ynankani <ynankani@nvidia.com> * Address review comments Signed-off-by: ynankani <ynankani@nvidia.com> * Add assert for NvFP4 LM head and tied embeddings Signed-off-by: ynankani <ynankani@nvidia.com> * Address review comments Signed-off-by: ynankani <ynankani@nvidia.com> * Create output_s tensor only when LM head is NvFP4 Signed-off-by: ynankani <ynankani@nvidia.com> --------- Signed-off-by: ynankani <ynankani@nvidia.com>
This commit is contained in:
parent
59778f0196
commit
42928bc14d
103 changed files with 121 additions and 101 deletions
|
|
@ -142,7 +142,7 @@ llama_model_bloom::graph::graph(const llama_model & model, const llm_graph_param
|
|||
cb(cur, "result_norm", -1);
|
||||
res->t_embd = cur;
|
||||
|
||||
cur = build_lora_mm(model.output, cur);
|
||||
cur = build_lora_mm(model.output, cur, model.output_s);
|
||||
|
||||
cb(cur, "result_output", -1);
|
||||
res->t_logits = cur;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue