mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-12 09:59:41 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	Makefile
#	README.md
#	common/CMakeLists.txt
#	common/common.cpp
#	common/common.h
#	examples/embedding/embedding.cpp
#	examples/imatrix/imatrix.cpp
#	examples/infill/infill.cpp
#	examples/parallel/parallel.cpp
#	examples/perplexity/perplexity.cpp
#	examples/rpc/README.md
#	examples/save-load-state/save-load-state.cpp
#	examples/server/README.md
#	examples/speculative/speculative.cpp
#	tests/test-sampling.cpp
This commit is contained in:
commit
a947558e0e
45 changed files with 2628 additions and 6743 deletions
|
@@ -9317,7 +9317,7 @@ static struct ggml_tensor * llm_build_copy_mask_state(
|
|||
// FIXME: zero-out NANs?
|
||||
states = ggml_mul(ctx, states, state_mask);
|
||||
|
||||
// copy states which won't be changed further (between n_seqs and n_rs)
|
||||
// copy states which won't be changed further (between n_seqs and n_kv)
|
||||
ggml_build_forward_expand(graph,
|
||||
ggml_cpy(ctx,
|
||||
ggml_view_1d(ctx, states, n_state*(n_kv - n_seqs), n_seqs*n_state*ggml_element_size(states)),
|
||||
|
@@ -17607,6 +17607,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||
quantize &= name.find("time_mix_first.weight") == std::string::npos;
|
||||
quantize &= name.find("time_mix_w1.weight") == std::string::npos;
|
||||
quantize &= name.find("time_mix_w2.weight") == std::string::npos;
|
||||
quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos;
|
||||
quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos;
|
||||
|
||||
// do not quantize relative position bias (T5)
|
||||
quantize &= name.find("attn_rel_b.weight") == std::string::npos;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue