mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # README.md # docs/build.md # docs/development/HOWTO-add-model.md # tests/test-backend-ops.cpp # tests/test-chat-template.cpp
This commit is contained in:
commit
b154bd3671
50 changed files with 189817 additions and 187510 deletions
|
@ -620,7 +620,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
|
|||
|
||||
qs.n_ffn_down = qs.n_ffn_gate = qs.n_ffn_up = (int)model.hparams.n_layer;
|
||||
|
||||
// sanity checks
|
||||
// sanity checks for models that have attention layers
|
||||
if (qs.n_attention_wv != 0)
|
||||
{
|
||||
const auto & n_head_kv_iter = model.hparams.n_head_kv_arr.begin();
|
||||
// attention layers have a non-zero number of kv heads
|
||||
|
@ -758,6 +759,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
|
|||
quantize &= name.find("time_mix_w2.weight") == std::string::npos;
|
||||
quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos;
|
||||
quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos;
|
||||
quantize &= name.find("time_mix_lerp_fused.weight") == std::string::npos;
|
||||
|
||||
// do not quantize relative position bias (T5)
|
||||
quantize &= name.find("attn_rel_b.weight") == std::string::npos;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue