mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-14 02:49:41 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	README.md
#	examples/parallel/parallel.cpp
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-blas/CMakeLists.txt
#	ggml/src/ggml-sycl/CMakeLists.txt
#	ggml/src/gguf.cpp
#	scripts/sync-ggml.last
#	tests/test-gguf.cpp
This commit is contained in:
commit
b42b618897
8 changed files with 89 additions and 37 deletions
|
@@ -2319,9 +2319,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|||
|
||||
std::string model_name;
|
||||
std::string tokenizer_pre;
|
||||
+std::string general_arch;
|
||||
|
||||
ml.get_key(LLM_KV_GENERAL_NAME, model_name, false);
|
||||
ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
|
||||
+ml.get_key(LLM_KV_GENERAL_ARCHITECTURE, general_arch, false);
|
||||
|
||||
// model name to lowercase
|
||||
std::transform(model_name.begin(), model_name.end(), model_name.begin(),
|
||||
|
@@ -2330,8 +2332,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|||
}
|
||||
);
|
||||
|
||||
-// set attributes by model/tokenizer name
|
||||
-if (_contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})) {
|
||||
+// set attributes by model/tokenizer/architecture name
|
||||
+if (false
|
||||
+|| _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
|
||||
+|| _contains_any(general_arch, {"nomic-bert-moe"})
|
||||
+) {
|
||||
_set_token_attr("<mask>", LLAMA_TOKEN_ATTR_LSTRIP, true);
|
||||
} else if (_contains_any(model_name, {"phi-3", "phi3"})) {
|
||||
for (auto id : cache_special_tokens) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue