mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-14 10:59:41 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # common/common.cpp # examples/batched-bench/batched-bench.cpp # examples/batched/batched.cpp # examples/export-lora/export-lora.cpp # examples/gritlm/gritlm.cpp # examples/parallel/parallel.cpp # examples/passkey/passkey.cpp # examples/speculative-simple/speculative-simple.cpp # examples/speculative/speculative.cpp # ggml/src/ggml-cann/CMakeLists.txt # ggml/src/ggml-cann/acl_tensor.cpp # ggml/src/ggml-cann/acl_tensor.h # ggml/src/ggml-cann/aclnn_ops.cpp # ggml/src/ggml-cann/aclnn_ops.h # ggml/src/ggml-vulkan/CMakeLists.txt # tests/test-arg-parser.cpp # tests/test-backend-ops.cpp
This commit is contained in:
commit
103d60ed2c
43 changed files with 1509 additions and 1129 deletions
|
@ -636,7 +636,8 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|||
regex_exprs = {
|
||||
// original regex from tokenizer.json
|
||||
// "'(?i:[sdmt]|ll|ve|re)|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+"
|
||||
"'(?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+",
|
||||
// FIXME? Changed possessive quantifiers (?+ and ++) to greedy to avoid errors and imatrix hanging (tried atomic grouping but it's not supported?)
|
||||
"'(?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+",
|
||||
};
|
||||
break;
|
||||
default:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue