mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-11 04:51:25 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#   examples/run/run.cpp
#   scripts/sync-ggml.last
This commit is contained in:
commit
27b9358baf
12 changed files with 181 additions and 135 deletions
|
|
@ -626,7 +626,14 @@ std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8) {
|
|||
result.reserve(utf8.size());
|
||||
size_t offset = 0;
|
||||
while (offset < utf8.size()) {
|
||||
result.push_back(unicode_cpt_from_utf8(utf8, offset));
|
||||
try {
|
||||
result.push_back(unicode_cpt_from_utf8(utf8, offset));
|
||||
}
|
||||
catch (const std::invalid_argument & /*ex*/) {
|
||||
// Do not let invalid UTF-8 leak an exception beyond llama_tokenize: skip one byte and substitute U+FFFD instead
|
||||
++offset;
|
||||
result.emplace_back(0xFFFD); // replacement character
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue