Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	examples/run/run.cpp
#	scripts/sync-ggml.last
This commit is contained in:
Concedo 2025-02-08 01:31:49 +08:00
commit 27b9358baf
12 changed files with 181 additions and 135 deletions

View file

@ -626,7 +626,14 @@ std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8) {
result.reserve(utf8.size());
size_t offset = 0;
while (offset < utf8.size()) {
result.push_back(unicode_cpt_from_utf8(utf8, offset));
try {
result.push_back(unicode_cpt_from_utf8(utf8, offset));
}
catch (const std::invalid_argument & /*ex*/) {
// Keep the exception from leaking beyond llama_tokenize: skip one byte of the invalid UTF-8 input and substitute U+FFFD
++offset;
result.emplace_back(0xFFFD); // replacement character
}
}
return result;
}