unicode : avoid char32_t (#7957)

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-06-16 14:51:40 +03:00 committed by GitHub
parent 6fe1c62741
commit 52399254b3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 13 additions and 13 deletions

View file

@@ -13246,7 +13246,7 @@ struct llm_tokenizer_wpm {
const std::vector<uint32_t> cpts_nfd = unicode_cpts_normalize_nfd(unicode_cpts_from_utf8(text));
std::vector<std::string> words(1, "");
- for (const char32_t cpt : cpts_nfd) {
+ for (const uint32_t cpt : cpts_nfd) {
const auto flags = unicode_cpt_flags(cpt);
if (flags.is_whitespace) {