fix: Catch AssertionError in countTokens() (#445)

This commit is contained in:
Rene Leonhardt 2024-04-10 13:56:35 +02:00 committed by GitHub
parent 7d89650062
commit 6fb0b8d30c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -59,9 +59,10 @@ public final class EncodingManager {
/**
 * Counts the tokens in {@code text} by delegating to {@code encoding.countTokens(text)}.
 *
 * <p>This is a best-effort operation: if the delegate throws any {@link Exception} or
 * {@link Error}, the failure is logged as a warning together with the offending text and
 * {@code 0} is returned instead of propagating the failure to the caller.
 *
 * @param text the text whose tokens should be counted
 * @return the token count reported by the encoding, or {@code 0} if counting failed
 */
public int countTokens(String text) {
    try {
        // #444: Cl100kParser.split() throws AssertionError "Input is not UTF-8: "
        return encoding.countTokens(text);
    } catch (Exception | Error ex) {
        // AssertionError is an Error, not an Exception, so catching Exception alone
        // would let the failure described in #444 escape this method.
        LOG.warn("Could not count tokens for: " + text, ex);
        return 0;
    }
}