diff --git a/build.gradle.kts b/build.gradle.kts
index 2c5b03bd..40d972c8 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -58,7 +58,7 @@ dependencies {
   }
   implementation("org.jsoup:jsoup:1.17.2")
   implementation("org.apache.commons:commons-text:1.11.0")
-  implementation("com.knuddels:jtokkit:0.6.1")
+  implementation("com.knuddels:jtokkit:1.0.0")
 
   testImplementation("org.assertj:assertj-core:3.25.3")
   testImplementation("org.awaitility:awaitility:4.2.0")
diff --git a/src/main/java/ee/carlrobert/codegpt/EncodingManager.java b/src/main/java/ee/carlrobert/codegpt/EncodingManager.java
index d4790dca..8e3962d5 100644
--- a/src/main/java/ee/carlrobert/codegpt/EncodingManager.java
+++ b/src/main/java/ee/carlrobert/codegpt/EncodingManager.java
@@ -7,6 +7,7 @@ import com.knuddels.jtokkit.Encodings;
 import com.knuddels.jtokkit.api.Encoding;
 import com.knuddels.jtokkit.api.EncodingRegistry;
 import com.knuddels.jtokkit.api.EncodingType;
+import com.knuddels.jtokkit.api.IntArrayList;
 import ee.carlrobert.codegpt.conversations.Conversation;
 import ee.carlrobert.llm.client.openai.completion.request.OpenAIChatCompletionMessage;
 import java.util.List;
@@ -57,15 +58,32 @@ public final class EncodingManager {
   /**
    * Truncates the given text to the given number of tokens.
    *
-   * @param text The text to truncate.
+   * @param text      The text to truncate.
    * @param maxTokens The maximum number of tokens to keep.
    * @param fromStart Whether to truncate from the start or the end of the text.
    * @return The truncated text.
    */
   public String truncateText(String text, int maxTokens, boolean fromStart) {
-    List<Integer> tokens = encoding.encode(text);
+    var tokens = encoding.encode(text);
     int tokensToRetrieve = Math.min(maxTokens, tokens.size());
     int startIndex = fromStart ? 0 : tokens.size() - tokensToRetrieve;
-    return encoding.decode(tokens.subList(startIndex, startIndex + tokensToRetrieve));
+    var truncatedList =
+        tokens.boxed().subList(startIndex, startIndex + tokensToRetrieve);
+    return encoding.decode(convertToIntArrayList(truncatedList));
+  }
+
+  /**
+   * Copies boxed token ids into an {@link IntArrayList}, the type passed to
+   * {@code encoding.decode}.
+   *
+   * @param tokens The token ids to copy.
+   * @return A new list holding the same ids in the same order.
+   */
+  private IntArrayList convertToIntArrayList(List<Integer> tokens) {
+    var result = new IntArrayList(tokens.size());
+    for (var integer : tokens) {
+      result.add(integer);
+    }
+    return result;
   }
 }