llama : infill sampling handle very long tokens (#9924)

* llama : infill sampling handle very long tokens

ggml-ci

* cont : better indices

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-10-17 22:32:47 +03:00 committed by GitHub
parent 3752217ed5
commit 99bd4ac28c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 35 additions and 43 deletions

View file

@@ -21466,13 +21466,6 @@ int32_t llama_token_to_piece(
return llama_token_to_piece_impl(model->vocab, token, buf, length, lstrip, special);
}
// Returns true if the text of token0 is a prefix of the text of token1.
// Thin public wrapper: forwards to the vocab-level implementation.
bool llama_token_is_prefix(
        const struct llama_model * model,
        llama_token token0,
        llama_token token1) {
    const bool is_prefix = llama_token_is_prefix_impl(model->vocab, token0, token1);
    return is_prefix;
}
int32_t llama_detokenize(
const struct llama_model * model,
const llama_token * tokens,