llama : infill sampling handle very long tokens (#9924)

* llama : infill sampling handle very long tokens

ggml-ci

* cont : better indices

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-10-17 22:32:47 +03:00 committed by GitHub
parent 3752217ed5
commit 99bd4ac28c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 35 additions and 43 deletions

View file

@@ -21466,13 +21466,6 @@ int32_t llama_token_to_piece(
return llama_token_to_piece_impl(model->vocab, token, buf, length, lstrip, special);
}
// Returns true if the text of token0 is a prefix of the text of token1.
// Thin public wrapper: forwards to the vocab-level implementation.
bool llama_token_is_prefix(
        const struct llama_model * model,
        llama_token token0,
        llama_token token1) {
    const bool is_prefix = llama_token_is_prefix_impl(model->vocab, token0, token1);
    return is_prefix;
}
int32_t llama_detokenize(
const struct llama_model * model,
const llama_token * tokens,