mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-14 19:09:45 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .gitignore # README.md # docs/backend/BLIS.md # docs/backend/SYCL.md # docs/development/llama-star/idea-arch.key # docs/development/llama-star/idea-arch.pdf # docs/development/token_generation_performance_tips.md # src/llama.cpp # tests/test-tokenizer-0.cpp # tests/test-tokenizer-1-bpe.cpp # tests/test-tokenizer-1-spm.cpp # tests/test-tokenizer-random.py
This commit is contained in:
commit
8e5fd6f509
28 changed files with 352 additions and 2091 deletions
|
@ -906,6 +906,7 @@ extern "C" {
|
|||
/// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
|
||||
/// @return Returns the number of tokens on success, no more than n_tokens_max
|
||||
/// @return Returns a negative number on failure - the number of tokens that would have been returned
|
||||
/// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
|
||||
/// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
|
||||
/// as plaintext. Does not insert a leading space.
|
||||
LLAMA_API int32_t llama_tokenize(
|
||||
|
@ -920,15 +921,31 @@ extern "C" {
|
|||
// Token Id -> Piece.
|
||||
// Uses the vocabulary in the provided context.
|
||||
// Does not write null terminator to the buffer.
|
||||
// User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
|
||||
// User can skip up to 'lstrip' leading spaces before copying (useful when encoding/decoding multiple tokens with 'add_space_prefix')
|
||||
// @param special If true, special tokens are rendered in the output.
|
||||
LLAMA_API int32_t llama_token_to_piece(
|
||||
const struct llama_model * model,
|
||||
llama_token token,
|
||||
char * buf,
|
||||
int32_t length,
|
||||
int32_t lstrip,
|
||||
bool special);
|
||||
|
||||
/// @details Convert the provided tokens into text (inverse of llama_tokenize()).
|
||||
/// @param text The char pointer must be large enough to hold the resulting text.
|
||||
/// @return Returns the number of chars/bytes on success, no more than text_len_max.
|
||||
/// @return Returns a negative number on failure - the number of chars/bytes that would have been returned.
|
||||
/// @param remove_special Allow to remove BOS and EOS tokens if model is configured to do so.
|
||||
/// @param unparse_special If true, special tokens are rendered in the output.
|
||||
LLAMA_API int32_t llama_detokenize(
|
||||
const struct llama_model * model,
|
||||
const llama_token * tokens,
|
||||
int32_t n_tokens,
|
||||
char * text,
|
||||
int32_t text_len_max,
|
||||
bool remove_special,
|
||||
bool unparse_special);
|
||||
|
||||
/// Apply chat template. Inspired by hf apply_chat_template() on python.
|
||||
/// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model"
|
||||
/// NOTE: This function does not use a jinja parser. It only support a pre-defined list of template. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue