Merge branch 'upstream' into concedo_experimental

# Conflicts: # common/sampling.h # llama.h # tests/test-chat-template.cpp
2025-09-10 17:14:36 +00:00 · 2024-04-24 21:29:07 +08:00 · 2024-04-24 21:29:07 +08:00 · a681cdd9ef
commit a681cdd9ef
parent 15ed96c25a 3fe847b574
20 changed files with 788 additions and 355 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -253,11 +253,12 @@ std::vector<llama_token> llama_tokenize(
                        bool   add_special,
                        bool   parse_special = false);

-// tokenizes a token into a piece
+// tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
 std::string llama_token_to_piece(
        const struct llama_context * ctx,
-                       llama_token   token);
+                       llama_token   token,
+                       bool          special = true);

 // TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
 //       that takes into account the tokenizer type and decides how to handle the leading space