Merge branch 'master' into concedo_experimental

# Conflicts:
#	Makefile
2025-09-13 10:29:43 +00:00 · 2023-12-23 10:40:29 +08:00 · 2023-12-23 10:40:29 +08:00 · 4a8308b1c8
commit 4a8308b1c8
parent 8823e8b06d 7082d24cec
6 changed files with 252 additions and 1 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -45,7 +45,7 @@ struct gpt_params {
    int32_t n_ctx                           = 512;   // context size
    int32_t n_batch                         = 512;   // batch size for prompt processing (must be >=32 to use BLAS)
    int32_t n_keep                          = 0;     // number of tokens to keep from initial prompt
-    int32_t n_draft                         = 16;    // number of tokens to draft during speculative decoding
+    int32_t n_draft                         = 8;     // number of tokens to draft during speculative decoding
    int32_t n_chunks                        = -1;    // max number of chunks to process (-1 = unlimited)
    int32_t n_parallel                      = 1;     // number of parallel sequences to decode
    int32_t n_sequences                     = 1;     // number of sequences to decode
@@ -248,3 +248,4 @@ void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size = 80);

 // Dump the KV cache view showing individual sequences in each cell (long output).
 void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
+