fix seq_id mismatch between head and worker devices

This commit is contained in:
Li, Zonghang 2025-06-11 17:10:21 +04:00
parent fb9b1f2b00
commit 3e6d831930
3 changed files with 33 additions and 33 deletions

View file

@ -957,7 +957,8 @@ extern "C" {
// < 0 - error
// NOTE(review): no description of `server_mode` is visible in this hunk; its effect cannot be
// determined from the declaration alone - TODO document it (presumably related to the seq_id
// handling between head and worker devices; confirm against the implementation).
// NOTE(review): a default argument (`= false`) is C++-only syntax, yet this declaration sits
// inside an `extern "C"` block; a C compiler including this header would reject it - confirm
// whether C consumers of this header still need to be supported.
// NOTE(review): the `struct llama_batch batch);` line below is the pre-change line of this hunk
// and the two lines after it are its replacement; only one of the two forms belongs in the header.
LLAMA_API int32_t llama_decode(
struct llama_context * ctx,
struct llama_batch batch);
struct llama_batch batch,
bool server_mode = false);
// Set the number of threads used for decoding
// n_threads is the number of threads used for generation (single token)