spec : add ngram-mod (#19164)

* spec : add ngram-mod

* cont : simplify + keep track of occupancy

* cont : cleanup

* cont : move initialization to common/speculative

* cont : cleanup

* cont : cleanup

* cont : fix
This commit is contained in:
Georgi Gerganov 2026-01-30 18:21:48 +02:00 committed by GitHub
parent 2e916f996a
commit dabaa2e77a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 292 additions and 29 deletions

View file

@@ -705,6 +705,11 @@ private:
params_base.n_cache_reuse = 0;
SRV_WRN("%s\n", "cache_reuse is not supported by multimodal, it will be disabled");
}
if (params_base.speculative.type != COMMON_SPECULATIVE_TYPE_NONE) {
params_base.speculative.type = COMMON_SPECULATIVE_TYPE_NONE;
SRV_WRN("%s\n", "speculative decoding is not supported by multimodal, it will be disabled");
}
}
if (!llama_memory_can_shift(llama_get_memory(ctx))) {
@@ -754,9 +759,9 @@ private:
SRV_ERR("%s\n", "speculative decoding is not supported with multimodal");
return false;
}
SRV_WRN("%s", "speculative decoding context initialized\n");
SLT_INF(slot, "%s", "speculative decoding context initialized\n");
} else {
SRV_WRN("%s", "speculative decoding context not initialized\n");
SLT_INF(slot, "%s", "speculative decoding context not initialized\n");
}
}