feat: add a primitive form of continuous batching (#2167)

* feat: add a primitive form of continuous batching

* fix: deadlock in batching fallback

* fix: windows build

* chore: suppress the contbatch arg from --help

* feat: batch-aware rep_pen_slope

* fix: automatically disable shifting when batching is enabled

* fix: mixed-path state corruption

* fix: attempt to fully separate the two pipelines

* added a semaphore to prevent non-batchable requests from starting while batched requests are running

---------

Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com>
This commit is contained in:
AlpinDale 2026-05-10 14:20:31 +04:30 committed by GitHub
parent a47037637c
commit c03302b670
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 924 additions and 16 deletions

View file

@ -258,6 +258,33 @@ extern "C"
// Exported status query: reports the current value of the file-level
// `generation_finished` flag.
bool has_finished()
{
    const bool finished_now = generation_finished;
    return finished_now;
}
// Exported wrapper: returns whatever gpttype_batch_generate_enabled() reports.
// NOTE(review): presumably true when the batched generation path is active —
// confirm against the gpttype implementation.
bool batch_generate_enabled()
{
    const bool enabled = gpttype_batch_generate_enabled();
    return enabled;
}
// Exported wrapper: hands `inputs` to gpttype_batch_generate_submit() and
// returns its int result unchanged.
// NOTE(review): the result is presumably the request id expected by the other
// batch_generate_* calls; failure signalling is not visible here — confirm.
int batch_generate_submit(const generation_inputs inputs)
{
    const int submit_result = gpttype_batch_generate_submit(inputs);
    return submit_result;
}
// Exported wrapper: forwards `request_id` to
// gpttype_batch_generate_has_finished() and returns its answer unchanged.
bool batch_generate_has_finished(int request_id)
{
    const bool finished = gpttype_batch_generate_has_finished(request_id);
    return finished;
}
// Exported wrapper: forwards `request_id` to
// gpttype_batch_generate_stream_count() and returns the count it reports.
// NOTE(review): presumably the number of streamed tokens available for this
// request (valid `idx` range for batch_generate_new_token) — confirm.
int batch_generate_stream_count(int request_id)
{
    const int count = gpttype_batch_generate_stream_count(request_id);
    return count;
}
// Exported wrapper: forwards `request_id` and `idx` to
// gpttype_batch_generate_new_token() and returns the pointer it yields.
// NOTE(review): ownership and lifetime of the returned string are decided by
// the gpttype implementation and are not visible here — confirm before caching.
const char * batch_generate_new_token(int request_id, int idx)
{
    const char * token_text = gpttype_batch_generate_new_token(request_id, idx);
    return token_text;
}
// Exported wrapper: forwards `request_id` to
// gpttype_batch_generate_pending_output() and returns the pointer it yields.
// NOTE(review): ownership and lifetime of the returned string are decided by
// the gpttype implementation and are not visible here — confirm.
const char * batch_generate_pending_output(int request_id)
{
    const char * pending_text = gpttype_batch_generate_pending_output(request_id);
    return pending_text;
}
// Exported wrapper: forwards `request_id` to gpttype_batch_generate_result()
// and returns the resulting generation_outputs by value.
generation_outputs batch_generate_result(int request_id)
{
    generation_outputs outputs = gpttype_batch_generate_result(request_id);
    return outputs;
}
// Exported wrapper: forwards `request_id` to gpttype_batch_generate_abort()
// and returns its bool result unchanged.
// NOTE(review): presumably true when the abort request was accepted — confirm.
bool batch_generate_abort(int request_id)
{
    const bool abort_result = gpttype_batch_generate_abort(request_id);
    return abort_result;
}
// Exported wrapper: forwards `request_id` to gpttype_batch_generate_release().
// Nothing is returned to the caller.
// NOTE(review): presumably frees per-request state, after which the id should
// no longer be used — confirm against the gpttype implementation.
void batch_generate_release(int request_id)
{
    gpttype_batch_generate_release(request_id);
}
bool has_audio_support()
{
return audio_multimodal_supported;