Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 01:24:36 +00:00)
Merge branch 'master' into concedo_experimental
# Conflicts:
#	.github/workflows/build.yml
#	.gitignore
#	CMakeLists.txt
#	Makefile
#	README.md
#	ci/run.sh
#	flake.lock
#	flake.nix
#	ggml-cuda.cu
#	ggml-cuda.h
#	scripts/get-wikitext-2.sh
#	tests/CMakeLists.txt
Commit 1cb8a5e955
79 changed files with 6273 additions and 2982 deletions
The substantive server change merged here updates llama_server_context::process_tasks(): when no slot is available for a task, the server no longer replies with a "slot unavailable" error; it defers the task and requeues it for a later pass.

@@ -1559,6 +1559,7 @@ struct llama_server_context
     void process_tasks()
     {
         std::unique_lock<std::mutex> lock(mutex_tasks);
+        std::vector<task_server> deferred_tasks;
         while (!queue_tasks.empty())
         {
             task_server task = queue_tasks.front();
@@ -1569,9 +1570,8 @@ struct llama_server_context
                     llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1));
                     if (slot == nullptr)
                     {
-                        LOG_TEE("slot unavailable\n");
-                        // send error result
-                        send_error(task, "slot unavailable");
+                        // if no slot is available, we defer this task for processing later
+                        deferred_tasks.push_back(task);
                         break;
                     }
 
@@ -1617,6 +1617,12 @@ struct llama_server_context
             }
         }
 
+        // add all the deferred tasks back to the queue
+        for (task_server &task : deferred_tasks)
+        {
+            queue_tasks.push_back(task);
+        }
+
         // remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue
         std::vector<task_result> agg_results;
         auto queue_iterator = queue_multitasks.begin();
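The hunks above replace fail-fast error reporting with a defer-and-requeue pattern: while mutex_tasks is held, tasks that cannot get a slot are parked in a local deferred_tasks vector, and after the drain loop they are appended back to queue_tasks so the next call to process_tasks() retries them. Below is a minimal, self-contained C++ sketch of that pattern, not the actual server code; Task, Scheduler, and free_slots are illustrative stand-ins for task_server, llama_server_context, and the slot pool.

// Sketch of the defer-and-requeue pattern, assuming a simplified scheduler.
#include <cstdio>
#include <mutex>
#include <vector>

struct Task {
    int id;      // request identifier
    int slot_id; // -1 means "any slot" (mirrors the "slot_id" JSON field)
};

struct Scheduler {
    std::mutex mutex_tasks;
    std::vector<Task> queue_tasks;
    int free_slots = 0; // stand-in for the server's pool of client slots

    void post(Task t) {
        std::lock_guard<std::mutex> lock(mutex_tasks);
        queue_tasks.push_back(t);
    }

    void process_tasks() {
        std::unique_lock<std::mutex> lock(mutex_tasks);
        std::vector<Task> deferred_tasks;
        while (!queue_tasks.empty()) {
            Task task = queue_tasks.front();
            queue_tasks.erase(queue_tasks.begin());
            if (free_slots == 0) {
                // no slot available: defer instead of sending an error
                deferred_tasks.push_back(task);
                break; // same early exit as the patch
            }
            --free_slots;
            std::printf("task %d assigned to a slot\n", task.id);
        }
        // add the deferred tasks back so a later pass retries them
        for (Task &task : deferred_tasks) {
            queue_tasks.push_back(task);
        }
    }
};

int main() {
    Scheduler s;
    s.free_slots = 1;
    s.post({1, -1});
    s.post({2, -1});
    s.process_tasks(); // task 1 gets the slot; task 2 is deferred and requeued
    s.free_slots = 1;  // a slot frees up (e.g. a generation finishes)
    s.process_tasks(); // task 2 now runs
    return 0;
}

One consequence of this scheme, visible in the patch as well: the loop breaks at the first unschedulable task, and deferred tasks are appended behind whatever is still queued, so strict FIFO order across retries is not guaranteed; in exchange, clients no longer receive spurious "slot unavailable" errors under load.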