mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-05 23:49:04 +00:00
add endpoint /v1/cancel
This commit is contained in:
parent
c8af1be27e
commit
fbf853341b
2 changed files with 43 additions and 1 deletions
10
README.md
10
README.md
|
@ -364,6 +364,16 @@ Not yet. Now prima.cpp supports only CUDA-based GPUs. Vulkan is in our roadmap,
|
|||
|
||||
No worries, this is expected. Prima.cpp found that this device was too slow, and dropping it could speed up inference, so it was removed.
|
||||
|
||||
**8. How to cancel a running task?**
|
||||
|
||||
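Besides closing the HTTP/SSE connection, prima.cpp offers a handy `/v1/cancel` endpoint to cancel a running task by its `task_id`. The request body must be a JSON object with an integer `task_id`; on success the server replies with `{"task_id": <id>, "status": "cancelled"}`.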
|
||||
|
||||
```shell
|
||||
curl -X POST http://localhost:8080/v1/cancel \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"task_id": 0}'
|
||||
```
|
||||
|
||||
## ❤️ Acknowledgment
|
||||
This project builds upon the incredible work from the open-source community, especially [ggml, gguf](https://github.com/ggml-org/ggml), and [llama.cpp](https://github.com/ggml-org/llama.cpp). We gratefully acknowledge their contributions.
|
||||
|
||||
|
|
|
@ -1532,11 +1532,22 @@ struct server_context {
|
|||
cancel_tasks.reserve(id_tasks.size());
|
||||
for (const auto & id_task : id_tasks) {
|
||||
SRV_WRN("cancel task, id_task = %d\n", id_task);
|
||||
|
||||
|
||||
// create a cancel task for id_task
|
||||
server_task task;
|
||||
task.type = SERVER_TASK_TYPE_CANCEL;
|
||||
task.id_target = id_task;
|
||||
cancel_tasks.push_back(task);
|
||||
|
||||
// notify the results queue that the task is cancelled
|
||||
server_task_result cancel_res;
|
||||
cancel_res.id = id_task;
|
||||
cancel_res.stop = true;
|
||||
cancel_res.error = false;
|
||||
cancel_res.data = {{"cancelled", true}};
|
||||
queue_results.send(cancel_res);
|
||||
|
||||
// remove the task from the waiting queue
|
||||
queue_results.remove_waiting_task_id(id_task);
|
||||
}
|
||||
// push to beginning of the queue, so it has highest priority
|
||||
|
@ -2632,6 +2643,25 @@ int main(int argc, char ** argv) {
|
|||
res_ok(res, health);
|
||||
};
|
||||
|
||||
// POST /v1/cancel handler.
// Expects a JSON object body with an integer "task_id" and asks the server
// context to cancel that task. Replies {"task_id": <id>, "status": "cancelled"}
// on success, or an invalid-request error when the body is malformed, the
// field is missing / not an integer, or the value does not fit in an int.
// NOTE(review): the reply says "cancelled" without checking that the task
// exists or is still running — confirm whether callers rely on that.
const auto handle_cancel_tasks = [&](const httplib::Request & req, httplib::Response & res) {
    // Single place that produces the invalid-request reply used below.
    const auto reject = [&res](const char * message) {
        res.status = 400;
        res_error(res, format_error_response(message, ERROR_TYPE_INVALID_REQUEST));
    };

    // Parse with exceptions disabled: the body is untrusted input, and a
    // throwing parse would escape this handler instead of yielding a client error.
    const json request_data = json::parse(req.body, nullptr, /* allow_exceptions */ false);
    if (request_data.is_discarded()) {
        reject("Invalid request: body is not valid JSON");
        return;
    }

    // contains() is false for non-object values, so arrays/scalars land here too.
    if (!request_data.contains("task_id") || !request_data.at("task_id").is_number_integer()) {
        reject("Invalid request: 'task_id' field is required and must be integer");
        return;
    }

    const json & id_field = request_data.at("task_id");
    const int    task_id  = id_field.get<int>();

    // get<int>() narrows silently; compare against the stored 64-bit value so
    // an out-of-range id is rejected rather than aliased onto another task.
    const bool lossless = id_field.is_number_unsigned()
        ? (task_id >= 0 && id_field.get<unsigned long long>() == static_cast<unsigned long long>(task_id))
        : (id_field.get<long long>() == static_cast<long long>(task_id));
    if (!lossless) {
        reject("Invalid request: 'task_id' is out of range");
        return;
    }

    ctx_server.cancel_tasks({task_id});

    const json reply = {
        {"task_id", task_id},
        {"status", "cancelled"}
    };
    res_ok(res, reply);
};
|
||||
|
||||
const auto handle_slots = [&](const httplib::Request & req, httplib::Response & res) {
|
||||
if (!params.endpoint_slots) {
|
||||
res_error(res, format_error_response("This server does not support slots endpoint. Start it without `--no-slots`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
|
@ -3324,6 +3354,8 @@ int main(int argc, char ** argv) {
|
|||
// Save & load slots
|
||||
svr->Get ("/slots", handle_slots);
|
||||
svr->Post("/slots/:id_slot", handle_slots_action);
|
||||
// Stop tasks
|
||||
svr->Post("/v1/cancel", handle_cancel_tasks);
|
||||
|
||||
//
|
||||
// Start the server
|
||||
|
|
Loading…
Add table
Reference in a new issue