mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-06 14:39:03 +00:00
add endpoint /v1/cancel
This commit is contained in:
parent
c8af1be27e
commit
fbf853341b
2 changed files with 43 additions and 1 deletions
10
README.md
10
README.md
|
@ -364,6 +364,16 @@ Not yet. Now prima.cpp supports only CUDA-based GPUs. Vulkan is in our roadmap,
|
||||||
|
|
||||||
No worries, this is expected. Prima.cpp found that this device was too slow, and dropping it could speed up inference, so it was removed.
|
No worries, this is expected. Prima.cpp found that this device was too slow, and dropping it could speed up inference, so it was removed.
|
||||||
|
|
||||||
|
**8. How to cancel a running task?**
|
||||||
|
|
||||||
|
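Besides closing the HTTP/SSE connection, prima.cpp offers a handy `/v1/cancel` endpoint to cancel a running task by its `task_id`. The request body must be a JSON object with an integer `task_id`; the server replies with `{"task_id": <id>, "status": "cancelled"}`.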
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl -X POST http://localhost:8080/v1/cancel \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"task_id": 0}'
|
||||||
|
```
|
||||||
|
|
||||||
## ❤️ Acknowledgment
|
## ❤️ Acknowledgment
|
||||||
This project builds upon the incredible work from the open-source community, especially [ggml, gguf](https://github.com/ggml-org/ggml), and [llama.cpp](https://github.com/ggml-org/llama.cpp). We gratefully acknowledge their contributions.
|
This project builds upon the incredible work from the open-source community, especially [ggml, gguf](https://github.com/ggml-org/ggml), and [llama.cpp](https://github.com/ggml-org/llama.cpp). We gratefully acknowledge their contributions.
|
||||||
|
|
||||||
|
|
|
@ -1533,10 +1533,21 @@ struct server_context {
|
||||||
for (const auto & id_task : id_tasks) {
|
for (const auto & id_task : id_tasks) {
|
||||||
SRV_WRN("cancel task, id_task = %d\n", id_task);
|
SRV_WRN("cancel task, id_task = %d\n", id_task);
|
||||||
|
|
||||||
|
// create a cancel task for id_task
|
||||||
server_task task;
|
server_task task;
|
||||||
task.type = SERVER_TASK_TYPE_CANCEL;
|
task.type = SERVER_TASK_TYPE_CANCEL;
|
||||||
task.id_target = id_task;
|
task.id_target = id_task;
|
||||||
cancel_tasks.push_back(task);
|
cancel_tasks.push_back(task);
|
||||||
|
|
||||||
|
// notify the results queue that the task is cancelled
|
||||||
|
server_task_result cancel_res;
|
||||||
|
cancel_res.id = id_task;
|
||||||
|
cancel_res.stop = true;
|
||||||
|
cancel_res.error = false;
|
||||||
|
cancel_res.data = {{"cancelled", true}};
|
||||||
|
queue_results.send(cancel_res);
|
||||||
|
|
||||||
|
// remove the task from the waiting queue
|
||||||
queue_results.remove_waiting_task_id(id_task);
|
queue_results.remove_waiting_task_id(id_task);
|
||||||
}
|
}
|
||||||
// push to beginning of the queue, so it has highest priority
|
// push to beginning of the queue, so it has highest priority
|
||||||
|
@ -2632,6 +2643,25 @@ int main(int argc, char ** argv) {
|
||||||
res_ok(res, health);
|
res_ok(res, health);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// POST /v1/cancel: request cancellation of the task named by the integer
// `task_id` field of the JSON request body.
//
// Responses:
//   - invalid-request error if the body is not valid JSON, or if `task_id`
//     is missing or not an integer;
//   - otherwise {"task_id": <id>, "status": "cancelled"}.
//
// NOTE: the success reply is sent as soon as the cancel request has been handed
// to ctx_server.cancel_tasks(); this handler does not check whether the id
// referred to a task that was actually running.
const auto handle_cancel_tasks = [&](const httplib::Request & req, httplib::Response & res) {
    // Parse with exceptions disabled: an empty or malformed body must produce a
    // client error, not an exception escaping the handler.
    const json request_data = json::parse(req.body, nullptr, false);
    if (request_data.is_discarded()) {
        res.status = 400;
        res_error(res, format_error_response(
            "Invalid request: body must be valid JSON",
            ERROR_TYPE_INVALID_REQUEST
        ));
        return;
    }

    // contains() is false for non-object values, so at() is only reached when
    // the key exists.
    if (!request_data.contains("task_id") || !request_data.at("task_id").is_number_integer()) {
        res.status = 400;
        res_error(res, format_error_response(
            "Invalid request: 'task_id' field is required and must be integer",
            ERROR_TYPE_INVALID_REQUEST
        ));
        return;
    }

    const int task_id = request_data.at("task_id").get<int>();
    ctx_server.cancel_tasks({task_id});

    const json reply = {
        {"task_id", task_id},
        {"status", "cancelled"}
    };
    res_ok(res, reply);
};
|
||||||
|
|
||||||
const auto handle_slots = [&](const httplib::Request & req, httplib::Response & res) {
|
const auto handle_slots = [&](const httplib::Request & req, httplib::Response & res) {
|
||||||
if (!params.endpoint_slots) {
|
if (!params.endpoint_slots) {
|
||||||
res_error(res, format_error_response("This server does not support slots endpoint. Start it without `--no-slots`", ERROR_TYPE_NOT_SUPPORTED));
|
res_error(res, format_error_response("This server does not support slots endpoint. Start it without `--no-slots`", ERROR_TYPE_NOT_SUPPORTED));
|
||||||
|
@ -3324,6 +3354,8 @@ int main(int argc, char ** argv) {
|
||||||
// Save & load slots
|
// Save & load slots
|
||||||
svr->Get ("/slots", handle_slots);
|
svr->Get ("/slots", handle_slots);
|
||||||
svr->Post("/slots/:id_slot", handle_slots_action);
|
svr->Post("/slots/:id_slot", handle_slots_action);
|
||||||
|
// Stop tasks
|
||||||
|
svr->Post("/v1/cancel", handle_cancel_tasks);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Start the server
|
// Start the server
|
||||||
|
|
Loading…
Add table
Reference in a new issue