diff --git a/Makefile b/Makefile
index 7d1361087..d7f3b18ec 100644
--- a/Makefile
+++ b/Makefile
@@ -572,7 +572,7 @@ clean:
 	rm -vrf ggml/src/ggml-cuda/template-instances/*.o

 # useful tools
-main: examples/main/main.cpp build-info.h ggml.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FULL) $(OBJS)
+main: examples/main/main.cpp common/json-schema-to-grammar.cpp common/arg.cpp build-info.h ggml.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 	@echo '==== Run ./main -h for help. ===='
 sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o llama.o console.o ggml-backend_default.o $(OBJS_FULL) $(OBJS)
diff --git a/common/arg.cpp b/common/arg.cpp
index 922391069..732e1a853 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2,6 +2,7 @@

 #include "log.h"
 #include "sampling.h"
+#include "build-info.h"

 #include <algorithm>
 #include <climits>
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 54d51bdc2..8ceec393d 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -117,6 +117,11 @@ static void sigint_handler(int signo) {
             LOG("\n");
             gpt_perf_print(*g_ctx, *g_smpl);
             write_logfile(*g_ctx, *g_params, *g_model, *g_input_tokens, g_output_ss->str(), *g_output_tokens);
+
+            // make sure all logs are flushed
+            LOG("Interrupted by user\n");
+            gpt_log_pause(gpt_log_main());
+            _exit(130);
         }
     }
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 7eeab43a4..e07cc65fa 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -532,26 +532,38 @@ struct server_response {

     // add the id_task to the list of tasks waiting for response
     void add_waiting_task_id(int id_task) {
-        SRV_DBG("waiting for task id = %d\n", id_task);
+        SRV_DBG("add task %d to waiting list. current waiting = %d (before add)\n", id_task, (int) waiting_task_ids.size());

         std::unique_lock<std::mutex> lock(mutex_results);
         waiting_task_ids.insert(id_task);
     }

     void add_waiting_tasks(const std::vector<server_task> & tasks) {
-        for (const auto & t : tasks) {
-            add_waiting_task_id(t.id);
+        std::unique_lock<std::mutex> lock(mutex_results);
+
+        for (const auto & task : tasks) {
+            SRV_DBG("add task %d to waiting list. current waiting = %d (before add)\n", task.id, (int) waiting_task_ids.size());
+            waiting_task_ids.insert(task.id);
         }
     }

     // when the request is finished, we can remove task associated with it
     void remove_waiting_task_id(int id_task) {
-        SRV_DBG("task id = %d is done\n", id_task);
+        SRV_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size());

         std::unique_lock<std::mutex> lock(mutex_results);
         waiting_task_ids.erase(id_task);
     }

+    void remove_waiting_task_ids(const std::unordered_set<int> & id_tasks) {
+        std::unique_lock<std::mutex> lock(mutex_results);
+
+        for (const auto & id_task : id_tasks) {
+            SRV_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size());
+            waiting_task_ids.erase(id_task);
+        }
+    }
+
     // This function blocks the thread until there is a response for one of the id_tasks
     server_task_result recv(const std::unordered_set<int> & id_tasks) {
         while (true) {
@@ -2775,6 +2787,8 @@ int main(int argc, char ** argv) {
             }, [&](const json & error_data) {
                 res_error(res, error_data);
             });
+
+            ctx_server.queue_results.remove_waiting_task_ids(task_ids);
         } else {
             const auto chunked_content_provider = [task_ids, &ctx_server](size_t, httplib::DataSink & sink) {
                 ctx_server.receive_cmpl_results_stream(task_ids, [&](const server_task_result & result) -> bool {
@@ -2785,7 +2799,12 @@ int main(int argc, char ** argv) {
                 sink.done();
                 return false;
             };
-            res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
+
+            auto on_complete = [task_ids, &ctx_server] (bool) {
+                ctx_server.queue_results.remove_waiting_task_ids(task_ids);
+            };
+
+            res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
         }
     };
@@ -2824,6 +2843,8 @@ int main(int argc, char ** argv) {
             }, [&](const json & error_data) {
                 res_error(res, error_data);
             });
+
+            ctx_server.queue_results.remove_waiting_task_ids(task_ids);
         } else {
             const auto chunked_content_provider = [task_ids, &ctx_server, completion_id](size_t, httplib::DataSink & sink) {
                 ctx_server.receive_cmpl_results_stream(task_ids, [&](const server_task_result & result) -> bool {
@@ -2845,7 +2866,12 @@ int main(int argc, char ** argv) {
                 sink.done();
                 return true;
             };
-            res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
+
+            auto on_complete = [task_ids, &ctx_server] (bool) {
+                ctx_server.queue_results.remove_waiting_task_ids(task_ids);
+            };
+
+            res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
         }
     };
@@ -2954,6 +2980,8 @@ int main(int argc, char ** argv) {
                 res_error(res, error_data);
                 error = true;
             });
+
+            ctx_server.queue_results.remove_waiting_task_ids(task_ids);
         }

         if (error) {