Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	examples/imatrix/imatrix.cpp
#	examples/infill/infill.cpp
#	examples/perplexity/perplexity.cpp
This commit is contained in:
Concedo 2024-09-20 18:03:45 +08:00
commit 55a249d222
4 changed files with 41 additions and 7 deletions

View file

@@ -572,7 +572,7 @@ clean:
rm -vrf ggml/src/ggml-cuda/template-instances/*.o rm -vrf ggml/src/ggml-cuda/template-instances/*.o
# useful tools # useful tools
main: examples/main/main.cpp build-info.h ggml.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FULL) $(OBJS) main: examples/main/main.cpp common/json-schema-to-grammar.cpp common/arg.cpp build-info.h ggml.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FULL) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
@echo '==== Run ./main -h for help. ====' @echo '==== Run ./main -h for help. ===='
sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o llama.o console.o ggml-backend_default.o $(OBJS_FULL) $(OBJS) sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o llama.o console.o ggml-backend_default.o $(OBJS_FULL) $(OBJS)

View file

@@ -2,6 +2,7 @@
#include "log.h" #include "log.h"
#include "sampling.h" #include "sampling.h"
#include "build-info.h"
#include <algorithm> #include <algorithm>
#include <climits> #include <climits>

View file

@@ -117,6 +117,11 @@ static void sigint_handler(int signo) {
LOG("\n"); LOG("\n");
gpt_perf_print(*g_ctx, *g_smpl); gpt_perf_print(*g_ctx, *g_smpl);
write_logfile(*g_ctx, *g_params, *g_model, *g_input_tokens, g_output_ss->str(), *g_output_tokens); write_logfile(*g_ctx, *g_params, *g_model, *g_input_tokens, g_output_ss->str(), *g_output_tokens);
// make sure all logs are flushed
LOG("Interrupted by user\n");
gpt_log_pause(gpt_log_main());
_exit(130); _exit(130);
} }
} }

View file

@@ -532,26 +532,38 @@ struct server_response {
// add the id_task to the list of tasks waiting for response // add the id_task to the list of tasks waiting for response
void add_waiting_task_id(int id_task) { void add_waiting_task_id(int id_task) {
SRV_DBG("waiting for task id = %d\n", id_task); SRV_DBG("add task %d to waiting list. current waiting = %d (before add)\n", id_task, (int) waiting_task_ids.size());
std::unique_lock<std::mutex> lock(mutex_results); std::unique_lock<std::mutex> lock(mutex_results);
waiting_task_ids.insert(id_task); waiting_task_ids.insert(id_task);
} }
void add_waiting_tasks(const std::vector<server_task> & tasks) { void add_waiting_tasks(const std::vector<server_task> & tasks) {
for (const auto & t : tasks) { std::unique_lock<std::mutex> lock(mutex_results);
add_waiting_task_id(t.id);
for (const auto & task : tasks) {
SRV_DBG("add task %d to waiting list. current waiting = %d (before add)\n", task.id, (int) waiting_task_ids.size());
waiting_task_ids.insert(task.id);
} }
} }
// when the request is finished, we can remove task associated with it // when the request is finished, we can remove task associated with it
void remove_waiting_task_id(int id_task) { void remove_waiting_task_id(int id_task) {
SRV_DBG("task id = %d is done\n", id_task); SRV_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size());
std::unique_lock<std::mutex> lock(mutex_results); std::unique_lock<std::mutex> lock(mutex_results);
waiting_task_ids.erase(id_task); waiting_task_ids.erase(id_task);
} }
void remove_waiting_task_ids(const std::unordered_set<int> & id_tasks) {
std::unique_lock<std::mutex> lock(mutex_results);
for (const auto & id_task : id_tasks) {
SRV_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size());
waiting_task_ids.erase(id_task);
}
}
// This function blocks the thread until there is a response for one of the id_tasks // This function blocks the thread until there is a response for one of the id_tasks
server_task_result recv(const std::unordered_set<int> & id_tasks) { server_task_result recv(const std::unordered_set<int> & id_tasks) {
while (true) { while (true) {
@@ -2775,6 +2787,8 @@ int main(int argc, char ** argv) {
}, [&](const json & error_data) { }, [&](const json & error_data) {
res_error(res, error_data); res_error(res, error_data);
}); });
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
} else { } else {
const auto chunked_content_provider = [task_ids, &ctx_server](size_t, httplib::DataSink & sink) { const auto chunked_content_provider = [task_ids, &ctx_server](size_t, httplib::DataSink & sink) {
ctx_server.receive_cmpl_results_stream(task_ids, [&](const server_task_result & result) -> bool { ctx_server.receive_cmpl_results_stream(task_ids, [&](const server_task_result & result) -> bool {
@@ -2785,7 +2799,12 @@ int main(int argc, char ** argv) {
sink.done(); sink.done();
return false; return false;
}; };
res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
auto on_complete = [task_ids, &ctx_server] (bool) {
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
};
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
} }
}; };
@@ -2824,6 +2843,8 @@ int main(int argc, char ** argv) {
}, [&](const json & error_data) { }, [&](const json & error_data) {
res_error(res, error_data); res_error(res, error_data);
}); });
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
} else { } else {
const auto chunked_content_provider = [task_ids, &ctx_server, completion_id](size_t, httplib::DataSink & sink) { const auto chunked_content_provider = [task_ids, &ctx_server, completion_id](size_t, httplib::DataSink & sink) {
ctx_server.receive_cmpl_results_stream(task_ids, [&](const server_task_result & result) -> bool { ctx_server.receive_cmpl_results_stream(task_ids, [&](const server_task_result & result) -> bool {
@@ -2845,7 +2866,12 @@ int main(int argc, char ** argv) {
sink.done(); sink.done();
return true; return true;
}; };
res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
auto on_complete = [task_ids, &ctx_server] (bool) {
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
};
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
} }
}; };
@@ -2954,6 +2980,8 @@ int main(int argc, char ** argv) {
res_error(res, error_data); res_error(res, error_data);
error = true; error = true;
}); });
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
} }
if (error) { if (error) {