Mirror of https://github.com/LostRuins/koboldcpp.git
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.devops/nix/package.nix
#	.devops/tools.sh
#	.github/workflows/build.yml
#	Makefile
#	README.md
#	common/CMakeLists.txt
#	common/common.h
#	examples/llava/CMakeLists.txt
#	examples/run/CMakeLists.txt
#	examples/run/README.md
#	examples/run/run.cpp
#	ggml/CMakeLists.txt
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-kompute/ggml-kompute.cpp
#	tests/test-backend-ops.cpp
#	tests/test-rope.cpp
commit f456ed7237
38 changed files with 10752 additions and 105 deletions
```diff
@@ -459,7 +459,7 @@ struct server_task_result_cmpl_final : server_task_result {
     int32_t n_decoded;
     int32_t n_prompt_tokens;
     int32_t n_tokens_cached;
-    int32_t has_new_line;
+    bool has_new_line;
     std::string stopping_word;
     stop_type stop = STOP_TYPE_NONE;
```
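The hunk above narrows `has_new_line` from `int32_t` to `bool`, which matters once the field reaches JSON: a boolean round-trips as `true`/`false` rather than `0`/`1`. A minimal sketch of the idea, assuming nlohmann/json; the names `result_sketch`, `on_token`, and `to_json` are invented for illustration and are not part of this diff:

```cpp
// Minimal sketch, not the actual server.cpp code: track a bool flag such as
// has_new_line and serialize it. Assumes nlohmann/json is available.
#include <nlohmann/json.hpp>
#include <cstdint>
#include <iostream>
#include <string>

struct result_sketch {
    int32_t     n_decoded    = 0;
    bool        has_new_line = false; // set once any generated piece contains '\n'
    std::string stopping_word;

    void on_token(const std::string & piece) {
        n_decoded += 1;
        has_new_line = has_new_line || piece.find('\n') != std::string::npos;
    }

    nlohmann::ordered_json to_json() const {
        return {
            {"n_decoded",     n_decoded},
            {"has_new_line",  has_new_line}, // emitted as a JSON boolean, not 0/1
            {"stopping_word", stopping_word},
        };
    }
};

int main() {
    result_sketch r;
    r.on_token("hello\n");
    // prints {"n_decoded":1,"has_new_line":true,"stopping_word":""}
    std::cout << r.to_json().dump() << "\n";
    return 0;
}
```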
```diff
@@ -1079,9 +1079,9 @@ struct server_slot {
     SLT_INF(*this,
             "\n"
-            "\rprompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n"
-            "\r       eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n"
-            "\r      total time = %10.2f ms / %5d tokens\n",
+            "prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n"
+            "       eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n"
+            "      total time = %10.2f ms / %5d tokens\n",
             t_prompt_processing, n_prompt_tokens_processed, t_prompt, n_prompt_second,
             t_token_generation, n_decoded, t_gen, n_gen_second,
             t_prompt_processing + t_token_generation, n_prompt_tokens_processed + n_decoded);
```
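This hunk drops the leading `\r` from each timing line. A carriage return rewrites the current terminal line in place, but when the log is redirected to a file or captured by a supervisor it shows up as a stray control character. A standalone illustration of the same printf-style format strings, with invented sample numbers:

```cpp
// Self-contained demo of the timing-line format above; the values are made up.
// The padded string literals keep the '=' columns aligned across the three lines.
#include <cstdio>

int main() {
    double t_prompt_ms = 154.28; int n_prompt = 5;
    double t_gen_ms   = 1318.91; int n_gen    = 64;

    std::printf("prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
                t_prompt_ms, n_prompt, t_prompt_ms / n_prompt, 1e3 * n_prompt / t_prompt_ms);
    std::printf("       eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
                t_gen_ms, n_gen, t_gen_ms / n_gen, 1e3 * n_gen / t_gen_ms);
    std::printf("      total time = %10.2f ms / %5d tokens\n",
                t_prompt_ms + t_gen_ms, n_prompt + n_gen);
    return 0;
}
```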
```diff
@@ -25,6 +25,7 @@ def test_completion(prompt: str, n_predict: int, re_content: str, n_prompt: int,
     assert res.body["timings"]["prompt_n"] == n_prompt
     assert res.body["timings"]["predicted_n"] == n_predicted
     assert res.body["truncated"] == truncated
+    assert type(res.body["has_new_line"]) == bool
     assert match_regex(re_content, res.body["content"])
```
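The new assertion pins down the JSON type of `has_new_line`, not just its presence, which is what the `int32_t` → `bool` change in the struct buys. A sketch of the server-side property the test enforces; the response body below is invented:

```cpp
// Sketch: has_new_line must arrive as a JSON boolean, not an integer.
#include <nlohmann/json.hpp>
#include <cassert>

int main() {
    auto res = nlohmann::json::parse(R"({
        "content":      "hello\nworld",
        "truncated":    false,
        "has_new_line": true
    })");
    assert(res["has_new_line"].is_boolean()); // would fail if serialized as 0/1
    return 0;
}
```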
```diff
@@ -48,6 +49,7 @@ def test_completion_stream(prompt: str, n_predict: int, re_content: str, n_promp
     assert data["timings"]["predicted_n"] == n_predicted
     assert data["truncated"] == truncated
     assert data["stop_type"] == "limit"
+    assert type(data["has_new_line"]) == bool
     assert "generation_settings" in data
     assert server.n_predict is not None
     assert data["generation_settings"]["n_predict"] == min(n_predict, server.n_predict)
```
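Same check for the streaming path, where the stop and timing fields arrive only on the final chunk. A sketch with invented chunk data:

```cpp
// Streaming analogue (invented data): only the last chunk carries the
// stop/timing fields, so that is where has_new_line is checked.
#include <nlohmann/json.hpp>
#include <cassert>
#include <vector>

int main() {
    std::vector<nlohmann::json> chunks = {
        {{"content", "hel"},  {"stop", false}},
        {{"content", "lo\n"}, {"stop", false}},
        {{"content", ""}, {"stop", true}, {"stop_type", "limit"}, {"has_new_line", true}},
    };
    const auto & last = chunks.back();
    assert(last["stop"].get<bool>());
    assert(last["stop_type"] == "limit");
    assert(last["has_new_line"].is_boolean());
    return 0;
}
```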
```diff
@@ -22,7 +22,7 @@
 #include <vector>
 #include <memory>

-#define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo-0613"
+#define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo"

 using json = nlohmann::ordered_json;
```
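The default model string loses its dated `-0613` suffix (that snapshot name has been retired by OpenAI). A sketch, assuming nlohmann/json's `value()` accessor, of how such a fallback is typically applied when a request omits `"model"`; the request handling is illustrative, not the code from this diff:

```cpp
// Sketch: fall back to the compile-time default when the request has no "model".
#include <nlohmann/json.hpp>
#include <iostream>
#include <string>

#define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo"

int main() {
    auto request = nlohmann::json::parse(R"({ "prompt": "hi" })");
    // json::value(key, default) returns the field if present, else the default
    std::string model = request.value("model", std::string(DEFAULT_OAICOMPAT_MODEL));
    std::cout << model << "\n"; // prints "gpt-3.5-turbo"
    return 0;
}
```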