mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
Merge commit '1c641e6aac
' into concedo_experimental
# Conflicts: # .devops/cloud-v-pipeline # .devops/llama-cli-cuda.Dockerfile # .devops/llama-cli-rocm.Dockerfile # .devops/llama-cli-vulkan.Dockerfile # .devops/llama-cli.Dockerfile # .devops/llama-cpp-clblast.srpm.spec # .devops/llama-cpp-cuda.srpm.spec # .devops/llama-cpp.srpm.spec # .devops/llama-server-cuda.Dockerfile # .devops/llama-server-rocm.Dockerfile # .devops/llama-server-vulkan.Dockerfile # .devops/llama-server.Dockerfile # .devops/nix/apps.nix # .devops/nix/package.nix # .devops/tools.sh # .dockerignore # .github/ISSUE_TEMPLATE/01-bug-low.yml # .github/ISSUE_TEMPLATE/02-bug-medium.yml # .github/ISSUE_TEMPLATE/03-bug-high.yml # .github/ISSUE_TEMPLATE/04-bug-critical.yml # .github/workflows/bench.yml # .github/workflows/build.yml # .github/workflows/docker.yml # .github/workflows/server.yml # .gitignore # Makefile # README-sycl.md # README.md # ci/run.sh # docs/token_generation_performance_tips.md # flake.nix # grammars/README.md # pocs/vdot/CMakeLists.txt # scripts/get-hellaswag.sh # scripts/get-wikitext-103.sh # scripts/get-wikitext-2.sh # scripts/get-winogrande.sh # scripts/hf.sh # scripts/pod-llama.sh # scripts/qnt-all.sh # scripts/run-all-ppl.sh # scripts/run-with-preset.py # scripts/server-llm.sh # tests/test-backend-ops.cpp
This commit is contained in:
commit
b53e760557
94 changed files with 457 additions and 317 deletions
|
@ -148,7 +148,7 @@ struct server_slot {
|
|||
int32_t n_prompt_tokens = 0;
|
||||
int32_t n_prompt_tokens_processed = 0;
|
||||
|
||||
std::string prompt;
|
||||
json prompt; // can be either a string, array of strings or array of token ids
|
||||
|
||||
// when a task is submitted, we first tokenize the prompt and store it here
|
||||
std::vector<llama_token> prompt_tokens;
|
||||
|
@ -823,8 +823,13 @@ struct server_context {
|
|||
continue;
|
||||
}
|
||||
|
||||
// skip the slot if it does not contains prompt
|
||||
if (!slot.prompt.is_string()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// current slot's prompt
|
||||
std::string slot_prompt = slot.prompt;
|
||||
std::string slot_prompt = slot.prompt.get<std::string>();
|
||||
|
||||
// length of the current slot's prompt
|
||||
int slot_prompt_len = slot_prompt.size();
|
||||
|
@ -958,12 +963,12 @@ struct server_context {
|
|||
return false;
|
||||
}
|
||||
|
||||
if (prompt->is_string()) {
|
||||
slot.prompt = prompt->get<std::string>();
|
||||
} else if (prompt->is_array() && prompt->size() == 1 && prompt->at(0).is_string()) {
|
||||
slot.prompt = prompt->at(0).get<std::string>();
|
||||
if ((prompt->is_string()) ||
|
||||
(prompt->is_array() && prompt->size() == 1 && prompt->at(0).is_string()) ||
|
||||
(prompt->is_array() && !prompt->empty() && prompt->at(0).is_number_integer())) {
|
||||
slot.prompt = *prompt;
|
||||
} else {
|
||||
send_error(task, "\"prompt\" must be a string or an array of strings", ERROR_TYPE_INVALID_REQUEST);
|
||||
send_error(task, "\"prompt\" must be a string or an array of integers", ERROR_TYPE_INVALID_REQUEST);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue