Merge branch 'master' into concedo_experimental
# Conflicts:
#	.devops/nix/scope.nix
#	.github/workflows/nix-ci-aarch64.yml
#	.github/workflows/nix-ci.yml
#	README.md
#	scripts/sync-ggml.last
commit 359a14d3c2
10 changed files with 251 additions and 72 deletions
@@ -401,6 +401,16 @@ struct llama_server_context
         return true;
     }

+    void validate_model_chat_template(server_params & sparams) {
+        llama_chat_message chat[] = {{"user", "test"}};
+        std::vector<char> buf(1);
+        int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
+        if (res < 0) {
+            LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
+            sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
+        }
+    }
+
     void initialize() {
         // create slots
         all_slots_are_idle = true;
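In this hunk, llama_chat_apply_template is used purely as a probe: passing nullptr as the template selects the template embedded in the model's metadata, and only the sign of the return value matters. A minimal standalone sketch of the same check, assuming the llama.h signature used above (the variant that still takes a llama_model pointer); the helper name chat_template_is_supported is made up for illustration:

#include <vector>

#include "llama.h"

// Probe whether the chat template bundled with a model is one llama.cpp can render.
// tmpl == nullptr means "use the template stored in the model's metadata".
static bool chat_template_is_supported(const llama_model * model) {
    llama_chat_message chat[] = {{"user", "test"}};
    std::vector<char> buf(1); // rendered output is discarded; only the return code matters
    int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
    return res >= 0; // a negative result means the template was not recognized
}

The fallback value "<|im_start|>" works because, per the inline comment in the diff, template detection only looks for that marker in the template string, which effectively selects chatml formatting.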
@@ -1939,6 +1949,10 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
     printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
     printf(" -spf FNAME, --system-prompt-file FNAME\n");
     printf(" set a file to load a system prompt (initial prompt of all slots), this is useful for chat applications.\n");
+    printf(" -ctk TYPE, --cache-type-k TYPE\n");
+    printf(" KV cache data type for K (default: f16)\n");
+    printf(" -ctv TYPE, --cache-type-v TYPE\n");
+    printf(" KV cache data type for V (default: f16)\n");
     printf(" --mmproj MMPROJ_FILE path to a multimodal projector file for LLaVA.\n");
     printf(" --log-disable disables logging to a file.\n");
     printf(" --slots-endpoint-disable disables slots monitoring endpoint.\n");
@@ -2377,6 +2391,12 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
             );
             llama.process_system_prompt_data(json::parse(systm_content));
         }
+        else if (arg == "-ctk" || arg == "--cache-type-k") {
+            params.cache_type_k = argv[++i];
+        }
+        else if (arg == "-ctv" || arg == "--cache-type-v") {
+            params.cache_type_v = argv[++i];
+        }
         else if(arg == "--mmproj")
         {
             if (++i >= argc)
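The -ctk/-ctv arguments are only stored as strings here; llama.cpp resolves them to ggml tensor types before the KV cache is allocated. A minimal sketch of such a mapping, assuming the ggml_type enum from ggml.h; the helper name kv_cache_type_from_str and the exact set of accepted strings are assumptions for illustration:

#include <stdexcept>
#include <string>

#include "ggml.h"

// Hypothetical helper: map a --cache-type-k / --cache-type-v string to a ggml type.
static ggml_type kv_cache_type_from_str(const std::string & s) {
    if (s == "f32")  return GGML_TYPE_F32;
    if (s == "f16")  return GGML_TYPE_F16;  // the default named in the usage text above
    if (s == "q8_0") return GGML_TYPE_Q8_0;
    if (s == "q4_0") return GGML_TYPE_Q4_0;
    throw std::runtime_error("unsupported KV cache type: " + s);
}

Quantized K and V types shrink the KV cache at some cost in accuracy, which is why f16 stays the default.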
@@ -2753,6 +2773,11 @@ int main(int argc, char **argv)
         LOG_INFO("model loaded", {});
     }

+    if (sparams.chat_template.empty()) { // custom chat template is not supplied
+        // check if the template comes with the model is supported by us
+        llama.validate_model_chat_template(sparams);
+    }
+
     // Middleware for API key validation
     auto validate_api_key = [&sparams](const httplib::Request &req, httplib::Response &res) -> bool {
         // If API key is not set, skip validation
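This hunk ends just as the API-key middleware lambda begins. A sketch of what such a check typically looks like with cpp-httplib, assuming a sparams.api_key field and a Bearer-token Authorization header; both of those details are assumptions, since the body of the lambda lies outside this hunk:

#include <string>

#include "httplib.h"

// Sketch only: the field name sparams.api_key and the exact header/response format are assumed.
// Placed inside main(), after server_params sparams has been parsed.
auto validate_api_key = [&sparams](const httplib::Request & req, httplib::Response & res) -> bool {
    if (sparams.api_key.empty()) {
        return true; // no key configured, skip validation
    }
    const std::string auth = req.get_header_value("Authorization");
    if (auth == "Bearer " + sparams.api_key) {
        return true;
    }
    res.set_content("{\"error\": \"invalid api key\"}", "application/json");
    res.status = 401; // Unauthorized
    return false;
};

Returning a bool lets each route handler bail out before doing any work on an unauthenticated request.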