mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .devops/full-rocm.Dockerfile # .devops/llama-cli-rocm.Dockerfile # .devops/llama-server-rocm.Dockerfile # .github/workflows/build.yml # .github/workflows/python-type-check.yml # CMakeLists.txt # CONTRIBUTING.md # README.md # ci/run.sh # examples/embedding/embedding.cpp # examples/server/README.md # flake.lock # ggml/include/ggml.h # ggml/src/ggml.c # requirements/requirements-convert_legacy_llama.txt # scripts/sync-ggml.last # src/llama-vocab.cpp # src/llama.cpp # tests/test-backend-ops.cpp # tests/test-grad0.cpp # tests/test-tokenizer-0.cpp
This commit is contained in:
commit
ce7f9c9a2c
39 changed files with 103400 additions and 102738 deletions
|
@ -285,6 +285,10 @@ static bool gpt_params_parse_ex(int argc, char ** argv, gpt_params_context & ctx
|
|||
params.kv_overrides.back().key[0] = 0;
|
||||
}
|
||||
|
||||
if (params.reranking && params.embedding) {
|
||||
throw std::invalid_argument("error: either --embedding or --reranking can be specified, but not both");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -392,7 +396,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||
[](gpt_params & params) {
|
||||
params.verbose_prompt = true;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||
));
|
||||
add_opt(llama_arg(
|
||||
{"--no-display-prompt"},
|
||||
format("don't print prompt at generation (default: %s)", !params.display_prompt ? "true" : "false"),
|
||||
|
@ -1094,13 +1098,14 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||
}
|
||||
).set_sparam());
|
||||
add_opt(llama_arg(
|
||||
{"--pooling"}, "{none,mean,cls,last}",
|
||||
{"--pooling"}, "{none,mean,cls,last,rank}",
|
||||
"pooling type for embeddings, use model default if unspecified",
|
||||
[](gpt_params & params, const std::string & value) {
|
||||
/**/ if (value == "none") { params.pooling_type = LLAMA_POOLING_TYPE_NONE; }
|
||||
else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; }
|
||||
else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; }
|
||||
else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; }
|
||||
else if (value == "last") { params.pooling_type = LLAMA_POOLING_TYPE_LAST; }
|
||||
else if (value == "rank") { params.pooling_type = LLAMA_POOLING_TYPE_RANK; }
|
||||
else { throw std::invalid_argument("invalid value"); }
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_POOLING"));
|
||||
|
@ -1750,6 +1755,13 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||
params.embedding = true;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_EMBEDDINGS"));
|
||||
add_opt(llama_arg(
|
||||
{"--reranking", "--rerank"},
|
||||
format("enable reranking endpoint on server (default: %s)", params.reranking ? "enabled" : "disabled"),
|
||||
[](gpt_params & params) {
|
||||
params.reranking = true;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_RERANKING"));
|
||||
add_opt(llama_arg(
|
||||
{"--api-key"}, "KEY",
|
||||
"API key to use for authentication (default: none)",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue