mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-31 21:39:42 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # CMakeLists.txt # docs/speculative.md # ggml/src/ggml-cuda/CMakeLists.txt # ggml/src/ggml-hexagon/ggml-hexagon.cpp # ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c # ggml/src/ggml-hexagon/htp/hmx-ops.h # ggml/src/ggml-hexagon/htp/main.c # ggml/src/ggml-hexagon/htp/matmul-ops.c # ggml/src/ggml-hexagon/htp/rope-ops.c # ggml/src/ggml-hexagon/htp/ssm-conv.c # ggml/src/ggml-opencl/ggml-opencl.cpp # scripts/snapdragon/adb/run-bench.sh # scripts/snapdragon/adb/run-cli.sh # scripts/snapdragon/adb/run-completion.sh # scripts/snapdragon/adb/run-mtmd.sh # scripts/snapdragon/windows/run-bench.ps1 # scripts/snapdragon/windows/run-cli.ps1 # scripts/snapdragon/windows/run-completion.ps1 # scripts/snapdragon/windows/run-mtmd.ps1 # src/llama-vocab.cpp # tests/test-backend-ops.cpp # tools/batched-bench/CMakeLists.txt # tools/batched-bench/batched-bench.cpp # tools/cli/CMakeLists.txt # tools/cli/README.md # tools/cli/cli.cpp # tools/completion/CMakeLists.txt # tools/completion/README.md # tools/llama-bench/CMakeLists.txt # tools/llama-bench/llama-bench.cpp # tools/mtmd/CMakeLists.txt # tools/mtmd/tests/test-deepseek-ocr.py # tools/mtmd/tests/tests-requirements.txt # tools/perplexity/CMakeLists.txt # tools/perplexity/perplexity.cpp # tools/quantize/CMakeLists.txt # tools/server/CMakeLists.txt # tools/server/README.md # ty.toml
This commit is contained in:
commit
718dc159b6
83 changed files with 1469 additions and 648 deletions
|
|
@ -3593,6 +3593,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
params.speculative.draft.p_min = std::stof(value);
|
||||
}
|
||||
).set_spec().set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_SPEC_DRAFT_P_MIN"));
|
||||
add_opt(common_arg(
|
||||
{"--spec-draft-backend-sampling"},
|
||||
{"--no-spec-draft-backend-sampling"},
|
||||
string_format("offload draft sampling to the backend (default: %s)",
|
||||
params.speculative.draft.backend_sampling ? "enabled" : "disabled"),
|
||||
[](common_params & params, bool value) {
|
||||
params.speculative.draft.backend_sampling = value;
|
||||
}
|
||||
).set_spec().set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_SPEC_DRAFT_BACKEND_SAMPLING"));
|
||||
add_opt(common_arg(
|
||||
{"--spec-draft-device", "-devd", "--device-draft"}, "<dev1,dev2,..>",
|
||||
"comma-separated list of devices to use for offloading the draft model (none = don't offload)\n"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue