mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
This change moves the reasoning_budget_message parameter from the common params into the sampling parameters. It also removes the reasoning_budget common parameter and standardizes on the existing reasoning_budget_tokens parameter in the sampling configuration. Issue: https://github.com/ggml-org/llama.cpp/issues/20429 Original PR: https://github.com/ggml-org/llama.cpp/pull/20297
This commit is contained in:
parent
134d6e54d4
commit
750579ff14
4 changed files with 7 additions and 8 deletions
|
|
@ -3122,14 +3122,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
"token budget for thinking: -1 for unrestricted, 0 for immediate end, N>0 for token budget (default: -1)",
|
||||
[](common_params & params, int value) {
|
||||
if (value < -1) { throw std::invalid_argument("invalid value"); }
|
||||
params.reasoning_budget = value;
|
||||
params.sampling.reasoning_budget_tokens = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_THINK_BUDGET"));
|
||||
add_opt(common_arg(
|
||||
{"--reasoning-budget-message"}, "MESSAGE",
|
||||
"message injected before the end-of-thinking tag when reasoning budget is exhausted (default: none)",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.reasoning_budget_message = value;
|
||||
params.sampling.reasoning_budget_message = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_THINK_BUDGET_MESSAGE"));
|
||||
add_opt(common_arg(
|
||||
|
|
|
|||
|
|
@ -274,6 +274,7 @@ struct common_params_sampling {
|
|||
std::vector<llama_token> reasoning_budget_start; // start tag token sequence
|
||||
std::vector<llama_token> reasoning_budget_end; // end tag token sequence
|
||||
std::vector<llama_token> reasoning_budget_forced; // forced sequence (message + end tag)
|
||||
std::string reasoning_budget_message; // message injected before end tag when budget exhausted
|
||||
|
||||
bool backend_sampling = false;
|
||||
|
||||
|
|
@ -581,8 +582,6 @@ struct common_params {
|
|||
bool force_pure_content_parser = false;
|
||||
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
int enable_reasoning = -1; // -1 = auto, 0 = disable, 1 = enable
|
||||
int reasoning_budget = -1;
|
||||
std::string reasoning_budget_message; // message injected before end tag when budget exhausted
|
||||
bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
|
||||
int sleep_idle_seconds = -1; // if >0, server will sleep after this many seconds of idle time
|
||||
|
||||
|
|
|
|||
|
|
@ -77,8 +77,8 @@ struct cli_context {
|
|||
// defaults.return_progress = true; // TODO: show progress
|
||||
|
||||
verbose_prompt = params.verbose_prompt;
|
||||
reasoning_budget = params.reasoning_budget;
|
||||
reasoning_budget_message = params.reasoning_budget_message;
|
||||
reasoning_budget = params.sampling.reasoning_budget_tokens;
|
||||
reasoning_budget_message = params.sampling.reasoning_budget_message;
|
||||
}
|
||||
|
||||
std::string generate_completion(result_timings & out_timings) {
|
||||
|
|
|
|||
|
|
@ -1045,8 +1045,8 @@ private:
|
|||
/* allow_image */ mctx ? mtmd_support_vision(mctx) : false,
|
||||
/* allow_audio */ mctx ? mtmd_support_audio (mctx) : false,
|
||||
/* enable_thinking */ enable_thinking,
|
||||
/* reasoning_budget */ params_base.reasoning_budget,
|
||||
/* reasoning_budget_msg */ params_base.reasoning_budget_message,
|
||||
/* reasoning_budget */ params_base.sampling.reasoning_budget_tokens,
|
||||
/* reasoning_budget_msg */ params_base.sampling.reasoning_budget_message,
|
||||
/* media_path */ params_base.media_path,
|
||||
/* force_pure_content */ params_base.force_pure_content_parser
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue