Merge branch 'master' into concedo_experimental

# Conflicts:
#	.devops/nix/package.nix
#	CMakeLists.txt
#	README.md
#	ggml-metal.m
#	ggml.c
This commit is contained in:
Concedo 2024-01-08 14:18:49 +08:00
commit f04b6e7287
18 changed files with 195 additions and 191 deletions

View file

@ -448,8 +448,14 @@ struct llama_client_slot
}
bool has_budget(gpt_params &global_params) {
if (params.n_predict == -1 && global_params.n_predict == -1)
{
return true; // limitless
}
n_remaining = -1;
if(params.n_predict != -1)
if (params.n_predict != -1)
{
n_remaining = params.n_predict - n_decoded;
}
@ -457,7 +463,8 @@ struct llama_client_slot
{
n_remaining = global_params.n_predict - n_decoded;
}
return n_remaining > 0 || n_remaining == -1; // no budget || limitless
return n_remaining > 0; // no budget
}
bool available() const {
@ -1103,7 +1110,7 @@ struct llama_server_context
}
// check the limits
if (slot.n_decoded > 2 && slot.has_next_token && !slot.has_budget(params))
if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params))
{
slot.stopped_limit = true;
slot.has_next_token = false;
@ -1704,7 +1711,6 @@ struct llama_server_context
llama_batch_add(batch, slot.sampled, system_tokens.size() + slot.n_past, { slot.id }, true);
slot.n_decoded += 1;
slot.n_past += 1;
}
@ -1922,6 +1928,7 @@ struct llama_server_context
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
slot.n_decoded += 1;
if (slot.n_decoded == 1)
{
slot.t_start_genereration = ggml_time_us();