Merge branch 'master' into concedo_experimental

# Conflicts:
#	Makefile
#	README.md
#	ci/run.sh
#	ggml-quants.c
#	ggml.c
#	grammars/json.gbnf
#	grammars/json_arr.gbnf
#	llama.cpp
#	scripts/compare-llama-bench.py
This commit is contained in:
Concedo 2024-03-09 10:58:49 +08:00
commit b4ca54401d
36 changed files with 49217 additions and 49887 deletions

View file

@ -810,7 +810,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
const int n_batch = params.n_batch;
const int max_tasks_per_batch = 32;
const int max_seq = 4*max_tasks_per_batch;
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
@ -1087,7 +1087,7 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
const int n_batch = params.n_batch;
const int max_tasks_per_batch = 128;
const int max_seq = 2*max_tasks_per_batch;
const int max_seq = std::min(2*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
@ -1439,7 +1439,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
const int n_batch = params.n_batch;
const int max_tasks_per_batch = 32;
const int max_seq = 4*max_tasks_per_batch;
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
@ -1816,6 +1816,9 @@ int main(int argc, char ** argv) {
llama_model * model;
llama_context * ctx;
// ensure there's at least enough seq_ids for HellaSwag
params.n_parallel = std::max(4, params.n_parallel);
// load the model and apply lora adapter, if any
std::tie(model, ctx) = llama_init_from_gpt_params(params);
if (model == NULL) {