larger warmup batch

Concedo 2025-04-05 10:57:04 +08:00
parent 59c02aa1a6
commit b3143384b4

@@ -2297,8 +2297,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
            }
        }
        //determine mem per token
-       std::vector<int> tmp = {1, 2, 3, 4};
+       //warmup at least 33 tokens to trigger batch
+       std::vector<int> tmp;
+       for (int i = 1; i <= 33; ++i) {
+           tmp.push_back(i);
+       }
        llama_kv_self_clear(llama_ctx_v4);
        auto er = llama_decode(llama_ctx_v4, llama_batch_get_one(tmp.data(), tmp.size()));
        if(er!=0)
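
For context, the warmup pattern this hunk changes can be isolated as a standalone helper. The following is a minimal, hypothetical sketch rather than code from the repository: it assumes only the llama.cpp C API calls the diff itself uses (llama_kv_self_clear, llama_batch_get_one, llama_decode), and the helper name warmup_decode is invented for illustration. Per the in-code comment, a 4-token warmup is too small to trigger the batched decode path, so the warmup now pushes 33 dummy tokens through llama_decode once at load time.

// Hypothetical helper illustrating the warmup pattern above; not repository code.
#include <vector>
#include "llama.h"

static bool warmup_decode(llama_context * ctx, int n_tokens)
{
    // Dummy token IDs; only the batch size matters here, the output is discarded.
    std::vector<llama_token> tmp;
    for (llama_token i = 1; i <= n_tokens; ++i) {
        tmp.push_back(i);
    }
    llama_kv_self_clear(ctx); // start from an empty KV cache, as in the diff
    const int32_t er = llama_decode(ctx, llama_batch_get_one(tmp.data(), (int32_t)tmp.size()));
    return er == 0; // a nonzero return from llama_decode signals failure
}

Usage mirroring the diff would be warmup_decode(llama_ctx_v4, 33), where 33 is the "at least 33 tokens" threshold named in the commit's comment.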