From 2ef03c9de6e185579c391dd4f88235d33d37a3a5 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Fri, 15 Mar 2024 16:45:20 +0800
Subject: [PATCH] fix for physical batch size

---
 gpttype_adapter.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 1c0e029b0..59d6a3ae1 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -748,6 +748,10 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     kcpp_params->n_threads_batch = inputs.blasthreads;
     bool isGguf = (file_format == FileFormat::GGUF_GENERIC);
     kcpp_params->n_batch = GetBatchSize(inputs.blasbatchsize, in_file_format);
+    if(kcpp_params->n_batch>512)
+    {
+        kcpp_params->n_ubatch = (kcpp_params->n_batch>1024?1024:kcpp_params->n_batch);
+    }
     modelname = kcpp_params->model = inputs.model_filename;
     useSmartContext = inputs.use_smartcontext;
     useContextShift = inputs.use_contextshift;
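
The added lines raise the physical (micro) batch size whenever the logical batch exceeds 512, capping it at 1024. Below is a minimal standalone sketch of that clamping rule, extracted from the diff for illustration; the helper name clamp_ubatch and the assumed default n_ubatch of 512 are hypothetical, not part of the patch.

    #include <cstdio>

    // Mirror of the patch logic: only override the physical batch when the
    // logical batch exceeds 512, and never let it grow beyond 1024.
    static int clamp_ubatch(int n_batch, int n_ubatch)
    {
        if (n_batch > 512)
        {
            n_ubatch = (n_batch > 1024 ? 1024 : n_batch);
        }
        return n_ubatch;
    }

    int main()
    {
        printf("%d\n", clamp_ubatch(256, 512));   // stays at the default: 512
        printf("%d\n", clamp_ubatch(768, 512));   // raised to match n_batch: 768
        printf("%d\n", clamp_ubatch(2048, 512));  // capped: 1024
        return 0;
    }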