mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-09 16:44:35 +00:00
Allow single-token prompt processing (an actual batch size of 1)
This commit is contained in:
commit
6b6597ebf1
24 changed files with 413 additions and 846 deletions
|
@@ -1832,10 +1832,14 @@ static int GetBatchSize(int desiredBlasBatchSize,FileFormat in_file_format)
|
|||
file_format == FileFormat::GPTJ_2 ||
|
||||
file_format == FileFormat::RWKV_1 ||
|
||||
file_format==FileFormat::RWKV_2);
|
||||
if(!approved_format || desiredBlasBatchSize<=0)
|
||||
if(!approved_format && desiredBlasBatchSize>0)
|
||||
{
|
||||
desiredBlasBatchSize = 16;
|
||||
}
|
||||
if(desiredBlasBatchSize<=0)
|
||||
{
|
||||
desiredBlasBatchSize = 1;
|
||||
}
|
||||
if (file_format != FileFormat::GGML && file_format != FileFormat::GGHF && file_format != FileFormat::GGJT && file_format != FileFormat::GGJT_2 && file_format != FileFormat::GGJT_3 && file_format != FileFormat::GGUF_GENERIC)
|
||||
{
|
||||
desiredBlasBatchSize = (desiredBlasBatchSize > 256 ? 256 : desiredBlasBatchSize);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue