mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 00:54:41 +00:00
merge CLBlast improvements - GPU dequant
This commit is contained in:
parent
3e992eabb4
commit
ad5676810a
4 changed files with 13 additions and 5 deletions
|
@@ -30,6 +30,7 @@ static int n_past = 0;
|
|||
static int n_threads = 4;
|
||||
static int n_batch = 8;
|
||||
static bool useSmartContext = false;
|
||||
static int blasbatchsize = 512;
|
||||
static std::string modelname;
|
||||
static std::vector<gpt_vocab::id> last_n_tokens;
|
||||
static std::vector<gpt_vocab::id> current_context_tokens;
|
||||
|
@@ -53,6 +54,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
n_batch = params.n_batch = inputs.batch_size;
|
||||
modelname = params.model = inputs.model_filename;
|
||||
useSmartContext = inputs.use_smartcontext;
|
||||
blasbatchsize = inputs.blasbatchsize;
|
||||
params.memory_f16 = inputs.f16_kv;
|
||||
params.n_ctx = inputs.max_context_length;
|
||||
model_v1.hparams.n_ctx = model_v2.hparams.n_ctx = model_gpt2_v1.hparams.n_ctx = model_gpt2_v2.hparams.n_ctx = params.n_ctx;
|
||||
|
@@ -208,7 +210,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
int original_threads = params.n_threads;
|
||||
if (blasmode)
|
||||
{
|
||||
params.n_batch = 512; //received reports of 1024 and above crashing on some models
|
||||
params.n_batch = blasbatchsize; //received reports of 1024 and above crashing on some models
|
||||
params.n_threads = 1;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue