Reverted sequence mode for RWKV due to multiple issues causing speed loss with larger quantized models

This commit is contained in:
Concedo 2023-06-14 20:03:14 +08:00
parent f83b66606b
commit 3ed3e7b7e2
4 changed files with 5 additions and 5 deletions

View file

@ -479,7 +479,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
}
else
{
n_batch = 8; //use sequence mode to speedup
n_batch = 1; //do not use sequence mode for speedup until it is fixed
//setup buffers for rwkv state
auto padding = 512u;