mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-13 02:19:41 +00:00
Reverted sequence mode for RWKV due to multiple issues, notably speed loss with bigger quantized models.
This commit is contained in:
parent
f83b66606b
commit
3ed3e7b7e2
4 changed files with 5 additions and 5 deletions
|
@@ -484,8 +484,8 @@ struct rwkv_ggml_context {
|
|||
return;
|
||||
}
|
||||
|
||||
- const size_t memory_required_overhead = size_t(256) * 1024 * 1024;
|
||||
- const size_t memory_required_overhead_sc = size_t(128) * 1024 * 1024;
|
||||
+ const size_t memory_required_overhead = size_t(128) * 1024 * 1024;
|
||||
+ const size_t memory_required_overhead_sc = size_t(64) * 1024 * 1024;
|
||||
|
||||
ctx = ggml_init({ size.objects_count * GGML_OBJECT_SIZE + size.objects_size + memory_required_overhead, NULL, false});
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue