Reverted sequence mode for rwkv due to multiple speed-loss issues with bigger quantized models

This commit is contained in:
Concedo 2023-06-14 20:03:14 +08:00
parent f83b66606b
commit 3ed3e7b7e2
4 changed files with 5 additions and 5 deletions

View file

@@ -484,8 +484,8 @@ struct rwkv_ggml_context {
return;
}
-const size_t memory_required_overhead = size_t(256) * 1024 * 1024;
-const size_t memory_required_overhead_sc = size_t(128) * 1024 * 1024;
+const size_t memory_required_overhead = size_t(128) * 1024 * 1024;
+const size_t memory_required_overhead_sc = size_t(64) * 1024 * 1024;
ctx = ggml_init({ size.objects_count * GGML_OBJECT_SIZE + size.objects_size + memory_required_overhead, NULL, false});