mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-08 09:59:50 +00:00
embeddings memory usage regression fix
This commit is contained in:
parent
3816391a74
commit
7b4517c2fe
3 changed files with 135 additions and 39 deletions
|
|
@@ -413,6 +413,7 @@ void llama_context::sched_reserve() {
|
|||
|
||||
sched.reset(ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), max_nodes, cparams.pipeline_parallel, cparams.op_offload));
|
||||
|
||||
if (memory) { //added by kcpp to reduce embeddings memory usage
|
||||
llama_memory_context_ptr mctx;
|
||||
if (memory) {
|
||||
LLAMA_LOG_DEBUG("%s: reserving full memory module\n", __func__);
|
||||
|
|
@@ -547,6 +548,7 @@ void llama_context::sched_reserve() {
|
|||
|
||||
LLAMA_LOG_INFO("%s: reserve took %.2f ms, sched copies = %d\n",
|
||||
__func__, (t_end_us - t_start_us)/1000.0, ggml_backend_sched_get_n_copies(sched.get()));
|
||||
} //end kcpp fix to reduce embeddings memory usage
|
||||
}
|
||||
|
||||
void llama_context::synchronize() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue