Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-13 18:39:48 +00:00)
Merge branch 'master' into concedo_experimental

Should fix multi-GPU.

Commit: a62468ec4c
4 changed files with 158 additions and 86 deletions
llama.cpp | 11

@@ -5196,11 +5196,12 @@ static int llama_decode_internal(
     // If all tensors can be run on the GPU then using more than 1 thread is detrimental.
     const bool full_offload_supported =
-        model.arch == LLM_ARCH_LLAMA ||
-        model.arch == LLM_ARCH_BAICHUAN ||
-        model.arch == LLM_ARCH_FALCON ||
-        model.arch == LLM_ARCH_REFACT ||
-        model.arch == LLM_ARCH_MPT;
+        model.arch == LLM_ARCH_LLAMA ||
+        model.arch == LLM_ARCH_BAICHUAN ||
+        model.arch == LLM_ARCH_FALCON ||
+        model.arch == LLM_ARCH_REFACT ||
+        model.arch == LLM_ARCH_MPT ||
+        model.arch == LLM_ARCH_STARCODER;

     const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 3;
     if (ggml_cpu_has_cublas() && full_offload_supported && fully_offloaded) {
         n_threads = 1;
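
For context, here is a minimal, self-contained sketch of the heuristic this hunk extends. The struct and helper names (Model, pick_decode_threads) are hypothetical; in llama.cpp the logic lives inline in llama_decode_internal. The idea, per the comment in the diff: when cuBLAS is available, the architecture supports full offload, and every layer is on the GPU, extra CPU threads only add synchronization overhead, so decoding drops to a single thread.

#include <cstdint>

enum llm_arch {
    LLM_ARCH_LLAMA, LLM_ARCH_BAICHUAN, LLM_ARCH_FALCON,
    LLM_ARCH_REFACT, LLM_ARCH_MPT, LLM_ARCH_STARCODER, LLM_ARCH_OTHER,
};

struct Model {                // hypothetical stand-in for llama.cpp's model state
    llm_arch arch;
    int      n_gpu_layers;    // layers offloaded to the GPU
    uint32_t n_layer;         // total transformer layers (hparams.n_layer)
};

static int pick_decode_threads(const Model & model, bool has_cublas, int n_threads) {
    // Architectures the commit treats as supporting full offload,
    // with STARCODER newly added by this hunk.
    const bool full_offload_supported =
        model.arch == LLM_ARCH_LLAMA    ||
        model.arch == LLM_ARCH_BAICHUAN ||
        model.arch == LLM_ARCH_FALCON   ||
        model.arch == LLM_ARCH_REFACT   ||
        model.arch == LLM_ARCH_MPT      ||
        model.arch == LLM_ARCH_STARCODER;

    // Mirrors the diff's check: n_gpu_layers >= n_layer + 3, the extra 3
    // presumably covering non-repeating tensors (e.g. norm/output) offloaded
    // beyond the per-layer weights.
    const bool fully_offloaded = model.n_gpu_layers >= (int) model.n_layer + 3;

    // All tensors on the GPU: more than 1 CPU thread is detrimental.
    return (has_cublas && full_offload_supported && fully_offloaded) ? 1 : n_threads;
}

For example, pick_decode_threads(model, /*has_cublas=*/true, 8) returns 1 for a fully offloaded StarCoder model and leaves the caller's 8 threads untouched otherwise.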