mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-13 10:29:43 +00:00
Add CMake flag for pipeline parallelism for multi-GPU (#940)
The LCPP default is 4, which is a bit too much in my opinion. Setting it to 2 saves VRAM (0.5-1%?), some compute, and some electricity, at the expense of some potential performance (prompt processing?) that I do not notice in usage. 2 is thus my own setting.
This commit is contained in:
parent
f7a0d252e6
commit
dd5cda06b7
1 changed file with 2 additions and 0 deletions
|
@@ -26,6 +26,7 @@ set(LLAMA_GPROF OFF)
|
|||
set(LLAMA_SANITIZE_THREAD OFF)
|
||||
set(LLAMA_SANITIZE_ADDRESS OFF)
|
||||
set(LLAMA_SANITIZE_UNDEFINED OFF)
|
||||
set(LLAMA_SCHED_MAX_COPIES "2" CACHE STRING "llama: max input copies for pipeline parallelism")
|
||||
|
||||
# instruction set specific
|
||||
option(LLAMA_AVX "llama: enable AVX" ON)
|
||||
|
@@ -66,6 +67,7 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
|
|||
find_package(Threads REQUIRED)
|
||||
|
||||
add_compile_definitions(LOG_DISABLE_LOGS)
|
||||
add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES})
|
||||
|
||||
file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
|
||||
list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue