mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-09 19:46:11 +00:00
fixed cmake compile for ggml v3
This commit is contained in:
parent
db14de5c32
commit
c939bbf7c6
2 changed files with 21 additions and 3 deletions
|
|
@ -69,6 +69,7 @@ find_package(Threads REQUIRED)
|
|||
add_compile_definitions(LOG_DISABLE_LOGS)
|
||||
|
||||
set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
|
||||
set(GGML_V3_CUDA_SOURCES otherarch/ggml_v3-cuda.cu otherarch/ggml_v3-cuda.h)
|
||||
set(GGML_V2_CUDA_SOURCES otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h)
|
||||
set(GGML_V2_LEGACY_CUDA_SOURCES otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h)
|
||||
|
||||
|
|
@ -174,6 +175,15 @@ if (LLAMA_HIPBLAS)
|
|||
set_source_files_properties(otherarch/ggml_v2-cuda.cu PROPERTIES LANGUAGE CXX)
|
||||
target_link_libraries(ggml-v2-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
|
||||
|
||||
add_library(ggml-v3-rocm OBJECT ${GGML_V3_CUDA_SOURCES})
|
||||
if (LLAMA_CUDA_FORCE_DMMV)
|
||||
target_compile_definitions(ggml-v3-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
|
||||
endif()
|
||||
target_compile_definitions(ggml-v3-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
||||
target_compile_definitions(ggml-v3-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
||||
target_compile_definitions(ggml-v3-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
||||
set_source_files_properties(otherarch/ggml_v3-cuda.cu PROPERTIES LANGUAGE CXX)
|
||||
target_link_libraries(ggml-v3-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
|
||||
|
||||
add_library(ggml-v2-legacy-rocm OBJECT ${GGML_V2_LEGACY_CUDA_SOURCES})
|
||||
if (LLAMA_CUDA_FORCE_DMMV)
|
||||
|
|
@ -404,6 +414,15 @@ target_compile_features(ggml_v2 PUBLIC c_std_11) # don't bump
|
|||
target_link_libraries(ggml_v2 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
|
||||
set_target_properties(ggml_v2 PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
add_library(ggml_v3 OBJECT
|
||||
otherarch/ggml_v3.c
|
||||
otherarch/ggml_v3.h
|
||||
${GGML_V3_CUDA_SOURCES})
|
||||
target_include_directories(ggml_v3 PUBLIC . ./otherarch ./otherarch/tools)
|
||||
target_compile_features(ggml_v3 PUBLIC c_std_11) # don't bump
|
||||
target_link_libraries(ggml_v3 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
|
||||
set_target_properties(ggml_v3 PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
add_library(common2
|
||||
common/common.cpp
|
||||
common/common.h
|
||||
|
|
@ -429,7 +448,7 @@ if (LLAMA_CUBLAS)
|
|||
set_target_properties(${TARGET} PROPERTIES PREFIX "")
|
||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
|
||||
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 common2 gpttype_adapter ${LLAMA_EXTRA_LIBS})
|
||||
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter ${LLAMA_EXTRA_LIBS})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
endif()
|
||||
|
||||
|
|
@ -441,7 +460,7 @@ if (LLAMA_HIPBLAS)
|
|||
set_target_properties(${TARGET} PROPERTIES PREFIX "")
|
||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")
|
||||
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 common2 gpttype_adapter ${LLAMA_EXTRA_LIBS})
|
||||
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter ${LLAMA_EXTRA_LIBS})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -21,7 +21,6 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models
|
|||
- **Any GPU Acceleration**: As an alternative, try CLBlast with `--useclblast` flags for a slightly slower but more GPU-compatible speedup.
|
||||
- **GPU Layer Offloading**: Want even more speedup? Combine one of the above GPU flags with `--gpulayers` to offload entire layers to the GPU! **Much faster, but uses more VRAM**. Experiment to determine number of layers to offload, and reduce by a few if you run out of memory.
|
||||
- **Increasing Context Size**: Try `--contextsize 4096` to double your context size without much perplexity gain. Note that you'll have to increase the max context in the Kobold Lite UI as well (click and edit the number text field).
|
||||
- **Reducing Prompt Processing**: Try the `--smartcontext` flag to reduce prompt processing frequency.
|
||||
- If you are having crashes or issues, you can try turning off BLAS with the `--noblas` flag. You can also try running in a non-avx2 compatibility mode with `--noavx2`. Lastly, you can try turning off mmap with `--nommap`.
|
||||
|
||||
For more information, be sure to run the program with the `--help` flag, or [check the wiki](https://github.com/LostRuins/koboldcpp/wiki).
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue