Try out smaller binaries

This commit is contained in:
Concedo 2025-05-07 14:56:34 +08:00
parent a5b6f372a3
commit b951310ca5
6 changed files with 35 additions and 13 deletions

View file

@@ -11,6 +11,7 @@ on:
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
KCPP_CUDA: 12.1.0
ARCHES_CU12: 1
jobs:
linux:

View file

@@ -11,6 +11,7 @@ on:
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
NOAVX2: 1
ARCHES_CU11: 1
jobs:
linux:

View file

@@ -131,14 +131,14 @@ if (LLAMA_CUBLAS)
# 70 == (assumed) compute capability at which unrolling a loop in mul_mat_q kernels is faster
# 75 == int8 tensor cores
if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") # needed for f16 CUDA intrinsics
set(CMAKE_CUDA_ARCHITECTURES "60-virtual;61-virtual;70-virtual;75-virtual") # needed for f16 CUDA intrinsics
else()
message("CUDA Toolkit Version: ${CUDAToolkit_VERSION}")
if(CUDAToolkit_VERSION VERSION_GREATER 12)
add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu12 build
set(CMAKE_CUDA_ARCHITECTURES "50;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
else()
set(CMAKE_CUDA_ARCHITECTURES "35;50;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
set(CMAKE_CUDA_ARCHITECTURES "35-virtual;50-virtual;61-virtual;70-virtual;75-virtual") # lowest CUDA 11 standard + lowest for integer intrinsics
endif()
endif()
endif()

View file

@@ -189,19 +189,32 @@ ifdef LLAMA_ADD_CONDA_PATHS
CUBLASLD_FLAGS += -Lconda/envs/linux/lib -Lconda/envs/linux/lib/stubs
endif
ifdef CUDA_DOCKER_ARCH
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
else
ifdef LLAMA_PORTABLE
ifdef LLAMA_COLAB #colab does not need all targets, all-major doesn't work correctly with pascal
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all-major
ifdef LLAMA_ARCHES_CU11
NVCCFLAGS += -Wno-deprecated-gpu-targets \
-gencode arch=compute_35,code=compute_35 \
-gencode arch=compute_50,code=compute_50 \
-gencode arch=compute_61,code=compute_61 \
-gencode arch=compute_70,code=compute_70 \
-gencode arch=compute_75,code=compute_75
else ifdef LLAMA_ARCHES_CU12
NVCCFLAGS += -Wno-deprecated-gpu-targets \
-gencode arch=compute_50,code=compute_50 \
-gencode arch=compute_61,code=compute_61 \
-gencode arch=compute_70,code=compute_70 \
-gencode arch=compute_75,code=compute_75 \
-gencode arch=compute_80,code=compute_80
else
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all
endif #LLAMA_ARCHES_CU11 / LLAMA_ARCHES_CU12
endif
else
NVCCFLAGS += -arch=native
endif #LLAMA_PORTABLE
endif # CUDA_DOCKER_ARCH
endif # LLAMA_PORTABLE
ifdef LLAMA_CUDA_F16
NVCCFLAGS += -DGGML_CUDA_F16

View file

@@ -59,7 +59,7 @@ KoboldCpp can now also be run on Novita AI, a newer alternative GPU cloud provid
## Docker
- The official docker can be found at https://hub.docker.com/r/koboldai/koboldcpp
- If you're building your own docker, remember to set CUDA_DOCKER_ARCH or enable LLAMA_PORTABLE
- If you're building your own docker, remember to enable LLAMA_PORTABLE
## Obtaining a GGUF model
- KoboldCpp uses GGUF models. They are not included with KoboldCpp, but you can download GGUF files from other places such as [Bartowski's Huggingface](https://huggingface.co/bartowski). Search for "GGUF" on huggingface.co for plenty of compatible models in the `.gguf` format.

View file

@@ -21,11 +21,18 @@ KCPP_CUDA=$(<conda/envs/linux/cudaver)
KCPP_CUDAAPPEND=-cuda${KCPP_CUDA//.}$KCPP_APPEND
LLAMA_NOAVX2_FLAG=""
ARCHES_FLAG=""
if [ -n "$NOAVX2" ]; then
LLAMA_NOAVX2_FLAG="LLAMA_NOAVX2=1"
fi
if [ -n "$ARCHES_CU11" ]; then
ARCHES_FLAG="LLAMA_ARCHES_CU11=1"
fi
if [ -n "$ARCHES_CU12" ]; then
ARCHES_FLAG="LLAMA_ARCHES_CU12=1"
fi
bin/micromamba run -r conda -p conda/envs/linux make -j$(nproc) LLAMA_VULKAN=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1 $LLAMA_NOAVX2_FLAG
bin/micromamba run -r conda -p conda/envs/linux make -j$(nproc) LLAMA_VULKAN=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1 $LLAMA_NOAVX2_FLAG $ARCHES_FLAG
if [ $? -ne 0 ]; then
echo "Error: make failed."
exit 1