mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
Breaking change: due to the CUDA 12 upgrade, release filenames will change. Standardize them on the Windows naming scheme going forward. (+1 squashed commits)
Squashed commits: [75842919a] cuda12.4 test
This commit is contained in:
parent
50a27793d3
commit
eec5a8ad16
10 changed files with 23 additions and 39 deletions
|
|
@ -10,7 +10,7 @@ on:
|
||||||
|
|
||||||
env:
|
env:
|
||||||
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
||||||
KCPP_CUDA: 12.1.0
|
KCPP_CUDA: 12.4.0
|
||||||
ARCHES_CU12: 1
|
ARCHES_CU12: 1
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|
@ -51,6 +51,9 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
./koboldcpp.sh dist
|
./koboldcpp.sh dist
|
||||||
|
|
||||||
|
- name: Rename file before upload
|
||||||
|
run: mv dist/koboldcpp-linux-x64-cuda1240 dist/koboldcpp-linux-x64-cuda12
|
||||||
|
|
||||||
- name: Save artifact
|
- name: Save artifact
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,9 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
./koboldcpp.sh dist
|
./koboldcpp.sh dist
|
||||||
|
|
||||||
|
- name: Rename file before upload
|
||||||
|
run: mv dist/koboldcpp-linux-x64-cuda1150 dist/koboldcpp-linux-x64-cuda11
|
||||||
|
|
||||||
- name: Save artifact
|
- name: Save artifact
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
|
|
|
||||||
|
|
@ -40,34 +40,6 @@ jobs:
|
||||||
& "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -all -products * -format json
|
& "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -all -products * -format json
|
||||||
shell: pwsh
|
shell: pwsh
|
||||||
|
|
||||||
- name: Visual Studio 2019 Reinstall
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
@echo off
|
|
||||||
echo Preparing setup
|
|
||||||
curl -fLO https://download.visualstudio.microsoft.com/download/pr/1fbe074b-8ae1-4e9b-8e83-d1ce4200c9d1/61098e228df7ba3a6a8b4e920a415ad8878d386de6dd0f23f194fe1a55db189a/vs_Enterprise.exe
|
|
||||||
vs_Enterprise.exe --quiet --add Microsoft.VisualStudio.Workload.VCTools --add Microsoft.VisualStudio.Component.VC.CLI.Support --add Microsoft.VisualStudio.Component.Windows10SDK.19041 --add Microsoft.VisualStudio.Workload.UniversalBuildTools --add Microsoft.VisualStudio.Component.VC.CMake.Project
|
|
||||||
echo Waiting for VS2019 setup
|
|
||||||
set "ProcessName=setup.exe"
|
|
||||||
:CheckProcess
|
|
||||||
tasklist /FI "IMAGENAME eq %ProcessName%" | find /I "%ProcessName%" >nul
|
|
||||||
if %errorlevel%==0 (
|
|
||||||
ping 127.0.0.1 /n 5 >nul
|
|
||||||
goto CheckProcess
|
|
||||||
)
|
|
||||||
echo VS2019 Setup completed
|
|
||||||
exit /b 0
|
|
||||||
|
|
||||||
- name: Disable Visual Studio 2022 by Renaming
|
|
||||||
run: |
|
|
||||||
Rename-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise" "Enterprise_DISABLED"
|
|
||||||
shell: pwsh
|
|
||||||
|
|
||||||
- name: Display full Visual Studio info After
|
|
||||||
run: |
|
|
||||||
& "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -all -products * -format json
|
|
||||||
shell: pwsh
|
|
||||||
|
|
||||||
- name: Download and install win64devkit
|
- name: Download and install win64devkit
|
||||||
run: |
|
run: |
|
||||||
curl -L https://github.com/skeeto/w64devkit/releases/download/v1.22.0/w64devkit-1.22.0.zip --output w64devkit.zip
|
curl -L https://github.com/skeeto/w64devkit/releases/download/v1.22.0/w64devkit-1.22.0.zip --output w64devkit.zip
|
||||||
|
|
@ -94,7 +66,7 @@ jobs:
|
||||||
- uses: Jimver/cuda-toolkit@v0.2.15
|
- uses: Jimver/cuda-toolkit@v0.2.15
|
||||||
id: cuda-toolkit
|
id: cuda-toolkit
|
||||||
with:
|
with:
|
||||||
cuda: '12.1.0'
|
cuda: '12.4.0'
|
||||||
use-github-cache: false
|
use-github-cache: false
|
||||||
|
|
||||||
- name: Build CUDA
|
- name: Build CUDA
|
||||||
|
|
@ -114,8 +86,8 @@ jobs:
|
||||||
# ls
|
# ls
|
||||||
- name: Copy CuBLAS Libraries
|
- name: Copy CuBLAS Libraries
|
||||||
run: |
|
run: |
|
||||||
copy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\bin\cublasLt64_12.dll" .
|
copy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin\cublasLt64_12.dll" .
|
||||||
copy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\bin\cublas64_12.dll" .
|
copy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin\cublas64_12.dll" .
|
||||||
ls
|
ls
|
||||||
|
|
||||||
- name: Package PyInstallers
|
- name: Package PyInstallers
|
||||||
|
|
|
||||||
|
|
@ -134,6 +134,7 @@ if (LLAMA_CUBLAS)
|
||||||
add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu12 build
|
add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu12 build
|
||||||
set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
|
set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
|
||||||
else()
|
else()
|
||||||
|
add_compile_definitions(KCPP_LIMIT_CUDA_MAX_ARCH=750) #will cause issues with ggml_cuda_highest_compiled_arch if removed
|
||||||
set(CMAKE_CUDA_ARCHITECTURES "35-virtual;50-virtual;61-virtual;70-virtual;75-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
|
set(CMAKE_CUDA_ARCHITECTURES "35-virtual;50-virtual;61-virtual;70-virtual;75-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
|
||||||
3
Makefile
3
Makefile
|
|
@ -198,7 +198,8 @@ ifdef LLAMA_ARCHES_CU11
|
||||||
-gencode arch=compute_50,code=compute_50 \
|
-gencode arch=compute_50,code=compute_50 \
|
||||||
-gencode arch=compute_61,code=compute_61 \
|
-gencode arch=compute_61,code=compute_61 \
|
||||||
-gencode arch=compute_70,code=compute_70 \
|
-gencode arch=compute_70,code=compute_70 \
|
||||||
-gencode arch=compute_75,code=compute_75
|
-gencode arch=compute_75,code=compute_75 \
|
||||||
|
-DKCPP_LIMIT_CUDA_MAX_ARCH=750
|
||||||
|
|
||||||
else ifdef LLAMA_ARCHES_CU12
|
else ifdef LLAMA_ARCHES_CU12
|
||||||
NVCCFLAGS += -Wno-deprecated-gpu-targets \
|
NVCCFLAGS += -Wno-deprecated-gpu-targets \
|
||||||
|
|
|
||||||
|
|
@ -33,11 +33,11 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models
|
||||||
- You can also run it using the command line. For info, please check `koboldcpp.exe --help`
|
- You can also run it using the command line. For info, please check `koboldcpp.exe --help`
|
||||||
|
|
||||||
## Linux Usage (Precompiled Binary, Recommended)
|
## Linux Usage (Precompiled Binary, Recommended)
|
||||||
On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda1150` prebuilt PyInstaller binary for greatest compatibility on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary (You may have to `chmod +x` it first). If you have a newer device, you can also try the `koboldcpp-linux-x64-cuda1210` instead for better speeds.
|
On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda11` prebuilt PyInstaller binary for greatest compatibility on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary (You may have to `chmod +x` it first). If you have a newer device, you can also try the `koboldcpp-linux-x64-cuda12` instead for better speeds.
|
||||||
|
|
||||||
Alternatively, you can also install koboldcpp to the current directory by running the following terminal command:
|
Alternatively, you can also install koboldcpp to the current directory by running the following terminal command:
|
||||||
```
|
```
|
||||||
curl -fLo koboldcpp https://github.com/LostRuins/koboldcpp/releases/latest/download/koboldcpp-linux-x64-cuda1150 && chmod +x koboldcpp
|
curl -fLo koboldcpp https://github.com/LostRuins/koboldcpp/releases/latest/download/koboldcpp-linux-x64-cuda11 && chmod +x koboldcpp
|
||||||
```
|
```
|
||||||
After running this command you can launch Koboldcpp from the current directory using `./koboldcpp` in the terminal (for CLI usage, run with `--help`).
|
After running this command you can launch Koboldcpp from the current directory using `./koboldcpp` in the terminal (for CLI usage, run with `--help`).
|
||||||
Finally, obtain and load a GGUF model. See [here](#Obtaining-a-GGUF-model)
|
Finally, obtain and load a GGUF model. See [here](#Obtaining-a-GGUF-model)
|
||||||
|
|
|
||||||
BIN
cudart64_12.dll
BIN
cudart64_12.dll
Binary file not shown.
|
|
@ -1,6 +1,6 @@
|
||||||
name: koboldcpp
|
name: koboldcpp
|
||||||
channels:
|
channels:
|
||||||
- nvidia/label/cuda-12.1.0
|
- nvidia/label/cuda-12.4.0
|
||||||
- conda-forge
|
- conda-forge
|
||||||
- defaults
|
- defaults
|
||||||
dependencies:
|
dependencies:
|
||||||
|
|
|
||||||
|
|
@ -122,7 +122,11 @@ constexpr int ggml_cuda_highest_compiled_arch(const int arch) {
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static int ggml_cuda_highest_compiled_arch(const int arch) {
|
static int ggml_cuda_highest_compiled_arch(const int arch) {
|
||||||
return (arch>__CUDA_ARCH__?__CUDA_ARCH__:arch);
|
#ifndef KCPP_LIMIT_CUDA_MAX_ARCH
|
||||||
|
return arch;
|
||||||
|
#else
|
||||||
|
return (arch > KCPP_LIMIT_CUDA_MAX_ARCH ? KCPP_LIMIT_CUDA_MAX_ARCH : arch);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif // __CUDA_ARCH_LIST__
|
#endif // __CUDA_ARCH_LIST__
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,9 +7,9 @@ fi
|
||||||
if [[ ! -f "conda/envs/linux/bin/python" && $KCPP_CUDA != "rocm" || $1 == "rebuild" && $KCPP_CUDA != "rocm" ]]; then
|
if [[ ! -f "conda/envs/linux/bin/python" && $KCPP_CUDA != "rocm" || $1 == "rebuild" && $KCPP_CUDA != "rocm" ]]; then
|
||||||
cp environment.yaml environment.tmp.yaml
|
cp environment.yaml environment.tmp.yaml
|
||||||
if [ -n "$KCPP_CUDA" ]; then
|
if [ -n "$KCPP_CUDA" ]; then
|
||||||
sed -i -e "s/nvidia\/label\/cuda-12.1.0/nvidia\/label\/cuda-$KCPP_CUDA/g" environment.tmp.yaml
|
sed -i -e "s/nvidia\/label\/cuda-12.4.0/nvidia\/label\/cuda-$KCPP_CUDA/g" environment.tmp.yaml
|
||||||
else
|
else
|
||||||
KCPP_CUDA=12.1.0
|
KCPP_CUDA=12.4.0
|
||||||
fi
|
fi
|
||||||
bin/micromamba create --no-rc --no-shortcuts -r conda -p conda/envs/linux -f environment.tmp.yaml -y
|
bin/micromamba create --no-rc --no-shortcuts -r conda -p conda/envs/linux -f environment.tmp.yaml -y
|
||||||
bin/micromamba create --no-rc --no-shortcuts -r conda -p conda/envs/linux -f environment.tmp.yaml -y
|
bin/micromamba create --no-rc --no-shortcuts -r conda -p conda/envs/linux -f environment.tmp.yaml -y
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue