Removed the OpenBLAS backend and merged it into the CPU backend (which uses llamafile for BLAS). A GPU backend is now automatically selected when running from the CLI, unless --noblas is specified.

Concedo 2024-09-15 19:21:52 +08:00
parent 01c7d82185
commit 53bf0fb32d
14 changed files with 115 additions and 159 deletions
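The practical effect: `--noblas` now simply means "CPU only", and when no backend flag is given on the CLI, koboldcpp probes the hardware and picks a backend itself. As a rough illustration of the selection order implemented in `auto_set_backend_cli()` below (a simplified sketch, not code from this commit):

```python
# Illustrative sketch of the new CLI backend auto-selection; simplified from
# auto_set_backend_cli() in koboldcpp.py, not the commit's actual code.
def pick_backend(noblas, cuda_vram_bytes, has_discrete_vulkan_gpu):
    if noblas:
        return "cpu"  # --noblas now means CPU only (llamafile BLAS built in)
    if cuda_vram_bytes > 3500000000:  # same 3.5 GB VRAM threshold as the real code
        return "cublas"
    if has_discrete_vulkan_gpu:
        return "vulkan"
    return "cpu"  # the "No GPU Backend found" fallback

print(pick_backend(False, 8_000_000_000, True))  # -> cublas
```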

.gitignore

@@ -107,7 +107,6 @@ tests/test-tokenizer-1-bpe
 /koboldcpp_default.so
 /koboldcpp_failsafe.so
-/koboldcpp_openblas.so
 /koboldcpp_noavx2.so
 /koboldcpp_clblast.so
 /koboldcpp_clblast_noavx2.so
@@ -116,7 +115,6 @@ tests/test-tokenizer-1-bpe
 /koboldcpp_vulkan_noavx2.so
 /koboldcpp_default.dll
 /koboldcpp_failsafe.dll
-/koboldcpp_openblas.dll
 /koboldcpp_noavx2.dll
 /koboldcpp_clblast.dll
 /koboldcpp_clblast_noavx2.dll

Makefile

@@ -1,9 +1,9 @@
 # Add custom options to Makefile.local rather than editing this file.
 -include $(abspath $(lastword ${MAKEFILE_LIST})).local
-default: koboldcpp_default koboldcpp_failsafe koboldcpp_openblas koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2
+default: koboldcpp_default koboldcpp_failsafe koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2
 tools: quantize_gpt2 quantize_gptj quantize_gguf quantize_neox quantize_mpt quantize_clip whispermain sdmain gguf-split
-dev: koboldcpp_openblas
+dev: koboldcpp_default
 dev2: koboldcpp_clblast
 dev3: koboldcpp_vulkan
@@ -75,7 +75,6 @@ SIMPLECFLAGS =
 FULLCFLAGS =
 NONECFLAGS =
-OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -DGGML_USE_BLAS -I/usr/local/include/openblas
 CLBLAST_FLAGS = -DGGML_USE_CLBLAST
 FAILSAFE_FLAGS = -DUSE_FAILSAFE
 VULKAN_FLAGS = -DGGML_USE_VULKAN -DSD_USE_VULKAN
@@ -336,7 +335,6 @@ endif
 DEFAULT_BUILD =
 FAILSAFE_BUILD =
-OPENBLAS_BUILD =
 NOAVX2_BUILD =
 CLBLAST_BUILD =
 CUBLAS_BUILD =
@@ -346,7 +344,6 @@ VULKAN_BUILD =
 ifeq ($(OS),Windows_NT)
 DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
 FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
-OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
 NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
 CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
 VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ lib/vulkan-1.lib -shared -o $@.dll $(LDFLAGS)
@@ -364,14 +361,11 @@ else
 NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
 endif
-ifdef LLAMA_OPENBLAS
-OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
-endif
 ifdef LLAMA_CLBLAST
 ifeq ($(UNAME_S),Darwin)
-CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
+CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
 else
-CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
+CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
 endif
 endif
 ifdef LLAMA_CUBLAS
@@ -384,13 +378,11 @@ else
 VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ -lvulkan -shared -o $@.so $(LDFLAGS)
 endif
-ifndef LLAMA_OPENBLAS
 ifndef LLAMA_CLBLAST
 ifndef LLAMA_CUBLAS
 ifndef LLAMA_HIPBLAS
 ifndef LLAMA_VULKAN
-OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.'
+VULKAN_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_VULKAN=1 to compile with Vulkan support. This is just a reminder, not an error.'
-endif
 endif
 endif
 endif
@@ -421,8 +413,6 @@ $(info )
 ggml.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@
-ggml_v4_openblas.o: ggml/src/ggml.c ggml/include/ggml.h
-	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
 ggml_v4_failsafe.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@
 ggml_v4_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
@@ -480,15 +470,13 @@ llavaclip_cublas.o: examples/llava/clip.cpp examples/llava/clip.h
 llavaclip_vulkan.o: examples/llava/clip.cpp examples/llava/clip.h
 	$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
-#this is only used for openblas and accelerate
+#this is only used for accelerate
 ggml-blas.o: ggml/src/ggml-blas.cpp ggml/include/ggml-blas.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 #version 3 libs
 ggml_v3.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@
-ggml_v3_openblas.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
-	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
 ggml_v3_failsafe.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
 	$(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@
 ggml_v3_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
@@ -503,8 +491,6 @@ ggml_v3_clblast_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
 #version 2 libs
 ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@
-ggml_v2_openblas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
-	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
 ggml_v2_failsafe.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
 	$(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@
 ggml_v2_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
@@ -569,8 +555,6 @@ gpttype_adapter_failsafe.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) -c $< -o $@
 gpttype_adapter.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) -c $< -o $@
-gpttype_adapter_openblas.o: $(GPTTYPE_ADAPTER)
-	$(CXX) $(CXXFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
 gpttype_adapter_clblast.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
 gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER)
@@ -583,7 +567,7 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@
 clean:
-	rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe imatrix.exe gguf.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
+	rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe imatrix.exe gguf.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
 	rm -vrf ggml/src/ggml-cuda/*.o
 	rm -vrf ggml/src/ggml-cuda/template-instances/*.o
@@ -612,14 +596,6 @@ vulkan-shaders-gen: ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
 koboldcpp_default: ggml.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o gpttype_adapter.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FULL) $(OBJS)
 	$(DEFAULT_BUILD)
-ifdef OPENBLAS_BUILD
-koboldcpp_openblas: ggml_v4_openblas.o ggml_v3_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o gpttype_adapter_openblas.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-blas.o $(OBJS_FULL) $(OBJS)
-	$(OPENBLAS_BUILD)
-else
-koboldcpp_openblas:
-	$(DONOTHING)
-endif
 ifdef FAILSAFE_BUILD
 koboldcpp_failsafe: ggml_v4_failsafe.o ggml_v3_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o gpttype_adapter_failsafe.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FAILSAFE) $(OBJS)
 	$(FAILSAFE_BUILD)

README.md

@@ -15,7 +15,7 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models
 - You can also run it using the command line. For info, please check `koboldcpp.exe --help`
 
 ## Linux Usage (Precompiled Binary, Recommended)
-On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda1150` prebuilt PyInstaller binary on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary.
+On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda1150` prebuilt PyInstaller binary on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary (You may have to `chmod +x` it first).
 
 Alternatively, you can also install koboldcpp to the current directory by running the following terminal command:
 ```
@@ -24,7 +24,8 @@ curl -fLo koboldcpp https://github.com/LostRuins/koboldcpp/releases/latest/downl
 After running this command you can launch Koboldcpp from the current directory using `./koboldcpp` in the terminal (for CLI usage, run with `--help`).
 
 ## MacOS (Precompiled Binary)
-- PyInstaller binaries for Modern ARM64 MacOS (M1, M2, M3) are now available! **[Simply download and run the MacOS binary](https://github.com/LostRuins/koboldcpp/releases/latest)**
+- PyInstaller binaries for Modern ARM64 MacOS (M1, M2, M3) are now available! **[Simply download the MacOS binary](https://github.com/LostRuins/koboldcpp/releases/latest)**
+- In a MacOS terminal window, set the file to executable `chmod +x koboldcpp-mac-arm64` and run it with `./koboldcpp-mac-arm64`.
 - Alternatively, or for older x86 MacOS computers, you can clone the repo and compile from source code, see Compiling for MacOS below.
 
 ## Run on Colab
@@ -70,13 +71,13 @@ when you can't use the precompiled binary directly, we provide an automated buil
 ### Compiling on Linux (Manual Method)
 - To compile your binaries from source, clone the repo with `git clone https://github.com/LostRuins/koboldcpp.git`
 - A makefile is provided, simply run `make`.
-- Optional OpenBLAS: Link your own install of OpenBLAS manually with `make LLAMA_OPENBLAS=1`
+- Optional Vulkan: Link your own install of Vulkan SDK manually with `make LLAMA_VULKAN=1`
 - Optional CLBlast: Link your own install of CLBlast manually with `make LLAMA_CLBLAST=1`
 - Note: for these you will need to obtain and link OpenCL and CLBlast libraries.
-  - For Arch Linux: Install `cblas` `openblas` and `clblast`.
-  - For Debian: Install `libclblast-dev` and `libopenblas-dev`.
+  - For Arch Linux: Install `cblas` and `clblast`.
+  - For Debian: Install `libclblast-dev`.
 - You can attempt a CuBLAS build with `LLAMA_CUBLAS=1`, (or `LLAMA_HIPBLAS=1` for AMD). You will need CUDA Toolkit installed. Some have also reported success with the CMake file, though that is more for windows.
-- For a full featured build (all backends), do `make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_VULKAN=1`. (Note that `LLAMA_CUBLAS=1` will not work on windows, you need visual studio)
+- For a full featured build (all backends), do `make LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_VULKAN=1`. (Note that `LLAMA_CUBLAS=1` will not work on windows, you need visual studio)
 - After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.gguf] [port]`
 
 ### Compiling on Windows
@@ -87,12 +88,11 @@ when you can't use the precompiled binary directly, we provide an automated buil
 - If you want to generate the .exe file, make sure you have the python module PyInstaller installed with pip (`pip install PyInstaller`). Then run the script `make_pyinstaller.bat`
 - The koboldcpp.exe file will be at your dist folder.
 - **Building with CUDA**: Visual Studio, CMake and CUDA Toolkit is required. Clone the repo, then open the CMake file and compile it in Visual Studio. Copy the `koboldcpp_cublas.dll` generated into the same directory as the `koboldcpp.py` file. If you are bundling executables, you may need to include CUDA dynamic libraries (such as `cublasLt64_11.dll` and `cublas64_11.dll`) in order for the executable to work correctly on a different PC.
-- **Replacing Libraries (Not Recommended)**: If you wish to use your own version of the additional Windows libraries (OpenCL, CLBlast and OpenBLAS), you can do it with:
+- **Replacing Libraries (Not Recommended)**: If you wish to use your own version of the additional Windows libraries (OpenCL, CLBlast, Vulkan), you can do it with:
   - OpenCL - tested with https://github.com/KhronosGroup/OpenCL-SDK . If you wish to compile it, follow the repository instructions. You will need vcpkg.
   - CLBlast - tested with https://github.com/CNugteren/CLBlast . If you wish to compile it you will need to reference the OpenCL files. It will only generate the ".lib" file if you compile using MSVC.
-  - OpenBLAS - tested with https://github.com/xianyi/OpenBLAS .
   - Move the respectives .lib files to the /lib folder of your project, overwriting the older files.
-  - Also, replace the existing versions of the corresponding .dll files located in the project directory root (e.g. libopenblas.dll).
+  - Also, replace the existing versions of the corresponding .dll files located in the project directory root (e.g. clblast.dll).
 - Make the KoboldCpp project using the instructions above.
 
 ### Compiling on MacOS
@@ -127,7 +127,7 @@ when you can't use the precompiled binary directly, we provide an automated buil
 - Metal is enabled by default on macOS, Vulkan support is enabled by default on both Linux and macOS, ROCm support isn't available yet.
 - You can also use `nix3-run` to use KoboldCpp: `nix run --expr ``with import <nixpkgs> { config = { allowUnfree = true; cudaSupport = true; }; }; koboldcpp`` --impure`
 - Or use `nix-shell`: `nix-shell --expr 'with import <nixpkgs> { config = { allowUnfree = true; cudaSupport = true; }; }; koboldcpp' --run "koboldcpp" --impure`
-- Packages (like OpenBlast, CLBLast, Vulkan, etc.) can be overridden, please refer to the [17th Nix Pill - Nixpkgs Overriding Packages](https://nixos.org/guides/nix-pills/17-nixpkgs-overriding-packages)
+- Packages (like CLBLast, Vulkan, etc.) can be overridden, please refer to the [17th Nix Pill - Nixpkgs Overriding Packages](https://nixos.org/guides/nix-pills/17-nixpkgs-overriding-packages)
 
 ## Questions and Help Wiki
 - **First, please check out [The KoboldCpp FAQ and Knowledgebase](https://github.com/LostRuins/koboldcpp/wiki) which may already have answers to your questions! Also please search through past issues and discussions.**
@@ -141,13 +141,13 @@ when you can't use the precompiled binary directly, we provide an automated buil
 ## Considerations
 - For Windows: No installation, single file executable, (It Just Works)
-- Since v1.0.6, requires libopenblas, the prebuilt windows binaries are included in this repo. If not found, it will fall back to a mode without BLAS.
 - Since v1.15, requires CLBlast if enabled, the prebuilt windows binaries are included in this repo. If not found, it will fall back to a mode without CLBlast.
 - Since v1.33, you can set the context size to be above what the model supports officially. It does increases perplexity but should still work well below 4096 even on untuned models. (For GPT-NeoX, GPT-J, and Llama models) Customize this with `--ropeconfig`.
 - Since v1.42, supports GGUF models for LLAMA and Falcon
 - Since v1.55, lcuda paths on Linux are hardcoded and may require manual changes to the makefile if you do not use koboldcpp.sh for the compilation.
 - Since v1.60, provides native image generation with StableDiffusion.cpp, you can load any SD1.5 or SDXL .safetensors model and it will provide an A1111 compatible API to use.
 - **I try to keep backwards compatibility with ALL past llama.cpp models**. But you are also encouraged to reconvert/update your models if possible for best results.
+- Since v1.75, openblas has been deprecated and removed in favor of the native CPU implementation.
 
 ## License
 - The original GGML library and llama.cpp by ggerganov are licensed under the MIT License

View file

@@ -97,9 +97,9 @@ class model_backend(InferenceModel):
 "menu_path": "",
 "refresh_model_inputs": False,
 "extra_classes": "",
-'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use OpenBLAS', 'value': 1}, {'text': 'Use CuBLAS', 'value': 2},
-{'text': 'Use CLBLast GPU #1', 'value': 3},{'text': 'Use CLBLast GPU #2', 'value': 4},{'text': 'Use CLBLast GPU #3', 'value': 5}
-,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 6},{'text': 'Failsafe Mode (Old CPU)', 'value': 7},{'text': 'Use Vulkan GPU #1', 'value': 8},{'text': 'Use Vulkan GPU #2', 'value': 9}],
+'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use CuBLAS', 'value': 1},
+{'text': 'Use CLBLast GPU #1', 'value': 2},{'text': 'Use CLBLast GPU #2', 'value': 3},{'text': 'Use CLBLast GPU #3', 'value': 4}
+,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 5},{'text': 'Failsafe Mode (Old CPU)', 'value': 6},{'text': 'Use Vulkan GPU #1', 'value': 7},{'text': 'Use Vulkan GPU #2', 'value': 8}],
 })
 requested_parameters.append({
 "uitype": "text",
@@ -238,24 +238,22 @@ class model_backend(InferenceModel):
 if accel==0:
     self.kcpp_noblas = True
 elif accel==1:
-    pass
-elif accel==2:
     self.kcpp_usecublas = ["normal"]
-elif accel==3:
+elif accel==2:
     self.kcpp_useclblast = [0,0]
-elif accel==4:
+elif accel==3:
     self.kcpp_useclblast = [1,0]
-elif accel==5:
+elif accel==4:
     self.kcpp_useclblast = [0,1]
-elif accel==6:
+elif accel==5:
     self.kcpp_noavx2 = True
-elif accel==7:
+elif accel==6:
     self.kcpp_noavx2 = True
     self.kcpp_noblas = True
     self.kcpp_nommap = True
-elif accel==8:
+elif accel==7:
     self.kcpp_usevulkan = [0]
-elif accel==9:
+elif accel==8:
     self.kcpp_usevulkan = [1]
 pass
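Since the OpenBLAS entry (old value 1) is gone, every later `accel` value shifts down by one. A summary of the new numbering (a reference table, not code from the commit):

```python
# New 'accel' value meanings after this commit; old values 2..9 become 1..8.
ACCEL_CHOICES = {
    0: "Use No BLAS",
    1: "Use CuBLAS",
    2: "Use CLBLast GPU #1",
    3: "Use CLBLast GPU #2",
    4: "Use CLBLast GPU #3",
    5: "NoAVX2 Mode (Old CPU)",
    6: "Failsafe Mode (Old CPU)",
    7: "Use Vulkan GPU #1",
    8: "Use Vulkan GPU #2",
}
```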

environment.yaml

@@ -11,8 +11,6 @@ dependencies:
   - gxx=10
   - pip
   - git=2.35.1
-  - libopenblas
-  - openblas
   - clblast
   - ninja
   - make

gpttype_adapter.cpp

@@ -1624,7 +1624,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
 gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale;
 gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base;
-//this is used for the mem_per_token eval, openblas needs more RAM
+//this is used for the mem_per_token eval, blas needs more RAM
 bool v3_use_scratch = ggml_v3_cpu_has_gpublas();
 
 int cu_parseinfo_maindevice = inputs.cublas_info<=0?0:inputs.cublas_info;
@@ -2362,11 +2362,11 @@ int GetThreadsToUse(bool blasmode)
 {
     if(!ggml_cpu_has_gpublas())
     {
-        return 1;
+        return std::min(kcpp_data->n_blasthreads, 4);
     }
     else
    {
         return kcpp_data->n_blasthreads;
     }
 }
 return kcpp_data->n_threads;
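The second hunk is a behavioral change: during BLAS batch processing without a GPU backend, the merged CPU backend now uses up to 4 threads instead of the old hard-coded single thread, since the llamafile-based matmul benefits from threading. A Python paraphrase of the new rule (the authoritative code is the C++ above):

```python
# Paraphrase of the revised GetThreadsToUse() logic shown above.
def get_threads_to_use(blasmode, has_gpublas, n_threads, n_blasthreads):
    if blasmode:
        if not has_gpublas:
            return min(n_blasthreads, 4)  # CPU BLAS batch path; was: return 1
        return n_blasthreads              # GPU-accelerated batch path
    return n_threads                      # normal (non-batch) generation
```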

koboldcpp.py

@@ -292,7 +292,6 @@ def pick_existant_file(ntoption,nonntoption):
 lib_default = pick_existant_file("koboldcpp_default.dll","koboldcpp_default.so")
 lib_failsafe = pick_existant_file("koboldcpp_failsafe.dll","koboldcpp_failsafe.so")
-lib_openblas = pick_existant_file("koboldcpp_openblas.dll","koboldcpp_openblas.so")
 lib_noavx2 = pick_existant_file("koboldcpp_noavx2.dll","koboldcpp_noavx2.so")
 lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so")
 lib_clblast_noavx2 = pick_existant_file("koboldcpp_clblast_noavx2.dll","koboldcpp_clblast_noavx2.so")
@@ -302,25 +301,23 @@ lib_vulkan = pick_existant_file("koboldcpp_vulkan.dll","koboldcpp_vulkan.so")
 lib_vulkan_noavx2 = pick_existant_file("koboldcpp_vulkan_noavx2.dll","koboldcpp_vulkan_noavx2.so")
 libname = ""
 lib_option_pairs = [
-    (lib_openblas, "Use OpenBLAS"),
-    (lib_default, "Use No BLAS"),
+    (lib_default, "Use CPU"),
     (lib_clblast, "Use CLBlast"),
     (lib_cublas, "Use CuBLAS"),
     (lib_hipblas, "Use hipBLAS (ROCm)"),
     (lib_vulkan, "Use Vulkan"),
-    (lib_noavx2, "NoAVX2 Mode (Old CPU)"),
-    (lib_clblast_noavx2, "CLBlast NoAVX2 (Old CPU)"),
-    (lib_vulkan_noavx2, "Vulkan NoAVX2 (Old CPU)"),
+    (lib_noavx2, "Use CPU (Old CPU)"),
+    (lib_clblast_noavx2, "Use CLBlast (Old CPU)"),
+    (lib_vulkan_noavx2, "Use Vulkan (Old CPU)"),
     (lib_failsafe, "Failsafe Mode (Old CPU)")]
-openblas_option, default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
+default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
 runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
 
 def init_library():
     global handle, args, libname
-    global lib_default,lib_failsafe,lib_openblas,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2
+    global lib_default,lib_failsafe,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2
     libname = ""
-    use_openblas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir.
     use_clblast = False #uses CLBlast instead
     use_cublas = False #uses cublas instead
     use_hipblas = False #uses hipblas instead
@@ -373,15 +370,7 @@ def init_library():
         print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
         use_clblast = True
     else:
-        if not file_exists(lib_openblas) or (os.name=='nt' and not file_exists("libopenblas.dll")):
-            print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
-        elif args.noblas:
-            print("Attempting to library without OpenBLAS.")
-        else:
-            use_openblas = True
-            print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas will be required.")
-            if sys.platform=="darwin":
-                print("Mac OSX note: Some people have found Accelerate actually faster than OpenBLAS. To compare, run Koboldcpp with --noblas instead.")
+        print("Attempting to use Non-BLAS library.")
 
     if use_noavx2:
         if use_failsafe:
@@ -399,8 +388,6 @@ def init_library():
         libname = lib_cublas
     elif use_hipblas:
         libname = lib_hipblas
-    elif use_openblas:
-        libname = lib_openblas
     elif use_vulkan:
         libname = lib_vulkan
     else:
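With `lib_openblas` gone, `init_library()` now resolves to one of eight libraries. A condensed sketch of the resulting pick order (simplified: the real function also maps the NoAVX2 flag onto the CLBlast/Vulkan NoAVX2 variants):

```python
# Condensed sketch of the post-commit library pick order; simplified from
# init_library() in koboldcpp.py, not a verbatim excerpt.
def resolve_libname(use_failsafe, use_noavx2, use_clblast, use_cublas,
                    use_hipblas, use_vulkan):
    if use_failsafe:
        return "koboldcpp_failsafe"
    if use_noavx2:
        return "koboldcpp_noavx2"  # real code also pairs this with clblast/vulkan
    if use_clblast:
        return "koboldcpp_clblast"
    if use_cublas:
        return "koboldcpp_cublas"
    if use_hipblas:
        return "koboldcpp_hipblas"
    if use_vulkan:
        return "koboldcpp_vulkan"
    return "koboldcpp_default"     # merged CPU backend (llamafile BLAS built in)
```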
@@ -712,35 +699,6 @@ def autoset_gpu_layers(ctxsize,sdquanted,bbs): #shitty algo to determine how man
 def fetch_gpu_properties(testCL,testCU,testVK):
     import subprocess
-    time.sleep(0.1)
-    if testCL:
-        try: # Get OpenCL GPU names on windows using a special binary. overwrite at known index if found.
-            basepath = os.path.abspath(os.path.dirname(__file__))
-            output = ""
-            data = None
-            try:
-                output = subprocess.run(["clinfo","--json"], capture_output=True, text=True, check=True, encoding='utf-8').stdout
-                data = json.loads(output)
-            except Exception as e1:
-                output = subprocess.run([((os.path.join(basepath, "winclinfo.exe")) if os.name == 'nt' else "clinfo"),"--json"], capture_output=True, text=True, check=True, creationflags=subprocess.CREATE_NO_WINDOW | subprocess.DETACHED_PROCESS, encoding='utf-8').stdout
-                data = json.loads(output)
-            plat = 0
-            dev = 0
-            lowestclmem = 0
-            for platform in data["devices"]:
-                dev = 0
-                for device in platform["online"]:
-                    dname = device["CL_DEVICE_NAME"]
-                    dmem = int(device["CL_DEVICE_GLOBAL_MEM_SIZE"])
-                    idx = plat+dev*2
-                    if idx<len(CLDevices):
-                        CLDevicesNames[idx] = dname
-                    lowestclmem = dmem if lowestclmem==0 else (dmem if dmem<lowestclmem else lowestclmem)
-                    dev += 1
-                plat += 1
-            MaxMemory[0] = lowestclmem
-        except Exception as e:
-            pass
 
     if testCU:
         FetchedCUdevices = []
@@ -804,20 +762,54 @@ def fetch_gpu_properties(testCL,testCU,testVK):
                 idx += 1
         except Exception as e:
             pass
+    if testCL:
+        try: # Get OpenCL GPU names on windows using a special binary. overwrite at known index if found.
+            basepath = os.path.abspath(os.path.dirname(__file__))
+            output = ""
+            data = None
+            try:
+                output = subprocess.run(["clinfo","--json"], capture_output=True, text=True, check=True, encoding='utf-8').stdout
+                data = json.loads(output)
+            except Exception as e1:
+                output = subprocess.run([((os.path.join(basepath, "winclinfo.exe")) if os.name == 'nt' else "clinfo"),"--json"], capture_output=True, text=True, check=True, creationflags=subprocess.CREATE_NO_WINDOW | subprocess.DETACHED_PROCESS, encoding='utf-8').stdout
+                data = json.loads(output)
+            plat = 0
+            dev = 0
+            lowestclmem = 0
+            for platform in data["devices"]:
+                dev = 0
+                for device in platform["online"]:
+                    dname = device["CL_DEVICE_NAME"]
+                    dmem = int(device["CL_DEVICE_GLOBAL_MEM_SIZE"])
+                    idx = plat+dev*2
+                    if idx<len(CLDevices):
+                        CLDevicesNames[idx] = dname
+                    lowestclmem = dmem if lowestclmem==0 else (dmem if dmem<lowestclmem else lowestclmem)
+                    dev += 1
+                plat += 1
+            MaxMemory[0] = max(lowestclmem,MaxMemory[0])
+        except Exception as e:
+            pass
     return
 
 def auto_set_backend_cli():
     fetch_gpu_properties(False,True,True)
+    found_new_backend = False
     if exitcounter < 100 and MaxMemory[0]>3500000000 and (("Use CuBLAS" in runopts and CUDevicesNames[0]!="") or "Use hipBLAS (ROCm)" in runopts) and any(CUDevicesNames):
         if "Use CuBLAS" in runopts or "Use hipBLAS (ROCm)" in runopts:
             args.usecublas = ["normal","mmq"]
             print("Auto Selected CUDA Backend...\n")
+            found_new_backend = True
    elif exitcounter < 100 and (1 in VKIsDGPU) and "Use Vulkan" in runopts:
         for i in range(0,len(VKIsDGPU)):
             if VKIsDGPU[i]==1:
                 args.usevulkan = []
                 print("Auto Selected Vulkan Backend...\n")
+                found_new_backend = True
                 break
+    if not found_new_backend:
+        print("No GPU Backend found...\n")
 
 def load_model(model_filename):
     global args
@@ -2177,7 +2169,7 @@ def RunServerMultiThreaded(addr, port):
         finally:
             exitcounter = 999
             self.httpd.server_close()
-            sys.exit(0)
+            os._exit(0)
     def stop(self):
         global exitcounter
         exitcounter = 999
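The `sys.exit(0)` to `os._exit(0)` swap matters because this shutdown path runs inside a server thread: `sys.exit()` only raises `SystemExit` in the calling thread, so the rest of the process keeps running, while `os._exit()` terminates the interpreter immediately (skipping `finally` blocks and `atexit` handlers). A small standalone demonstration:

```python
import sys
import threading

def worker():
    sys.exit(0)  # raises SystemExit; only this worker thread ends

t = threading.Thread(target=worker)
t.start()
t.join()
print("still alive after sys.exit() in a thread")  # this line is reached
# By contrast, os._exit(0) anywhere would end the whole process on the spot.
```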
@@ -2337,7 +2329,7 @@ def show_gui():
     if not any(runopts):
         exitcounter = 999
-        exit_with_error(2,"KoboldCPP couldn't locate any backends to use (i.e Default, OpenBLAS, CLBlast, CuBLAS).\n\nTo use the program, please run the 'make' command from the directory.","No Backends Available!")
+        exit_with_error(2,"KoboldCPP couldn't locate any backends to use (i.e Default, Vulkan, CLBlast, CuBLAS).\n\nTo use the program, please run the 'make' command from the directory.","No Backends Available!")
 
     # Vars - should be in scope to be used by multiple widgets
     gpulayers_var = ctk.StringVar(value="-1")
@@ -2530,8 +2522,8 @@ def show_gui():
     def setup_backend_tooltip(parent):
         # backend count label with the tooltip function
         nl = '\n'
-        tooltxt = f"Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) != 6 else "")
-        num_backends_built = makelabel(parent, str(len(runopts)) + f"/9", 5, 2,tooltxt)
+        tooltxt = f"Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
+        num_backends_built = makelabel(parent, str(len(runopts)) + f"/8", 5, 2,tooltxt)
         num_backends_built.grid(row=1, column=1, padx=195, pady=0)
         num_backends_built.configure(text_color="#00ff00")
@@ -2550,7 +2542,7 @@ def show_gui():
         predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())]))
         max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "")
         index = runopts_var.get()
-        gpu_be = (index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
+        gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
         layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
         quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
         if sys.platform=="darwin" and gpulayers_var.get()=="-1":
@@ -2578,10 +2570,10 @@ def show_gui():
         try:
             s = int(gpu_choice_var.get())-1
             v = runopts_var.get()
-            if v == "Use Vulkan" or v == "Vulkan NoAVX2 (Old CPU)":
+            if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)":
                 quick_gpuname_label.configure(text=VKDevicesNames[s])
                 gpuname_label.configure(text=VKDevicesNames[s])
-            elif v == "Use CLBlast" or v == "CLBlast NoAVX2 (Old CPU)":
+            elif v == "Use CLBlast" or v == "Use CLBlast (Old CPU)":
                 quick_gpuname_label.configure(text=CLDevicesNames[s])
                 gpuname_label.configure(text=CLDevicesNames[s])
             else:
@@ -2631,19 +2623,19 @@ def show_gui():
         global runmode_untouched
         runmode_untouched = False
         index = runopts_var.get()
-        if index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
+        if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
             quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
             gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
             gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
             quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
-            if index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)":
+            if index == "Use CLBlast" or index == "Use CLBlast (Old CPU)":
                 gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
                 quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
                 CUDA_gpu_selector_box.grid_remove()
                 CUDA_quick_gpu_selector_box.grid_remove()
                 if gpu_choice_var.get()=="All":
                     gpu_choice_var.set("1")
-            elif index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
+            elif index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
                 gpu_selector_box.grid_remove()
                 quick_gpu_selector_box.grid_remove()
                 CUDA_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
@@ -2677,7 +2669,7 @@ def show_gui():
         tensor_split_label.grid(row=8, column=0, padx = 8, pady=1, stick="nw")
         tensor_split_entry.grid(row=8, column=1, padx=8, pady=1, stick="nw")
-        if index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
+        if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
             gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
             gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw")
             quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
@@ -2697,7 +2689,7 @@ def show_gui():
     # presets selector
-    makelabel(quick_tab, "Presets:", 1,0,"Select a backend to use.\nOpenBLAS and NoBLAS runs purely on CPU only.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nCLBlast works on all GPUs but is somewhat slower.\nNoAVX2 and Failsafe modes support older PCs.")
+    makelabel(quick_tab, "Presets:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.")
     runoptbox = ctk.CTkComboBox(quick_tab, values=runopts, width=180,variable=runopts_var, state="readonly")
     runoptbox.grid(row=1, column=1,padx=8, stick="nw")
@@ -2743,7 +2735,7 @@ def show_gui():
     hardware_tab = tabcontent["Hardware"]
     # presets selector
-    makelabel(hardware_tab, "Presets:", 1,0,"Select a backend to use.\nOpenBLAS and NoBLAS runs purely on CPU only.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nCLBlast works on all GPUs but is somewhat slower.\nNoAVX2 and Failsafe modes support older PCs.")
+    makelabel(hardware_tab, "Presets:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.")
     runoptbox = ctk.CTkComboBox(hardware_tab, values=runopts, width=180,variable=runopts_var, state="readonly")
     runoptbox.grid(row=1, column=1,padx=8, stick="nw")
     runoptbox.set(runopts[0]) # Set to first available option
@@ -3011,9 +3003,9 @@ def show_gui():
         gpuchoiceidx = 0
         if gpu_choice_var.get()!="All":
             gpuchoiceidx = int(gpu_choice_var.get())-1
-        if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "CLBlast NoAVX2 (Old CPU)":
+        if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Old CPU)":
             args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx]
-            if runopts_var.get() == "CLBlast NoAVX2 (Old CPU)":
+            if runopts_var.get() == "Use CLBlast (Old CPU)":
                 args.noavx2 = True
         if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)":
             if gpu_choice_var.get()=="All":
@@ -3024,18 +3016,18 @@ def show_gui():
                 args.usecublas.append("mmq")
             if rowsplit_var.get()==1:
                 args.usecublas.append("rowsplit")
-        if runopts_var.get() == "Use Vulkan" or runopts_var.get() == "Vulkan NoAVX2 (Old CPU)":
+        if runopts_var.get() == "Use Vulkan" or runopts_var.get() == "Use Vulkan (Old CPU)":
             if gpu_choice_var.get()=="All":
                 args.usevulkan = []
             else:
                 args.usevulkan = [int(gpuchoiceidx)]
-            if runopts_var.get() == "Vulkan NoAVX2 (Old CPU)":
+            if runopts_var.get() == "Use Vulkan (Old CPU)":
                 args.noavx2 = True
         if gpulayers_var.get():
             args.gpulayers = int(gpulayers_var.get())
-        if runopts_var.get()=="Use No BLAS":
+        if runopts_var.get()=="Use CPU":
            args.noblas = True
-        if runopts_var.get()=="NoAVX2 Mode (Old CPU)":
+        if runopts_var.get()=="Use CPU (Old CPU)":
            args.noavx2 = True
        if runopts_var.get()=="Failsafe Mode (Old CPU)":
            args.noavx2 = True
@@ -3193,8 +3185,6 @@ def show_gui():
         elif "noblas" in dict and dict["noblas"]:
             if default_option is not None:
                 runopts_var.set(default_option)
-            elif openblas_option is not None:
-                runopts_var.set(openblas_option)
         if "gpulayers" in dict and dict["gpulayers"]:
             gpulayers_var.set(dict["gpulayers"])
         else:
@@ -4019,34 +4009,31 @@ def main(launch_args,start_server=True):
         nocertify = True
 
     if args.gpulayers:
-        global libname, lib_default, lib_openblas, lib_failsafe, lib_noavx2
-        nogood = [lib_default,lib_openblas,lib_failsafe,lib_noavx2]
         shouldavoidgpu = False
-        if libname in nogood and sys.platform!="darwin":
+        if args.noblas and sys.platform!="darwin":
             shouldavoidgpu = True
-            if args.gpulayers>0:
-                if shouldavoidgpu:
-                    print("WARNING: GPU layers is set, but a GPU backend was not selected!")
-                pass
+            if args.gpulayers and args.gpulayers>0:
+                print("WARNING: GPU layers is set, but a GPU backend was not selected! GPU will not be used!")
+                args.gpulayers = 0
         elif args.gpulayers==-1 and sys.platform=="darwin" and args.model_param and os.path.exists(args.model_param):
             print(f"MacOS detected: Auto GPU layers set to maximum")
             args.gpulayers = 200
-        elif args.gpulayers==-1 and not shouldavoidgpu and args.model_param and os.path.exists(args.model_param):
-            if not args.usecublas and not args.usevulkan and not args.useclblast:
-                print("NOTE: Auto GPU layers was set without picking a GPU backend! Trying to assign one for you automatically...")
+        elif not shouldavoidgpu and args.model_param and os.path.exists(args.model_param):
+            if not args.usecublas and (args.usevulkan is None) and not args.useclblast:
+                print("No GPU or CPU backend was selected. Trying to assign one for you automatically...")
                 auto_set_backend_cli()
-            print("Trying to automatically determine GPU layers...")
             if MaxMemory[0] == 0: #try to get gpu vram for cuda if not picked yet
                 fetch_gpu_properties(False,True,True)
                 pass
-            if MaxMemory[0] > 0:
-                extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj)
-                layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.blasbatchsize)
-                print(f"Auto Recommended Layers: {layeramt}")
-                args.gpulayers = layeramt
-            else:
-                print(f"Could not automatically determine layers. Please set it manually.")
-                args.gpulayers = 0
+            if args.gpulayers==-1:
+                if MaxMemory[0] > 0 and (not args.noblas) and (args.usecublas or (args.usevulkan is not None) or args.useclblast or sys.platform=="darwin"):
+                    extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj)
+                    layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.blasbatchsize)
+                    print(f"Auto Recommended GPU Layers: {layeramt}")
+                    args.gpulayers = layeramt
+                else:
+                    print(f"No GPU backend found, or could not automatically determine GPU layers. Please set it manually.")
+                    args.gpulayers = 0
 
     if args.threads == -1:
         args.threads = get_default_threads()
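Because `--gpulayers` now defaults to `-1`, the CLI itself decides the final layer count. A rough paraphrase of the new decision flow (illustrative only; `estimate_layers` below stands in for the repo's `autoset_gpu_layers()` heuristic and is not the real formula):

```python
# Rough paraphrase of the new --gpulayers resolution flow shown above.
def resolve_gpulayers(gpulayers, noblas, gpu_backend_selected, max_vram, is_macos):
    if noblas and not is_macos:
        if gpulayers and gpulayers > 0:
            print("WARNING: GPU layers is set, but a GPU backend was not selected!")
        return 0                      # CPU only: GPU offload disabled
    if gpulayers == -1:
        if is_macos:
            return 200                # macOS: offload everything (Metal)
        if max_vram > 0 and gpu_backend_selected:
            return estimate_layers(max_vram)
        print("No GPU backend found, or could not determine GPU layers.")
        return 0
    return gpulayers                  # explicit user value wins

def estimate_layers(max_vram):
    # Placeholder heuristic, NOT the real autoset_gpu_layers() formula.
    return max(0, max_vram // (400 * 1024 * 1024))
```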
@@ -4398,9 +4385,9 @@ if __name__ == '__main__':
     compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs. For hipBLAS binaries, please check YellowRoseCx rocm fork.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq] [rowsplit]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'mmq', 'rowsplit'])
     compatgroup.add_argument("--usevulkan", help="Use Vulkan for GPU Acceleration. Can optionally specify GPU Device ID (e.g. --usevulkan 0).", metavar=('[Device ID]'), nargs='*', type=int, default=None)
     compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
-    compatgroup.add_argument("--noblas", help="Do not use any accelerated prompt ingestion", action='store_true')
+    compatgroup.add_argument("--noblas", help="Do not use any GPU acceleration (CPU Only)", action='store_true')
     parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 4096). Supported values are [256,512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768,49152,65536,98304,131072]. IF YOU USE ANYTHING ELSE YOU ARE ON YOUR OWN.",metavar=('[256,512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768,49152,65536,98304,131072]'), type=check_range(int,256,262144), default=4096)
-    parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU. Set to -1 to try autodetect (experimental)",metavar=('[GPU layers]'), nargs='?', const=1, type=int, default=0)
+    parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU. Set to -1 to try autodetect, set to 0 to disable GPU offload.",metavar=('[GPU layers]'), nargs='?', const=1, type=int, default=-1)
     parser.add_argument("--tensor_split", help="For CUDA and Vulkan only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')
     #more advanced params

koboldcpp.sh

@@ -25,7 +25,7 @@ if [ -n "$NOAVX2" ]; then
 LLAMA_NOAVX2_FLAG="LLAMA_NOAVX2=1"
 fi
-bin/micromamba run -r conda -p conda/envs/linux make -j$(nproc) LLAMA_VULKAN=1 LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1 $LLAMA_NOAVX2_FLAG
+bin/micromamba run -r conda -p conda/envs/linux make -j$(nproc) LLAMA_VULKAN=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1 $LLAMA_NOAVX2_FLAG
 if [[ $1 == "rebuild" ]]; then
 echo Rebuild complete, you can now try to launch Koboldcpp.
@@ -34,8 +34,8 @@ elif [[ $1 == "dist" ]]; then
 if [ -n "$NOAVX2" ]; then
 bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND"
 else
 bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND"
-bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_openblas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --clean --console koboldcpp.py -n "koboldcpp-linux-x64-nocuda$KCPP_APPEND"
+bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --clean --console koboldcpp.py -n "koboldcpp-linux-x64-nocuda$KCPP_APPEND"
 fi
 bin/micromamba install --no-rc -r conda -p conda/envs/linux ocl-icd -c conda-forge -y
 else
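
Per the commit message, a GPU backend is now selected automatically when running from the CLI unless noblas is specified; the selection logic itself lives in koboldcpp.py and is not among the hunks shown here. Purely as an illustrative sketch of that idea (the function name and priority order are assumptions, though the library filenames match the packaging commands above):

import os

# Hypothetical priority list: prefer GPU-capable backends, fall back to
# the plain CPU build, which now also provides BLAS via llamafile.
BACKEND_PRIORITY = [
    "koboldcpp_cublas.so",   # CUDA
    "koboldcpp_vulkan.so",   # Vulkan
    "koboldcpp_clblast.so",  # CLBlast (OpenCL)
    "koboldcpp_default.so",  # CPU
]

def pick_backend(base_dir, noblas=False):
    # With noblas, skip GPU backends entirely and use the CPU build.
    if noblas:
        return os.path.join(base_dir, "koboldcpp_default.so")
    for lib in BACKEND_PRIORITY:
        candidate = os.path.join(base_dir, lib)
        if os.path.isfile(candidate):
            return candidate
    raise FileNotFoundError("no koboldcpp backend library found in " + base_dir)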

Binary file not shown.

Binary file not shown.


@@ -1 +1 @@
-PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./niko.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp_nocuda.exe"
+PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./niko.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp_nocuda.exe"


@@ -9,7 +9,6 @@ pyinstaller --noconfirm --onefile --clean --console --collect-all customtkinter
 --add-data "./taesd.embd:." \
 --add-data "./taesd_xl.embd:." \
 --add-data "./koboldcpp_default.so:." \
---add-data "./koboldcpp_openblas.so:." \
 --add-data "./koboldcpp_failsafe.so:." \
 --add-data "./koboldcpp_noavx2.so:." \
 --add-data "./koboldcpp_clblast.so:." \


@@ -1 +1 @@
-PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe"
+PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe"


@@ -1 +1 @@
-PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_12.dll;." --add-data "./cublasLt64_12.dll;." --add-data "./cudart64_12.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp_cu12.exe"
+PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_12.dll;." --add-data "./cublasLt64_12.dll;." --add-data "./cudart64_12.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp_cu12.exe"