diff --git a/.gitignore b/.gitignore index 4d4f4336e..18148200b 100644 --- a/.gitignore +++ b/.gitignore @@ -107,7 +107,6 @@ tests/test-tokenizer-1-bpe /koboldcpp_default.so /koboldcpp_failsafe.so -/koboldcpp_openblas.so /koboldcpp_noavx2.so /koboldcpp_clblast.so /koboldcpp_clblast_noavx2.so @@ -116,7 +115,6 @@ tests/test-tokenizer-1-bpe /koboldcpp_vulkan_noavx2.so /koboldcpp_default.dll /koboldcpp_failsafe.dll -/koboldcpp_openblas.dll /koboldcpp_noavx2.dll /koboldcpp_clblast.dll /koboldcpp_clblast_noavx2.dll diff --git a/Makefile b/Makefile index 7139cce6c..7d1361087 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ # Add custom options to Makefile.local rather than editing this file. -include $(abspath $(lastword ${MAKEFILE_LIST})).local -default: koboldcpp_default koboldcpp_failsafe koboldcpp_openblas koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2 +default: koboldcpp_default koboldcpp_failsafe koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2 tools: quantize_gpt2 quantize_gptj quantize_gguf quantize_neox quantize_mpt quantize_clip whispermain sdmain gguf-split -dev: koboldcpp_openblas +dev: koboldcpp_default dev2: koboldcpp_clblast dev3: koboldcpp_vulkan @@ -75,7 +75,6 @@ SIMPLECFLAGS = FULLCFLAGS = NONECFLAGS = -OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -DGGML_USE_BLAS -I/usr/local/include/openblas CLBLAST_FLAGS = -DGGML_USE_CLBLAST FAILSAFE_FLAGS = -DUSE_FAILSAFE VULKAN_FLAGS = -DGGML_USE_VULKAN -DSD_USE_VULKAN @@ -336,7 +335,6 @@ endif DEFAULT_BUILD = FAILSAFE_BUILD = -OPENBLAS_BUILD = NOAVX2_BUILD = CLBLAST_BUILD = CUBLAS_BUILD = @@ -346,7 +344,6 @@ VULKAN_BUILD = ifeq ($(OS),Windows_NT) DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) - OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS) NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS) VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ lib/vulkan-1.lib -shared -o $@.dll $(LDFLAGS) @@ -364,14 +361,11 @@ else NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS) endif - ifdef LLAMA_OPENBLAS - OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) - endif ifdef LLAMA_CLBLAST ifeq ($(UNAME_S),Darwin) - CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) + CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS) else - CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) + CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS) endif endif ifdef LLAMA_CUBLAS @@ -384,13 +378,11 @@ else VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ -lvulkan -shared -o $@.so $(LDFLAGS) endif - ifndef LLAMA_OPENBLAS ifndef LLAMA_CLBLAST ifndef LLAMA_CUBLAS ifndef LLAMA_HIPBLAS ifndef LLAMA_VULKAN - OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.' 
- endif + VULKAN_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install the Vulkan SDK and set LLAMA_VULKAN=1 to compile with Vulkan support. This is just a reminder, not an error.' endif endif endif @@ -421,8 +413,6 @@ $(info ) ggml.o: ggml/src/ggml.c ggml/include/ggml.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@ -ggml_v4_openblas.o: ggml/src/ggml.c ggml/include/ggml.h - $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ ggml_v4_failsafe.o: ggml/src/ggml.c ggml/include/ggml.h $(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@ ggml_v4_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h @@ -480,15 +470,13 @@ llavaclip_cublas.o: examples/llava/clip.cpp examples/llava/clip.h llavaclip_vulkan.o: examples/llava/clip.cpp examples/llava/clip.h $(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@ -#this is only used for openblas and accelerate +#this is only used for accelerate ggml-blas.o: ggml/src/ggml-blas.cpp ggml/include/ggml-blas.h $(CXX) $(CXXFLAGS) -c $< -o $@ #version 3 libs ggml_v3.o: otherarch/ggml_v3.c otherarch/ggml_v3.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@ -ggml_v3_openblas.o: otherarch/ggml_v3.c otherarch/ggml_v3.h - $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ ggml_v3_failsafe.o: otherarch/ggml_v3.c otherarch/ggml_v3.h $(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@ ggml_v3_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h @@ -503,8 +491,6 @@ ggml_v3_clblast_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h #version 2 libs ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@ -ggml_v2_openblas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h - $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ ggml_v2_failsafe.o: otherarch/ggml_v2.c otherarch/ggml_v2.h $(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@ ggml_v2_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h @@ -569,8 +555,6 @@ gpttype_adapter_failsafe.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) -c $< -o $@ gpttype_adapter.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) -c $< -o $@ -gpttype_adapter_openblas.o: $(GPTTYPE_ADAPTER) - $(CXX) $(CXXFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ gpttype_adapter_clblast.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER) @@ -583,7 +567,7 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@ clean: - rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state 
gguf imatrix vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe imatrix.exe gguf.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so rm -vrf ggml/src/ggml-cuda/*.o rm -vrf ggml/src/ggml-cuda/template-instances/*.o @@ -612,14 +596,6 @@ vulkan-shaders-gen: ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp koboldcpp_default: ggml.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o gpttype_adapter.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FULL) $(OBJS) $(DEFAULT_BUILD) -ifdef OPENBLAS_BUILD -koboldcpp_openblas: ggml_v4_openblas.o ggml_v3_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o gpttype_adapter_openblas.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-blas.o $(OBJS_FULL) $(OBJS) - $(OPENBLAS_BUILD) -else -koboldcpp_openblas: - $(DONOTHING) -endif - ifdef FAILSAFE_BUILD koboldcpp_failsafe: ggml_v4_failsafe.o ggml_v3_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o gpttype_adapter_failsafe.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FAILSAFE) $(OBJS) $(FAILSAFE_BUILD) diff --git a/README.md b/README.md index 27f06685e..ad8a39b5d 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models - You can also run it using the command line. For info, please check `koboldcpp.exe --help` ## Linux Usage (Precompiled Binary, Recommended) -On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda1150` prebuilt PyInstaller binary on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary. +On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda1150` prebuilt PyInstaller binary on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary (you may have to `chmod +x` it first). Alternatively, you can also install koboldcpp to the current directory by running the following terminal command: ``` @@ -24,7 +24,8 @@ curl -fLo koboldcpp https://github.com/LostRuins/koboldcpp/releases/latest/downl After running this command you can launch Koboldcpp from the current directory using `./koboldcpp` in the terminal (for CLI usage, run with `--help`). ## MacOS (Precompiled Binary) -- PyInstaller binaries for Modern ARM64 MacOS (M1, M2, M3) are now available! **[Simply download and run the MacOS binary](https://github.com/LostRuins/koboldcpp/releases/latest)** +- PyInstaller binaries for modern ARM64 MacOS (M1, M2, M3) are now available! **[Simply download the MacOS binary](https://github.com/LostRuins/koboldcpp/releases/latest)** +- In a MacOS terminal window, make the file executable with `chmod +x koboldcpp-mac-arm64`, then run it with `./koboldcpp-mac-arm64`. - Alternatively, or for older x86 MacOS computers, you can clone the repo and compile from source code; see Compiling for MacOS below. 
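For example, a typical first run in a MacOS terminal might look like this (a sketch; the binary name is the one used on the releases page above, and `--help` simply prints the CLI options):
```
chmod +x ./koboldcpp-mac-arm64
./koboldcpp-mac-arm64 --help
```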
## Run on Colab @@ -70,13 +71,13 @@ when you can't use the precompiled binary directly, we provide an automated buil ### Compiling on Linux (Manual Method) - To compile your binaries from source, clone the repo with `git clone https://github.com/LostRuins/koboldcpp.git` - A makefile is provided; simply run `make`. -- Optional OpenBLAS: Link your own install of OpenBLAS manually with `make LLAMA_OPENBLAS=1` +- Optional Vulkan: Link your own install of the Vulkan SDK manually with `make LLAMA_VULKAN=1` - Optional CLBlast: Link your own install of CLBlast manually with `make LLAMA_CLBLAST=1` - Note: for CLBlast you will need to obtain and link OpenCL and CLBlast libraries. - - For Arch Linux: Install `cblas` and `clblast`. - - For Debian: Install `libclblast-dev`. + - For Arch Linux: Install `cblas` and `clblast`. + - For Debian: Install `libclblast-dev`. - You can attempt a CuBLAS build with `LLAMA_CUBLAS=1` (or `LLAMA_HIPBLAS=1` for AMD). You will need CUDA Toolkit installed. Some have also reported success with the CMake file, though that is more for Windows. -- For a full featured build (all backends), do `make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_VULKAN=1`. (Note that `LLAMA_CUBLAS=1` will not work on windows, you need visual studio) +- For a full-featured build (all backends), do `make LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_VULKAN=1`. (Note that `LLAMA_CUBLAS=1` will not work on Windows; you need Visual Studio.) - After all binaries are built, you can run the Python script with the command `koboldcpp.py [ggml_model.gguf] [port]` ### Compiling on Windows - If you want to generate the .exe file, make sure you have the Python module PyInstaller installed with pip (`pip install PyInstaller`). Then run the script `make_pyinstaller.bat` - The koboldcpp.exe file will be in your dist folder. - **Building with CUDA**: Visual Studio, CMake and CUDA Toolkit are required. Clone the repo, then open the CMake file and compile it in Visual Studio. Copy the `koboldcpp_cublas.dll` generated into the same directory as the `koboldcpp.py` file. If you are bundling executables, you may need to include CUDA dynamic libraries (such as `cublasLt64_11.dll` and `cublas64_11.dll`) in order for the executable to work correctly on a different PC. - **Replacing Libraries (Not Recommended)**: If you wish to use your own version of the additional Windows libraries (OpenCL, CLBlast, Vulkan), you can do it with: - OpenCL - tested with https://github.com/KhronosGroup/OpenCL-SDK . If you wish to compile it, follow the repository instructions. You will need vcpkg. - CLBlast - tested with https://github.com/CNugteren/CLBlast . If you wish to compile it you will need to reference the OpenCL files. It will only generate the ".lib" file if you compile using MSVC. - - OpenBLAS - tested with https://github.com/xianyi/OpenBLAS . - Move the respective .lib files to the /lib folder of your project, overwriting the older files. - - Also, replace the existing versions of the corresponding .dll files located in the project directory root (e.g. libopenblas.dll). + - Also, replace the existing versions of the corresponding .dll files located in the project directory root (e.g. clblast.dll). 
- Make the KoboldCpp project using the instructions above. ### Compiling on MacOS @@ -127,7 +127,7 @@ when you can't use the precompiled binary directly, we provide an automated buil - Metal is enabled by default on macOS; Vulkan support is enabled by default on both Linux and macOS; ROCm support isn't available yet. - You can also use `nix3-run` to use KoboldCpp: `nix run --expr ``with import <nixpkgs> { config = { allowUnfree = true; cudaSupport = true; }; }; koboldcpp`` --impure` - Or use `nix-shell`: `nix-shell --expr 'with import <nixpkgs> { config = { allowUnfree = true; cudaSupport = true; }; }; koboldcpp' --run "koboldcpp" --impure` - - Packages (like OpenBlast, CLBLast, Vulkan, etc.) can be overridden, please refer to the [17th Nix Pill - Nixpkgs Overriding Packages](https://nixos.org/guides/nix-pills/17-nixpkgs-overriding-packages) + - Packages (like CLBlast, Vulkan, etc.) can be overridden; please refer to the [17th Nix Pill - Nixpkgs Overriding Packages](https://nixos.org/guides/nix-pills/17-nixpkgs-overriding-packages) ## Questions and Help Wiki - **First, please check out [The KoboldCpp FAQ and Knowledgebase](https://github.com/LostRuins/koboldcpp/wiki) which may already have answers to your questions! Also please search through past issues and discussions.** @@ -141,13 +141,13 @@ when you can't use the precompiled binary directly, we provide an automated buil ## Considerations - For Windows: No installation, single file executable, (It Just Works) -- Since v1.0.6, requires libopenblas, the prebuilt windows binaries are included in this repo. If not found, it will fall back to a mode without BLAS. - Since v1.15, requires CLBlast if enabled; the prebuilt Windows binaries are included in this repo. If not found, it will fall back to a mode without CLBlast. - Since v1.33, you can set the context size to be above what the model supports officially. It does increase perplexity but should still work well below 4096 even on untuned models. (For GPT-NeoX, GPT-J, and Llama models) Customize this with `--ropeconfig`. - Since v1.42, supports GGUF models for LLAMA and Falcon - Since v1.55, lcuda paths on Linux are hardcoded and may require manual changes to the makefile if you do not use koboldcpp.sh for the compilation. - Since v1.60, provides native image generation with StableDiffusion.cpp; you can load any SD1.5 or SDXL .safetensors model and it will provide an A1111 compatible API to use. - **I try to keep backwards compatibility with ALL past llama.cpp models**. But you are also encouraged to reconvert/update your models if possible for best results. +- Since v1.75, OpenBLAS has been deprecated and removed in favor of the native CPU implementation.
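As an illustration of the migration, a build that previously used the removed flag now maps onto the remaining backends (a sketch; these are the same `make` flags documented in the compiling section above):
```
# CPU-only build (replaces the old `make LLAMA_OPENBLAS=1` build)
make
# full-featured GPU build
make LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_VULKAN=1
```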
## License - The original GGML library and llama.cpp by ggerganov are licensed under the MIT License diff --git a/class.py b/class.py index 242c9cc63..9dc09741c 100644 --- a/class.py +++ b/class.py @@ -97,9 +97,9 @@ class model_backend(InferenceModel): "menu_path": "", "refresh_model_inputs": False, "extra_classes": "", - 'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use OpenBLAS', 'value': 1}, {'text': 'Use CuBLAS', 'value': 2}, - {'text': 'Use CLBLast GPU #1', 'value': 3},{'text': 'Use CLBLast GPU #2', 'value': 4},{'text': 'Use CLBLast GPU #3', 'value': 5} - ,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 6},{'text': 'Failsafe Mode (Old CPU)', 'value': 7},{'text': 'Use Vulkan GPU #1', 'value': 8},{'text': 'Use Vulkan GPU #2', 'value': 9}], + 'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use CuBLAS', 'value': 1}, + {'text': 'Use CLBLast GPU #1', 'value': 2},{'text': 'Use CLBLast GPU #2', 'value': 3},{'text': 'Use CLBLast GPU #3', 'value': 4} + ,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 5},{'text': 'Failsafe Mode (Old CPU)', 'value': 6},{'text': 'Use Vulkan GPU #1', 'value': 7},{'text': 'Use Vulkan GPU #2', 'value': 8}], }) requested_parameters.append({ "uitype": "text", @@ -238,24 +238,22 @@ class model_backend(InferenceModel): if accel==0: self.kcpp_noblas = True elif accel==1: - pass - elif accel==2: self.kcpp_usecublas = ["normal"] - elif accel==3: + elif accel==2: self.kcpp_useclblast = [0,0] - elif accel==4: + elif accel==3: self.kcpp_useclblast = [1,0] - elif accel==5: + elif accel==4: self.kcpp_useclblast = [0,1] - elif accel==6: + elif accel==5: self.kcpp_noavx2 = True - elif accel==7: + elif accel==6: self.kcpp_noavx2 = True self.kcpp_noblas = True self.kcpp_nommap = True - elif accel==8: + elif accel==7: self.kcpp_usevulkan = [0] - elif accel==9: + elif accel==8: self.kcpp_usevulkan = [1] pass diff --git a/environment.yaml b/environment.yaml index 8ffc0ba04..0134aabbe 100644 --- a/environment.yaml +++ b/environment.yaml @@ -11,8 +11,6 @@ dependencies: - gxx=10 - pip - git=2.35.1 - - libopenblas - - openblas - clblast - ninja - make diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index d6df0885b..7b295155e 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1624,7 +1624,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale; gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base; - //this is used for the mem_per_token eval, openblas needs more RAM + //this is used for the mem_per_token eval, blas needs more RAM bool v3_use_scratch = ggml_v3_cpu_has_gpublas(); int cu_parseinfo_maindevice = inputs.cublas_info<=0?0:inputs.cublas_info; @@ -2362,11 +2362,11 @@ int GetThreadsToUse(bool blasmode) { if(!ggml_cpu_has_gpublas()) { - return 1; + return std::min(kcpp_data->n_blasthreads, 4); } else { - return kcpp_data->n_blasthreads; + return kcpp_data->n_blasthreads; } } return kcpp_data->n_threads; diff --git a/koboldcpp.py b/koboldcpp.py index 8c3021270..30f3cfb53 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -292,7 +292,6 @@ def pick_existant_file(ntoption,nonntoption): lib_default = pick_existant_file("koboldcpp_default.dll","koboldcpp_default.so") lib_failsafe = pick_existant_file("koboldcpp_failsafe.dll","koboldcpp_failsafe.so") -lib_openblas = pick_existant_file("koboldcpp_openblas.dll","koboldcpp_openblas.so") lib_noavx2 = 
pick_existant_file("koboldcpp_noavx2.dll","koboldcpp_noavx2.so") lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so") lib_clblast_noavx2 = pick_existant_file("koboldcpp_clblast_noavx2.dll","koboldcpp_clblast_noavx2.so") @@ -302,25 +301,23 @@ lib_vulkan = pick_existant_file("koboldcpp_vulkan.dll","koboldcpp_vulkan.so") lib_vulkan_noavx2 = pick_existant_file("koboldcpp_vulkan_noavx2.dll","koboldcpp_vulkan_noavx2.so") libname = "" lib_option_pairs = [ - (lib_openblas, "Use OpenBLAS"), - (lib_default, "Use No BLAS"), + (lib_default, "Use CPU"), (lib_clblast, "Use CLBlast"), (lib_cublas, "Use CuBLAS"), (lib_hipblas, "Use hipBLAS (ROCm)"), (lib_vulkan, "Use Vulkan"), - (lib_noavx2, "NoAVX2 Mode (Old CPU)"), - (lib_clblast_noavx2, "CLBlast NoAVX2 (Old CPU)"), - (lib_vulkan_noavx2, "Vulkan NoAVX2 (Old CPU)"), + (lib_noavx2, "Use CPU (Old CPU)"), + (lib_clblast_noavx2, "Use CLBlast (Old CPU)"), + (lib_vulkan_noavx2, "Use Vulkan (Old CPU)"), (lib_failsafe, "Failsafe Mode (Old CPU)")] -openblas_option, default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs) +default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs) runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)] def init_library(): global handle, args, libname - global lib_default,lib_failsafe,lib_openblas,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2 + global lib_default,lib_failsafe,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2 libname = "" - use_openblas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir. use_clblast = False #uses CLBlast instead use_cublas = False #uses cublas instead use_hipblas = False #uses hipblas instead @@ -373,15 +370,7 @@ def init_library(): print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.") use_clblast = True else: - if not file_exists(lib_openblas) or (os.name=='nt' and not file_exists("libopenblas.dll")): - print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.") - elif args.noblas: - print("Attempting to library without OpenBLAS.") - else: - use_openblas = True - print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas will be required.") - if sys.platform=="darwin": - print("Mac OSX note: Some people have found Accelerate actually faster than OpenBLAS. To compare, run Koboldcpp with --noblas instead.") + print("Attempting to use Non-BLAS library.") if use_noavx2: if use_failsafe: @@ -399,8 +388,6 @@ def init_library(): libname = lib_cublas elif use_hipblas: libname = lib_hipblas - elif use_openblas: - libname = lib_openblas elif use_vulkan: libname = lib_vulkan else: @@ -712,35 +699,6 @@ def autoset_gpu_layers(ctxsize,sdquanted,bbs): #shitty algo to determine how man def fetch_gpu_properties(testCL,testCU,testVK): import subprocess - time.sleep(0.1) - if testCL: - try: # Get OpenCL GPU names on windows using a special binary. overwrite at known index if found. 
- basepath = os.path.abspath(os.path.dirname(__file__)) - output = "" - data = None - try: - output = subprocess.run(["clinfo","--json"], capture_output=True, text=True, check=True, encoding='utf-8').stdout - data = json.loads(output) - except Exception as e1: - output = subprocess.run([((os.path.join(basepath, "winclinfo.exe")) if os.name == 'nt' else "clinfo"),"--json"], capture_output=True, text=True, check=True, creationflags=subprocess.CREATE_NO_WINDOW | subprocess.DETACHED_PROCESS, encoding='utf-8').stdout - data = json.loads(output) - plat = 0 - dev = 0 - lowestclmem = 0 - for platform in data["devices"]: - dev = 0 - for device in platform["online"]: - dname = device["CL_DEVICE_NAME"] - dmem = int(device["CL_DEVICE_GLOBAL_MEM_SIZE"]) - idx = plat+dev*2 - if idx<len(CLDevices): - CLDevicesNames[idx] = dname - lowestclmem = dmem if lowestclmem==0 else (dmem if dmem<lowestclmem else lowestclmem) - dev += 1 - plat += 1 - MaxMemory[0] = lowestclmem - except Exception as e: - pass def auto_set_backend_cli(): fetch_gpu_properties(False,True,True) + found_new_backend = False if exitcounter < 100 and MaxMemory[0]>3500000000 and (("Use CuBLAS" in runopts and CUDevicesNames[0]!="") or "Use hipBLAS (ROCm)" in runopts) and any(CUDevicesNames): if "Use CuBLAS" in runopts or "Use hipBLAS (ROCm)" in runopts: args.usecublas = ["normal","mmq"] print("Auto Selected CUDA Backend...\n") + found_new_backend = True elif exitcounter < 100 and (1 in VKIsDGPU) and "Use Vulkan" in runopts: for i in range(0,len(VKIsDGPU)): if VKIsDGPU[i]==1: args.usevulkan = [] print("Auto Selected Vulkan Backend...\n") + found_new_backend = True break + if not found_new_backend: + print("No GPU Backend found...\n") def load_model(model_filename): global args @@ -2177,7 +2169,7 @@ def RunServerMultiThreaded(addr, port): finally: exitcounter = 999 self.httpd.server_close() - sys.exit(0) + os._exit(0) def stop(self): global exitcounter exitcounter = 999 @@ -2337,7 +2329,7 @@ def show_gui(): if not any(runopts): exitcounter = 999 - exit_with_error(2,"KoboldCPP couldn't locate any backends to use (i.e Default, OpenBLAS, CLBlast, CuBLAS).\n\nTo use the program, please run the 'make' command from the directory.","No Backends Available!") + exit_with_error(2,"KoboldCPP couldn't locate any backends to use (i.e. Default, Vulkan, CLBlast, CuBLAS).\n\nTo use the program, please run the 'make' command from the directory.","No Backends Available!") # Vars - should be in scope to be used by multiple widgets gpulayers_var = ctk.StringVar(value="-1") @@ -2530,8 +2522,8 @@ def show_gui(): def setup_backend_tooltip(parent): # backend count label with the tooltip function nl = '\n' - tooltxt = f"Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) != 6 else "") - num_backends_built = makelabel(parent, str(len(runopts)) + f"/9", 5, 2,tooltxt) + tooltxt = f"Number of backends you have built and available." 
+ (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "") + num_backends_built = makelabel(parent, str(len(runopts)) + f"/8", 5, 2,tooltxt) num_backends_built.grid(row=1, column=1, padx=195, pady=0) num_backends_built.configure(text_color="#00ff00") @@ -2550,7 +2542,7 @@ def show_gui(): predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())])) max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "") index = runopts_var.get() - gpu_be = (index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)") + gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)") layercounter_label.grid(row=6, column=1, padx=75, sticky="W") quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W") if sys.platform=="darwin" and gpulayers_var.get()=="-1": @@ -2578,10 +2570,10 @@ def show_gui(): try: s = int(gpu_choice_var.get())-1 v = runopts_var.get() - if v == "Use Vulkan" or v == "Vulkan NoAVX2 (Old CPU)": + if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)": quick_gpuname_label.configure(text=VKDevicesNames[s]) gpuname_label.configure(text=VKDevicesNames[s]) - elif v == "Use CLBlast" or v == "CLBlast NoAVX2 (Old CPU)": + elif v == "Use CLBlast" or v == "Use CLBlast (Old CPU)": quick_gpuname_label.configure(text=CLDevicesNames[s]) gpuname_label.configure(text=CLDevicesNames[s]) else: @@ -2631,19 +2623,19 @@ def show_gui(): global runmode_untouched runmode_untouched = False index = runopts_var.get() - if index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": + if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W") gpuname_label.grid(row=3, column=1, padx=75, sticky="W") gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") - if index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)": + if index == "Use CLBlast" or index == "Use CLBlast (Old CPU)": gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") CUDA_gpu_selector_box.grid_remove() CUDA_quick_gpu_selector_box.grid_remove() if gpu_choice_var.get()=="All": gpu_choice_var.set("1") - elif index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": + elif index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": gpu_selector_box.grid_remove() quick_gpu_selector_box.grid_remove() CUDA_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") @@ -2677,7 +2669,7 @@ def show_gui(): tensor_split_label.grid(row=8, column=0, padx = 8, pady=1, stick="nw") tensor_split_entry.grid(row=8, column=1, padx=8, pady=1, stick="nw") - if index == "Use Vulkan" or index == 
"Vulkan NoAVX2 (Old CPU)" or index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": + if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw") gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw") quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw") @@ -2697,7 +2689,7 @@ def show_gui(): # presets selector - makelabel(quick_tab, "Presets:", 1,0,"Select a backend to use.\nOpenBLAS and NoBLAS runs purely on CPU only.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nCLBlast works on all GPUs but is somewhat slower.\nNoAVX2 and Failsafe modes support older PCs.") + makelabel(quick_tab, "Presets:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.") runoptbox = ctk.CTkComboBox(quick_tab, values=runopts, width=180,variable=runopts_var, state="readonly") runoptbox.grid(row=1, column=1,padx=8, stick="nw") @@ -2743,7 +2735,7 @@ def show_gui(): hardware_tab = tabcontent["Hardware"] # presets selector - makelabel(hardware_tab, "Presets:", 1,0,"Select a backend to use.\nOpenBLAS and NoBLAS runs purely on CPU only.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nCLBlast works on all GPUs but is somewhat slower.\nNoAVX2 and Failsafe modes support older PCs.") + makelabel(hardware_tab, "Presets:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.") runoptbox = ctk.CTkComboBox(hardware_tab, values=runopts, width=180,variable=runopts_var, state="readonly") runoptbox.grid(row=1, column=1,padx=8, stick="nw") runoptbox.set(runopts[0]) # Set to first available option @@ -3011,9 +3003,9 @@ def show_gui(): gpuchoiceidx = 0 if gpu_choice_var.get()!="All": gpuchoiceidx = int(gpu_choice_var.get())-1 - if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "CLBlast NoAVX2 (Old CPU)": + if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Old CPU)": args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx] - if runopts_var.get() == "CLBlast NoAVX2 (Old CPU)": + if runopts_var.get() == "CUse CLBlast (Old CPU)": args.noavx2 = True if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)": if gpu_choice_var.get()=="All": @@ -3024,18 +3016,18 @@ def show_gui(): args.usecublas.append("mmq") if rowsplit_var.get()==1: args.usecublas.append("rowsplit") - if runopts_var.get() == "Use Vulkan" or runopts_var.get() == "Vulkan NoAVX2 (Old CPU)": + if runopts_var.get() == "Use Vulkan" or runopts_var.get() == "Use Vulkan (Old CPU)": if gpu_choice_var.get()=="All": args.usevulkan = [] else: args.usevulkan = [int(gpuchoiceidx)] - if runopts_var.get() == "Vulkan NoAVX2 (Old CPU)": + if runopts_var.get() == "Use Vulkan (Old CPU)": args.noavx2 = True if gpulayers_var.get(): args.gpulayers = int(gpulayers_var.get()) - if runopts_var.get()=="Use No BLAS": + if runopts_var.get()=="Use CPU": args.noblas = True - if runopts_var.get()=="NoAVX2 Mode (Old CPU)": + if runopts_var.get()=="Use CPU (Old CPU)": args.noavx2 = True if runopts_var.get()=="Failsafe Mode (Old 
CPU)": args.noavx2 = True @@ -3193,8 +3185,6 @@ def show_gui(): elif "noblas" in dict and dict["noblas"]: if default_option is not None: runopts_var.set(default_option) - elif openblas_option is not None: - runopts_var.set(openblas_option) if "gpulayers" in dict and dict["gpulayers"]: gpulayers_var.set(dict["gpulayers"]) else: @@ -4019,34 +4009,31 @@ def main(launch_args,start_server=True): nocertify = True if args.gpulayers: - global libname, lib_default, lib_openblas, lib_failsafe, lib_noavx2 - nogood = [lib_default,lib_openblas,lib_failsafe,lib_noavx2] shouldavoidgpu = False - if libname in nogood and sys.platform!="darwin": + if args.noblas and sys.platform!="darwin": shouldavoidgpu = True - if args.gpulayers>0: - if shouldavoidgpu: - print("WARNING: GPU layers is set, but a GPU backend was not selected!") - pass + if args.gpulayers and args.gpulayers>0: + print("WARNING: GPU layers is set, but a GPU backend was not selected! GPU will not be used!") + args.gpulayers = 0 elif args.gpulayers==-1 and sys.platform=="darwin" and args.model_param and os.path.exists(args.model_param): print(f"MacOS detected: Auto GPU layers set to maximum") args.gpulayers = 200 - elif args.gpulayers==-1 and not shouldavoidgpu and args.model_param and os.path.exists(args.model_param): - if not args.usecublas and not args.usevulkan and not args.useclblast: - print("NOTE: Auto GPU layers was set without picking a GPU backend! Trying to assign one for you automatically...") + elif not shouldavoidgpu and args.model_param and os.path.exists(args.model_param): + if not args.usecublas and (args.usevulkan is None) and not args.useclblast: + print("No GPU or CPU backend was selected. Trying to assign one for you automatically...") auto_set_backend_cli() - print("Trying to automatically determine GPU layers...") if MaxMemory[0] == 0: #try to get gpu vram for cuda if not picked yet fetch_gpu_properties(False,True,True) pass - if MaxMemory[0] > 0: - extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj) - layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.blasbatchsize) - print(f"Auto Recommended Layers: {layeramt}") - args.gpulayers = layeramt - else: - print(f"Could not automatically determine layers. Please set it manually.") - args.gpulayers = 0 + if args.gpulayers==-1: + if MaxMemory[0] > 0 and (not args.noblas) and (args.usecublas or (args.usevulkan is not None) or args.useclblast or sys.platform=="darwin"): + extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj) + layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.blasbatchsize) + print(f"Auto Recommended GPU Layers: {layeramt}") + args.gpulayers = layeramt + else: + print(f"No GPU backend found, or could not automatically determine GPU layers. Please set it manually.") + args.gpulayers = 0 if args.threads == -1: args.threads = get_default_threads() @@ -4398,9 +4385,9 @@ if __name__ == '__main__': compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs. For hipBLAS binaries, please check YellowRoseCx rocm fork.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq] [rowsplit]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'mmq', 'rowsplit']) compatgroup.add_argument("--usevulkan", help="Use Vulkan for GPU Acceleration. Can optionally specify GPU Device ID (e.g. 
--usevulkan 0).", metavar=('[Device ID]'), nargs='*', type=int, default=None) compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2) - compatgroup.add_argument("--noblas", help="Do not use any accelerated prompt ingestion", action='store_true') + compatgroup.add_argument("--noblas", help="Do not use any GPU acceleration (CPU Only)", action='store_true') parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 4096). Supported values are [256,512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768,49152,65536,98304,131072]. IF YOU USE ANYTHING ELSE YOU ARE ON YOUR OWN.",metavar=('[256,512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768,49152,65536,98304,131072]'), type=check_range(int,256,262144), default=4096) - parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU. Set to -1 to try autodetect (experimental)",metavar=('[GPU layers]'), nargs='?', const=1, type=int, default=0) + parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU. Set to -1 to try autodetect, set to 0 to disable GPU offload.",metavar=('[GPU layers]'), nargs='?', const=1, type=int, default=-1) parser.add_argument("--tensor_split", help="For CUDA and Vulkan only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+') #more advanced params diff --git a/koboldcpp.sh b/koboldcpp.sh index 73006f493..0c38a812d 100755 --- a/koboldcpp.sh +++ b/koboldcpp.sh @@ -25,7 +25,7 @@ if [ -n "$NOAVX2" ]; then LLAMA_NOAVX2_FLAG="LLAMA_NOAVX2=1" fi -bin/micromamba run -r conda -p conda/envs/linux make -j$(nproc) LLAMA_VULKAN=1 LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1 $LLAMA_NOAVX2_FLAG +bin/micromamba run -r conda -p conda/envs/linux make -j$(nproc) LLAMA_VULKAN=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1 $LLAMA_NOAVX2_FLAG if [[ $1 == "rebuild" ]]; then echo Rebuild complete, you can now try to launch Koboldcpp. @@ -34,8 +34,8 @@ elif [[ $1 == "dist" ]]; then if [ -n "$NOAVX2" ]; then bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND" else - bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_openblas.so:.' --add-data './koboldcpp_clblast.so:.' 
--add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND" - bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_openblas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --clean --console koboldcpp.py -n "koboldcpp-linux-x64-nocuda$KCPP_APPEND" + bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND" + bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' 
--clean --console koboldcpp.py -n "koboldcpp-linux-x64-nocuda$KCPP_APPEND" fi bin/micromamba install --no-rc -r conda -p conda/envs/linux ocl-icd -c conda-forge -y else diff --git a/lib/libopenblas.lib b/lib/libopenblas.lib deleted file mode 100644 index dbd3d882b..000000000 Binary files a/lib/libopenblas.lib and /dev/null differ diff --git a/libopenblas.dll b/libopenblas.dll deleted file mode 100644 index 7cb7e91d4..000000000 Binary files a/libopenblas.dll and /dev/null differ diff --git a/make_pyinstaller.bat b/make_pyinstaller.bat index 11a261fa9..8ef3379f5 100644 --- a/make_pyinstaller.bat +++ b/make_pyinstaller.bat @@ -1 +1 @@ -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./niko.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp_nocuda.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./niko.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp_nocuda.exe" \ No newline at end of file diff --git a/make_pyinstaller.sh b/make_pyinstaller.sh index 40e5a59cd..afd663f58 100644 --- a/make_pyinstaller.sh +++ b/make_pyinstaller.sh @@ -9,7 +9,6 @@ pyinstaller --noconfirm --onefile --clean --console --collect-all customtkinter --add-data "./taesd.embd:." \ --add-data "./taesd_xl.embd:." \ --add-data "./koboldcpp_default.so:." \ ---add-data "./koboldcpp_openblas.so:." \ --add-data "./koboldcpp_failsafe.so:." \ --add-data "./koboldcpp_noavx2.so:." \ --add-data "./koboldcpp_clblast.so:." \ diff --git a/make_pyinstaller_cuda.bat b/make_pyinstaller_cuda.bat index 1857c5e06..ae89fdd74 100644 --- a/make_pyinstaller_cuda.bat +++ b/make_pyinstaller_cuda.bat @@ -1 +1 @@ -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." 
--add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file diff --git a/make_pyinstaller_cuda12.bat b/make_pyinstaller_cuda12.bat index 7922b4c57..67305a7a9 100644 --- a/make_pyinstaller_cuda12.bat +++ b/make_pyinstaller_cuda12.bat @@ -1 +1 @@ -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_12.dll;." --add-data "./cublasLt64_12.dll;." --add-data "./cudart64_12.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." 
"./koboldcpp.py" -n "koboldcpp_cu12.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_12.dll;." --add-data "./cublasLt64_12.dll;." --add-data "./cudart64_12.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp_cu12.exe" \ No newline at end of file