mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
added more compile flags to set apart the conda paths, and also for colab. updated readme for multitool
This commit is contained in:
parent
1cb8a5e955
commit
14de08586e
5 changed files with 16 additions and 7 deletions
|
@ -1,6 +1,5 @@
|
|||
# DO NOT USE THIS FILE.
|
||||
# IT'S ONLY FOR CUBLAS BUILD PURPOSES ON WINDOWS VISUAL STUDIO.
|
||||
# IT WILL NOT BE UPDATED OR MAINTAINED !!!
|
||||
# THIS FILE IS ONLY INTENDED FOR CUBLAS BUILD PURPOSES ON WINDOWS VISUAL STUDIO.
|
||||
# YOU'RE NOT RECOMMENDED TO USE IT
|
||||
|
||||
message(STATUS "============== ============== ==============")
|
||||
message(STATUS "WARNING! Recommend NOT to use this file. It is UNSUPPORTED for normal users. Use MAKE instead.")
|
||||
|
|
13
Makefile
13
Makefile
|
@ -138,19 +138,27 @@ endif
|
|||
# it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
|
||||
ifdef LLAMA_CUBLAS
|
||||
CUBLAS_FLAGS = -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
||||
CUBLASLD_FLAGS = -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -Lconda/envs/linux/lib -Lconda/envs/linux/lib/stubs -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
|
||||
CUBLASLD_FLAGS = -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
|
||||
CUBLAS_OBJS = ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
|
||||
NVCC = nvcc
|
||||
NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
|
||||
|
||||
ifdef LLAMA_ADD_CONDA_PATHS
|
||||
CUBLASLD_FLAGS += -Lconda/envs/linux/lib -Lconda/envs/linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifdef CUDA_DOCKER_ARCH
|
||||
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
|
||||
else
|
||||
ifdef LLAMA_PORTABLE
|
||||
ifdef LLAMA_COLAB #colab does not need all targets, all-major doesn't work correctly with pascal
|
||||
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all-major
|
||||
else
|
||||
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all
|
||||
endif #LLAMA_COLAB
|
||||
else
|
||||
NVCCFLAGS += -arch=native
|
||||
endif
|
||||
endif #LLAMA_PORTABLE
|
||||
endif # CUDA_DOCKER_ARCH
|
||||
|
||||
ifdef LLAMA_CUDA_FORCE_DMMV
|
||||
|
@ -187,6 +195,7 @@ endif
|
|||
ifdef LLAMA_CUDA_CCBIN
|
||||
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
||||
endif
|
||||
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||
$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
||||
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
|
||||
|
|
|
@ -40,6 +40,7 @@ For more information, be sure to run the program with the `--help` flag, or [che
|
|||
- You can attempt a CuBLAS build with `LLAMA_CUBLAS=1`. You will need CUDA Toolkit installed. Some have also reported success with the CMake file, though that is more for windows.
|
||||
- For a full featured build, do `make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1`
|
||||
- After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
|
||||
- NEW: There's now an automated build script provided, which uses conda to obtain all dependencies and generates a ready-to-use pyinstaller binary for linux users. Simply execute the build script with `./koboldcpp.sh dist` and run the generated binary.
|
||||
|
||||
- Note: Many OSX users have found that using Accelerate is actually faster than OpenBLAS. To try, you may wish to run with `--noblas` and compare speeds.
|
||||
|
||||
|
|
|
@ -67,7 +67,7 @@
|
|||
"!echo Finding prebuilt binary for {kvers}\r\n",
|
||||
"!wget -O dlfile.tmp https://kcppcolab.concedo.workers.dev/?{kvers} && mv dlfile.tmp koboldcpp_cublas.so\r\n",
|
||||
"!test -f koboldcpp_cublas.so && echo Prebuilt Binary Exists || echo Prebuilt Binary Does Not Exist\r\n",
|
||||
"!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_PORTABLE=1\r\n",
|
||||
"!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_COLAB=1 LLAMA_PORTABLE=1\r\n",
|
||||
"!cp koboldcpp_cublas.so koboldcpp_cublas.dat\r\n",
|
||||
"!apt install aria2 -y\r\n",
|
||||
"!aria2c -x 10 -o model.ggml --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $Model\r\n",
|
||||
|
|
|
@ -9,7 +9,7 @@ if [[ ! -f "conda/envs/linux/bin/python" || $1 == "rebuild" ]]; then
|
|||
bin/micromamba run -r conda -n linux make clean
|
||||
fi
|
||||
|
||||
bin/micromamba run -r conda -n linux make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1
|
||||
bin/micromamba run -r conda -n linux make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1
|
||||
|
||||
if [[ $1 == "rebuild" ]]; then
|
||||
echo Rebuild complete, you can now try to launch Koboldcpp.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue