diff --git a/CMakeLists.txt b/CMakeLists.txt
index 957e6117b..df6ed0ef1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,5 @@
-# DO NOT USE THIS FILE.
-# IT'S ONLY FOR CUBLAS BUILD PURPOSES ON WINDOWS VISUAL STUDIO.
-# IT WILL NOT BE UPDATED OR MAINTAINED !!!
+# THIS FILE IS ONLY INTENDED FOR CUBLAS BUILD PURPOSES ON WINDOWS VISUAL STUDIO.
+# IT IS NOT RECOMMENDED FOR GENERAL USE.
 
 message(STATUS "============== ============== ==============")
 message(STATUS "WARNING! Recommend NOT to use this file. It is UNSUPPORTED for normal users. Use MAKE instead.")
diff --git a/Makefile b/Makefile
index c2a69d579..2cf1d3df1 100644
--- a/Makefile
+++ b/Makefile
@@ -138,19 +138,27 @@ endif
 # it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
 ifdef LLAMA_CUBLAS
 	CUBLAS_FLAGS = -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
-	CUBLASLD_FLAGS = -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -Lconda/envs/linux/lib -Lconda/envs/linux/lib/stubs -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
+	CUBLASLD_FLAGS = -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
 	CUBLAS_OBJS = ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
 	NVCC = nvcc
 	NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
 
+ifdef LLAMA_ADD_CONDA_PATHS
+	CUBLASLD_FLAGS += -Lconda/envs/linux/lib -Lconda/envs/linux/lib/stubs
+endif
+
 ifdef CUDA_DOCKER_ARCH
 	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
 else
 ifdef LLAMA_PORTABLE
+ifdef LLAMA_COLAB #colab does not need all targets, all-major doesn't work correctly with pascal
+	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all-major
+else
 	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all
+endif #LLAMA_COLAB
 else
 	NVCCFLAGS += -arch=native
-endif
+endif #LLAMA_PORTABLE
 endif # CUDA_DOCKER_ARCH
 
 ifdef LLAMA_CUDA_FORCE_DMMV
@@ -187,6 +195,7 @@ endif
 ifdef LLAMA_CUDA_CCBIN
 	NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
 endif
+
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
 ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
diff --git a/README.md b/README.md
index 282b6ff23..dc04891dc 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,7 @@ For more information, be sure to run the program with the `--help` flag, or [che
 - You can attempt a CuBLAS build with `LLAMA_CUBLAS=1`. You will need CUDA Toolkit installed. Some have also reported success with the CMake file, though that is more for windows.
 - For a full featured build, do `make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1`
 - After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
+- NEW: An automated build script is now provided. It uses conda to obtain all dependencies and generates a ready-to-use PyInstaller binary for Linux users. Simply execute it with `./koboldcpp.sh dist` and run the generated binary.
 - Note: Many OSX users have found that the using Accelerate is actually faster than OpenBLAS. To try, you may wish to run with `--noblas` and compare speeds.
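
Taken together, the Makefile changes above make two behaviors opt-in: the conda library search paths now require LLAMA_ADD_CONDA_PATHS=1, and a reduced -arch=all-major target set is selected with LLAMA_COLAB=1. A minimal sketch of the resulting invocations (the flag names come from the hunks above; the command lines mirror those used in colab.ipynb and koboldcpp.sh below):

    # default portable CUDA build: compiles for all GPU architectures (-arch=all)
    make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_PORTABLE=1

    # Colab build: all-major skips targets Colab does not need (plain all-major misbehaves on Pascal)
    make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_COLAB=1 LLAMA_PORTABLE=1

    # conda-based build: re-enable the conda lib paths that are no longer added unconditionally
    make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1
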
diff --git a/colab.ipynb b/colab.ipynb
index e8d9e0d8e..9e77ac91c 100644
--- a/colab.ipynb
+++ b/colab.ipynb
@@ -67,7 +67,7 @@
         "!echo Finding prebuilt binary for {kvers}\r\n",
         "!wget -O dlfile.tmp https://kcppcolab.concedo.workers.dev/?{kvers} && mv dlfile.tmp koboldcpp_cublas.so\r\n",
         "!test -f koboldcpp_cublas.so && echo Prebuilt Binary Exists || echo Prebuilt Binary Does Not Exist\r\n",
-        "!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_PORTABLE=1\r\n",
+        "!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_COLAB=1 LLAMA_PORTABLE=1\r\n",
         "!cp koboldcpp_cublas.so koboldcpp_cublas.dat\r\n",
         "!apt install aria2 -y\r\n",
         "!aria2c -x 10 -o model.ggml --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $Model\r\n",
diff --git a/koboldcpp.sh b/koboldcpp.sh
index 59d9bdd17..5f15bb556 100755
--- a/koboldcpp.sh
+++ b/koboldcpp.sh
@@ -9,7 +9,7 @@ if [[ ! -f "conda/envs/linux/bin/python" || $1 == "rebuild" ]]; then
     bin/micromamba run -r conda -n linux make clean
 fi
 
-bin/micromamba run -r conda -n linux make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1
+bin/micromamba run -r conda -n linux make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1
 
 if [[ $1 == "rebuild" ]]; then
     echo Rebuild complete, you can now try to launch Koboldcpp.
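
For reference, the end-to-end Linux flow the README change advertises looks roughly like the following sketch. The `dist` argument comes from the README line above; the output directory and binary name are assumptions, so check what the script actually produces locally:

    # fetch dependencies via micromamba/conda and build a PyInstaller binary
    ./koboldcpp.sh dist
    # run whatever binary the script emits (the name below is hypothetical)
    ./dist/koboldcpp-linux-x64
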