added more compile flags to separate out the conda paths, and also for colab. updated readme for multitool

Concedo 2024-01-21 17:38:33 +08:00
parent 1cb8a5e955
commit 14de08586e
5 changed files with 16 additions and 7 deletions

CMakeLists.txt

@@ -1,6 +1,5 @@
-# DO NOT USE THIS FILE.
-# IT'S ONLY FOR CUBLAS BUILD PURPOSES ON WINDOWS VISUAL STUDIO.
-# IT WILL NOT BE UPDATED OR MAINTAINED !!!
+# THIS FILE IS ONLY INTENDED FOR CUBLAS BUILD PURPOSES ON WINDOWS VISUAL STUDIO.
+# YOU'RE NOT RECOMMENDED TO USE IT
 message(STATUS "============== ============== ==============")
 message(STATUS "WARNING! Recommend NOT to use this file. It is UNSUPPORTED for normal users. Use MAKE instead.")

Makefile

@@ -138,19 +138,27 @@ endif
 # it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
 ifdef LLAMA_CUBLAS
 	CUBLAS_FLAGS = -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
-	CUBLASLD_FLAGS = -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -Lconda/envs/linux/lib -Lconda/envs/linux/lib/stubs -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
+	CUBLASLD_FLAGS = -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
 	CUBLAS_OBJS = ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
 	NVCC = nvcc
 	NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
+ifdef LLAMA_ADD_CONDA_PATHS
+	CUBLASLD_FLAGS += -Lconda/envs/linux/lib -Lconda/envs/linux/lib/stubs
+endif
 ifdef CUDA_DOCKER_ARCH
 	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
 else
 ifdef LLAMA_PORTABLE
+ifdef LLAMA_COLAB #colab does not need all targets, all-major doesnt work correctly with pascal
+	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all-major
+else
 	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all
+endif #LLAMA_COLAB
 else
 	NVCCFLAGS += -arch=native
-endif
+endif #LLAMA_PORTABLE
 endif # CUDA_DOCKER_ARCH
 ifdef LLAMA_CUDA_FORCE_DMMV
@@ -187,6 +195,7 @@ endif
 ifdef LLAMA_CUDA_CCBIN
 	NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
 endif
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
 ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
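
For context, a minimal sketch of how the new LLAMA_ADD_CONDA_PATHS switch is meant to be invoked (the flag name comes from the hunk above; the exact command line is illustrative, not part of this commit):

# opt in to the bundled conda linker paths; expects a conda/envs/linux env to exist
make LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1

Moving the -Lconda/envs/linux paths behind an opt-in flag presumably keeps plain CuBLAS builds from searching those directories at all.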

README.md

@@ -40,6 +40,7 @@ For more information, be sure to run the program with the `--help` flag, or [che
 - You can attempt a CuBLAS build with `LLAMA_CUBLAS=1`. You will need CUDA Toolkit installed. Some have also reported success with the CMake file, though that is more for windows.
 - For a full featured build, do `make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1`
 - After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
+- NEW: There's now an automated build script provided, which uses conda to obtain all dependencies and generates a ready-to-use pyinstaller binary for linux users. Simply execute the build script with `./koboldcpp.sh dist` and run the generated binary.
 - Note: Many OSX users have found that using Accelerate is actually faster than OpenBLAS. To try, you may wish to run with `--noblas` and compare speeds.
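
The added README line boils down to a one-step build; a usage sketch (the `dist` argument is quoted from the line above):

# fetch dependencies via micromamba/conda, build, and package a pyinstaller binary
./koboldcpp.sh dist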

colab.ipynb

@@ -67,7 +67,7 @@
 "!echo Finding prebuilt binary for {kvers}\r\n",
 "!wget -O dlfile.tmp https://kcppcolab.concedo.workers.dev/?{kvers} && mv dlfile.tmp koboldcpp_cublas.so\r\n",
 "!test -f koboldcpp_cublas.so && echo Prebuilt Binary Exists || echo Prebuilt Binary Does Not Exist\r\n",
-"!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_PORTABLE=1\r\n",
+"!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_COLAB=1 LLAMA_PORTABLE=1\r\n",
 "!cp koboldcpp_cublas.so koboldcpp_cublas.dat\r\n",
 "!apt install aria2 -y\r\n",
 "!aria2c -x 10 -o model.ggml --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $Model\r\n",

koboldcpp.sh

@@ -9,7 +9,7 @@ if [[ ! -f "conda/envs/linux/bin/python" || $1 == "rebuild" ]]; then
 	bin/micromamba run -r conda -n linux make clean
 fi
-bin/micromamba run -r conda -n linux make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1
+bin/micromamba run -r conda -n linux make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1
 if [[ $1 == "rebuild" ]]; then
 	echo Rebuild complete, you can now try to launch Koboldcpp.
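
For completeness, a usage sketch of the other script path touched here (the `rebuild` argument is taken from the conditionals shown; behavior beyond what the diff shows is assumed):

# run make clean, then rebuild inside the micromamba env, now with LLAMA_ADD_CONDA_PATHS=1
./koboldcpp.sh rebuild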