Mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-11 01:24:36 +00:00
added more compile flags to set apart the conda paths, and also for colab. updated readme for multitool
Commit 14de08586e (parent 1cb8a5e955).
5 changed files with 16 additions and 7 deletions.
CMakeLists.txt
@@ -1,6 +1,5 @@
-# DO NOT USE THIS FILE.
-# IT'S ONLY FOR CUBLAS BUILD PURPOSES ON WINDOWS VISUAL STUDIO.
-# IT WILL NOT BE UPDATED OR MAINTAINED !!!
+# THIS FILE IS ONLY INTENDED CUBLAS BUILD PURPOSES ON WINDOWS VISUAL STUDIO.
+# YOU'RE NOT RECOMMENDED TO USE IT
 
 message(STATUS "============== ============== ==============")
 message(STATUS "WARNING! Recommend NOT to use this file. It is UNSUPPORTED for normal users. Use MAKE instead.")
Makefile (13 changes)
@@ -138,19 +138,27 @@ endif
 # it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
 ifdef LLAMA_CUBLAS
 	CUBLAS_FLAGS = -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
-	CUBLASLD_FLAGS = -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -Lconda/envs/linux/lib -Lconda/envs/linux/lib/stubs -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
+	CUBLASLD_FLAGS = -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
 	CUBLAS_OBJS = ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
 	NVCC = nvcc
 	NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
 
+ifdef LLAMA_ADD_CONDA_PATHS
+	CUBLASLD_FLAGS += -Lconda/envs/linux/lib -Lconda/envs/linux/lib/stubs
+endif
+
 ifdef CUDA_DOCKER_ARCH
 	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
 else
 ifdef LLAMA_PORTABLE
+ifdef LLAMA_COLAB #colab does not need all targets, all-major doesnt work correctly with pascal
+	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all-major
+else
 	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all
+endif #LLAMA_COLAB
 else
 	NVCCFLAGS += -arch=native
-endif
+endif #LLAMA_PORTABLE
 endif # CUDA_DOCKER_ARCH
 
 ifdef LLAMA_CUDA_FORCE_DMMV
@@ -187,6 +195,7 @@ endif
 ifdef LLAMA_CUDA_CCBIN
 	NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
 endif
+
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
 ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
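Both new switches are plain opt-in Makefile defines, so existing builds are unaffected unless they are passed on the command line. A minimal sketch of the resulting invocations (flag names are taken from the hunks above; the `koboldcpp_cublas` target name comes from the colab notebook further down):

# default CUBLAS build: conda paths and Colab arch tuning stay off
make LLAMA_CUBLAS=1 LLAMA_PORTABLE=1

# Colab build: compile only major GPU targets (-arch=all-major)
make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_COLAB=1 LLAMA_PORTABLE=1

# conda/micromamba build: also search conda/envs/linux/lib for CUDA libs
make LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1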
README.md
@@ -40,6 +40,7 @@ For more information, be sure to run the program with the `--help` flag, or [che
 - You can attempt a CuBLAS build with `LLAMA_CUBLAS=1`. You will need CUDA Toolkit installed. Some have also reported success with the CMake file, though that is more for windows.
 - For a full featured build, do `make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1`
 - After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
+- NEW: There's now an automated build script provided, which uses conda to obtain all dependencies and generates a ready-to-use a pyinstaller binary for linux users. Simply execute the build script with `./koboldcpp.sh dist` and run the generated binary.
 
 - Note: Many OSX users have found that the using Accelerate is actually faster than OpenBLAS. To try, you may wish to run with `--noblas` and compare speeds.
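Taken together, those README steps amount to a two-command workflow. A small sketch with placeholder values (the model file name and port below are stand-ins for the bracketed arguments, not values from this commit):

# full featured build, then launch with a model and port of your choice
make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1
python koboldcpp.py mymodel.ggml 5001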
colab.ipynb
@@ -67,7 +67,7 @@
 "!echo Finding prebuilt binary for {kvers}\r\n",
 "!wget -O dlfile.tmp https://kcppcolab.concedo.workers.dev/?{kvers} && mv dlfile.tmp koboldcpp_cublas.so\r\n",
 "!test -f koboldcpp_cublas.so && echo Prebuilt Binary Exists || echo Prebuilt Binary Does Not Exist\r\n",
-"!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_PORTABLE=1\r\n",
+"!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_COLAB=1 LLAMA_PORTABLE=1\r\n",
 "!cp koboldcpp_cublas.so koboldcpp_cublas.dat\r\n",
 "!apt install aria2 -y\r\n",
 "!aria2c -x 10 -o model.ggml --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $Model\r\n",
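The notebook cell relies on shell short-circuiting: each `test -f X && A || B` line runs A when the prebuilt library is already present and B otherwise, so the local build only happens when the download failed. The same pattern as a standalone sketch (KVERS and its value are hypothetical stand-ins for the notebook's {kvers} template variable):

#!/bin/bash
# KVERS stands in for the notebook's {kvers} version string (placeholder value)
KVERS="1.50"
# try to fetch a prebuilt CUBLAS library for this version
wget -O dlfile.tmp "https://kcppcolab.concedo.workers.dev/?${KVERS}" && mv dlfile.tmp koboldcpp_cublas.so
# short-circuit: skip the build if the prebuilt library was fetched
test -f koboldcpp_cublas.so && echo "Build Skipped" \
  || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_COLAB=1 LLAMA_PORTABLE=1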
koboldcpp.sh
@@ -9,7 +9,7 @@ if [[ ! -f "conda/envs/linux/bin/python" || $1 == "rebuild" ]]; then
   bin/micromamba run -r conda -n linux make clean
 fi
 
-bin/micromamba run -r conda -n linux make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1
+bin/micromamba run -r conda -n linux make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_ADD_CONDA_PATHS=1
 
 if [[ $1 == "rebuild" ]]; then
   echo Rebuild complete, you can now try to launch Koboldcpp.
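As the `$1` checks in this script show, it takes an optional mode argument. A likely usage summary, inferred from the diff above and the README note (only "rebuild" and "dist" are confirmed by this commit's content):

./koboldcpp.sh            # build inside the bundled micromamba conda env
./koboldcpp.sh rebuild    # run make clean first, then build again
./koboldcpp.sh dist       # additionally package a pyinstaller binary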