mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-08 18:30:50 +00:00
try to fix cuda build makefile
This commit is contained in:
parent
aa5124439d
commit
87175db07d
1 changed files with 17 additions and 21 deletions
38
Makefile
38
Makefile
|
|
@ -145,6 +145,7 @@ ifdef LLAMA_CUBLAS
|
|||
CUBLAS_FLAGS = -DGGML_USE_CUDA -DSD_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
||||
CUBLASLD_FLAGS = -lcuda -lcublas -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/local/cuda/targets/sbsa-linux/lib -L/usr/lib/wsl/lib
|
||||
CUBLAS_OBJS = ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
|
||||
CUBLAS_OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
|
||||
NVCC = nvcc
|
||||
NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
|
||||
|
||||
|
|
@ -201,8 +202,10 @@ ifdef LLAMA_CUDA_CCBIN
|
|||
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
||||
endif
|
||||
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h $(wildcard ggml-cuda/*.cu) $(wildcard ggml-cuda/*.cuh)
|
||||
$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
||||
ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
|
||||
$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
|
||||
$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
||||
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
|
||||
$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
||||
ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h
|
||||
|
|
@ -228,25 +231,17 @@ ifdef LLAMA_HIPBLAS
|
|||
LLAMA_CUDA_KQUANTS_ITER ?= 2
|
||||
HIPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA -DSD_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
|
||||
HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas
|
||||
HIP_OBJS += ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
|
||||
ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
|
||||
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
|
||||
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
|
||||
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||
ggml_v2-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
|
||||
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
|
||||
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
|
||||
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||
ggml_v2-cuda-legacy.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
|
||||
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
|
||||
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
|
||||
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||
ggml_v3-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
|
||||
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
|
||||
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
|
||||
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h $(wildcard ggml-cuda/*.cu) $(wildcard ggml-cuda/*.cuh)
|
||||
$(HCXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
||||
HIP_OBJS += ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
|
||||
HIP_OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
|
||||
HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
|
||||
HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
|
||||
HIPFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
|
||||
HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||
|
||||
ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
|
||||
$(HCXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
|
||||
$(HCXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
||||
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
|
||||
$(HCXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
||||
ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h
|
||||
|
|
@ -514,6 +509,7 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER)
|
|||
|
||||
clean:
|
||||
rm -vf *.o main sdmain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix imatrix.exe gguf.exe main.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
|
||||
rm -vrf ggml-cuda/*.o
|
||||
|
||||
# useful tools
|
||||
main: examples/main/main.cpp common/sampling.cpp build-info.h ggml.o ggml-quants.o ggml-alloc.o unicode.o unicode-data.o ggml-backend.o llama.o common.o console.o grammar-parser.o $(OBJS)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue