diff --git a/Makefile b/Makefile
index 3f15c8c29..353eb2fc2 100644
--- a/Makefile
+++ b/Makefile
@@ -225,17 +225,25 @@ endif # LLAMA_CUBLAS
 
 ifdef LLAMA_HIPBLAS
 ifeq ($(wildcard /opt/rocm),)
-	ROCM_PATH ?= /usr
+	ROCM_PATH ?= /usr
 	GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
 	HCC := $(ROCM_PATH)/bin/hipcc
 	HCXX := $(ROCM_PATH)/bin/hipcc
 else
-	ROCM_PATH ?= /opt/rocm
+	ROCM_PATH ?= /opt/rocm
 	GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
 	HCC := $(ROCM_PATH)/llvm/bin/clang
 	HCXX := $(ROCM_PATH)/llvm/bin/clang++
 endif
-	HIPFLAGS += -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_HIP_ROCWMMA_FATTN -DGGML_USE_CUDA -DSD_USE_CUBLAS -I$(ROCM_PATH)/include/rocwmma -I$(ROCM_PATH)/include $(shell $(ROCM_PATH)/bin/hipconfig -C)
+# Enable the rocWMMA flash-attention define only when rocwmma.hpp is found.
+# The active ROCM_PATH is searched first so a user-selected ROCm install is
+# honoured; /opt/rocm/include and /usr/include remain as fallbacks.
+	DETECT_ROCWMMA := $(shell find -L $(ROCM_PATH)/include /opt/rocm/include /usr/include -type f -name rocwmma.hpp 2>/dev/null | head -n 1)
+ifdef DETECT_ROCWMMA
+	HIPFLAGS += -DGGML_HIP_ROCWMMA_FATTN -I$(dir $(DETECT_ROCWMMA))
+endif
+
+	HIPFLAGS += -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
 	HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
 	HIPLDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
 	HIPLDFLAGS += -lhipblas -lamdhip64 -lrocblas
diff --git a/koboldcpp.py b/koboldcpp.py
index 7c68c48cb..23d9944d1 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -5959,7 +5959,7 @@ if __name__ == '__main__':
 
     parser.add_argument("--threads", metavar=('[threads]'), help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=get_default_threads())
     compatgroup = parser.add_mutually_exclusive_group()
-    compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs. For hipBLAS binaries, please check YellowRoseCx rocm fork.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq|nommq] [rowsplit]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'all', 'mmq', 'nommq', 'rowsplit'])
+    compatgroup.add_argument("--usecublas", "--usehipblas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs. For hipBLAS binaries, please check YellowRoseCx rocm fork.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq|nommq] [rowsplit]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'all', 'mmq', 'nommq', 'rowsplit'])
     compatgroup.add_argument("--usevulkan", help="Use Vulkan for GPU Acceleration. Can optionally specify one or more GPU Device ID (e.g. --usevulkan 0), leave blank to autodetect.", metavar=('[Device IDs]'), nargs='*', type=int, default=None)
     compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
     compatgroup.add_argument("--usecpu", help="Do not use any GPU acceleration (CPU Only)", action='store_true')