Try to make ROCm work for the GitHub CI; this requires disabling rocWMMA

This commit is contained in:
Concedo 2025-06-08 21:52:29 +08:00
parent 7f57846c2f
commit 6c5c8be48d
4 changed files with 14 additions and 5 deletions

View file

@@ -12,6 +12,7 @@ env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }} BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
KCPP_CUDA: rocm KCPP_CUDA: rocm
ARCHES_CU12: 1 ARCHES_CU12: 1
NO_WMMA: 1
jobs: jobs:
linux: linux:

View file

@@ -244,7 +244,7 @@ ifdef LLAMA_HIPBLAS
ifeq ($(wildcard /opt/rocm),) ifeq ($(wildcard /opt/rocm),)
ROCM_PATH ?= /usr ROCM_PATH ?= /usr
ifdef LLAMA_PORTABLE ifdef LLAMA_PORTABLE
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx942 gfx1010 gfx1030 gfx1031 gfx1032 gfx1100 gfx1101 gfx1102 $(shell $(shell which amdgpu-arch)) GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx942 gfx1010 gfx1030 gfx1031 gfx1032 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201 $(shell $(shell which amdgpu-arch))
else else
GPU_TARGETS ?= $(shell $(shell which amdgpu-arch)) GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
endif endif
@@ -252,13 +252,17 @@ endif
HCXX := $(ROCM_PATH)/bin/hipcc HCXX := $(ROCM_PATH)/bin/hipcc
else else
ROCM_PATH ?= /opt/rocm ROCM_PATH ?= /opt/rocm
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx942 gfx1010 gfx1030 gfx1031 gfx1032 gfx1100 gfx1101 gfx1102 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch) GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx942 gfx1010 gfx1030 gfx1031 gfx1032 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
HCC := $(ROCM_PATH)/llvm/bin/clang HCC := $(ROCM_PATH)/llvm/bin/clang
HCXX := $(ROCM_PATH)/llvm/bin/clang++ HCXX := $(ROCM_PATH)/llvm/bin/clang++
endif endif
ifdef LLAMA_NO_WMMA
HIPFLAGS += -DGGML_HIP_NO_ROCWMMA_FATTN
else
DETECT_ROCWMMA := $(shell find -L /opt/rocm/include /usr/include -type f -name rocwmma.hpp 2>/dev/null | head -n 1) DETECT_ROCWMMA := $(shell find -L /opt/rocm/include /usr/include -type f -name rocwmma.hpp 2>/dev/null | head -n 1)
ifdef DETECT_ROCWMMA ifdef DETECT_ROCWMMA
HIPFLAGS += -DGGML_HIP_ROCWMMA_FATTN -I$(dir $(DETECT_ROCWMMA)) HIPFLAGS += -DGGML_HIP_ROCWMMA_FATTN -I$(dir $(DETECT_ROCWMMA))
endif
endif endif
HIPFLAGS += -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA $(shell $(ROCM_PATH)/bin/hipconfig -C) HIPFLAGS += -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA $(shell $(ROCM_PATH)/bin/hipconfig -C)

View file

@@ -56,7 +56,7 @@ static __global__ void flash_attn_ext_f16(
const int ne1, const int ne1,
const int ne2, const int ne2,
const int ne3) { const int ne3) {
#if defined(FLASH_ATTN_AVAILABLE) && ((__CUDA_ARCH__ == GGML_CUDA_CC_VOLTA || __CUDA_ARCH__ == GGML_CUDA_CC_TURING) || (defined(GGML_HIP_ROCWMMA_FATTN) && defined(FP16_MMA_AVAILABLE))) #if !defined(GGML_HIP_NO_ROCWMMA_FATTN) && defined(FLASH_ATTN_AVAILABLE) && ((__CUDA_ARCH__ == GGML_CUDA_CC_VOLTA || __CUDA_ARCH__ == GGML_CUDA_CC_TURING) || (defined(GGML_HIP_ROCWMMA_FATTN) && defined(FP16_MMA_AVAILABLE)))
// Skip unused kernel variants for faster compilation: // Skip unused kernel variants for faster compilation:
if (use_logit_softcap && !(D == 128 || D == 256)) { if (use_logit_softcap && !(D == 128 || D == 256)) {
NO_DEVICE_CODE; NO_DEVICE_CODE;

View file

@@ -29,6 +29,7 @@ KCPP_CUDAAPPEND=-cuda${KCPP_CUDA//.}$KCPP_APPEND
LLAMA_NOAVX2_FLAG="" LLAMA_NOAVX2_FLAG=""
ARCHES_FLAG="" ARCHES_FLAG=""
NO_WMMA_FLAG=""
if [ -n "$NOAVX2" ]; then if [ -n "$NOAVX2" ]; then
LLAMA_NOAVX2_FLAG="LLAMA_NOAVX2=1" LLAMA_NOAVX2_FLAG="LLAMA_NOAVX2=1"
fi fi
@@ -38,11 +39,14 @@ fi
if [ -n "$ARCHES_CU12" ]; then if [ -n "$ARCHES_CU12" ]; then
ARCHES_FLAG="LLAMA_ARCHES_CU12=1" ARCHES_FLAG="LLAMA_ARCHES_CU12=1"
fi fi
if [ -n "$NO_WMMA" ]; then
NO_WMMA_FLAG="LLAMA_NO_WMMA=1"
fi
if [ "$KCPP_CUDA" = "rocm" ]; then if [ "$KCPP_CUDA" = "rocm" ]; then
bin/micromamba run -r conda -p conda/envs/linux make -j$(nproc) LLAMA_VULKAN=1 LLAMA_CLBLAST=1 LLAMA_HIPBLAS=1 LLAMA_PORTABLE=1 LLAMA_USE_BUNDLED_GLSLC=1 LLAMA_ADD_CONDA_PATHS=1 $LLAMA_NOAVX2_FLAG $ARCHES_FLAG bin/micromamba run -r conda -p conda/envs/linux make -j$(nproc) LLAMA_VULKAN=1 LLAMA_CLBLAST=1 LLAMA_HIPBLAS=1 LLAMA_PORTABLE=1 LLAMA_USE_BUNDLED_GLSLC=1 LLAMA_ADD_CONDA_PATHS=1 $LLAMA_NOAVX2_FLAG $ARCHES_FLAG $NO_WMMA_FLAG
else else
bin/micromamba run -r conda -p conda/envs/linux make -j$(nproc) LLAMA_VULKAN=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_USE_BUNDLED_GLSLC=1 LLAMA_ADD_CONDA_PATHS=1 $LLAMA_NOAVX2_FLAG $ARCHES_FLAG bin/micromamba run -r conda -p conda/envs/linux make -j$(nproc) LLAMA_VULKAN=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_PORTABLE=1 LLAMA_USE_BUNDLED_GLSLC=1 LLAMA_ADD_CONDA_PATHS=1 $LLAMA_NOAVX2_FLAG $ARCHES_FLAG $NO_WMMA_FLAG
fi fi
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then