#if defined(GGML_USE_HIPBLAS) #include "hipblas/hipblas.h" #include "hip/hip_runtime.h" #define CUBLAS_COMPUTE_32F HIPBLAS_R_32F #define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F #define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT #define CUBLAS_OP_N HIPBLAS_OP_N #define CUBLAS_OP_T HIPBLAS_OP_T #define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS #define CUBLAS_TF32_TENSOR_OP_MATH 0 #define CUDA_R_16F HIPBLAS_R_16F #define CUDA_R_32F HIPBLAS_R_32F #define cublasCreate hipblasCreate #define cublasGemmEx hipblasGemmEx #define cublasHandle_t hipblasHandle_t #define cublasSetMathMode(h, m) HIPBLAS_STATUS_SUCCESS #define cublasSetStream hipblasSetStream #define cublasSgemm hipblasSgemm #define cublasStatus_t hipblasStatus_t #define cudaDeviceSynchronize hipDeviceSynchronize #define cudaError_t hipError_t #define cudaEventCreateWithFlags hipEventCreateWithFlags #define cudaEventDisableTiming hipEventDisableTiming #define cudaEventRecord hipEventRecord #define cudaEvent_t hipEvent_t #define cudaFree hipFree #define cudaFreeHost hipFreeHost #define cudaGetErrorString hipGetErrorString #define cudaGetLastError hipGetLastError #define cudaMalloc hipMalloc #define cudaMallocHost hipMallocHost #define cudaMemcpy2DAsync hipMemcpy2DAsync #define cudaMemcpyAsync hipMemcpyAsync #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost #define cudaMemcpyHostToDevice hipMemcpyHostToDevice #define cudaStreamCreateWithFlags hipStreamCreateWithFlags #define cudaStreamNonBlocking hipStreamNonBlocking #define cudaStreamSynchronize hipStreamSynchronize #define cudaStreamWaitEvent hipStreamWaitEvent #define cudaStream_t hipStream_t #define cudaSuccess hipSuccess #define GGML_USE_CUBLAS #else #include #include #endif #include "ggml.h" #ifdef __cplusplus extern "C" { #endif void ggml_init_cublas(void); bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst); size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst); void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize); // TODO: export these with GGML_API void * ggml_cuda_host_malloc(size_t size); void ggml_cuda_host_free(void * ptr); #ifdef __cplusplus } #endif