mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
65 lines
2.3 KiB
C
65 lines
2.3 KiB
C
// HIP/hipBLAS build: pull in the HIP headers and alias the CUDA runtime and
// cuBLAS identifiers listed below to their hip*/hipblas* counterparts.
// The #else branch includes the native CUDA headers instead.
#if defined(GGML_USE_HIPBLAS)
|
|
#include "hipblas/hipblas.h"
|
|
#include "hip/hip_runtime.h"
|
|
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
|
|
// Aliased to the same HIPBLAS_R_32F value as CUBLAS_COMPUTE_32F, so the two
// compute types are indistinguishable under this mapping.
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
|
|
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
|
|
#define CUBLAS_OP_N HIPBLAS_OP_N
|
|
#define CUBLAS_OP_T HIPBLAS_OP_T
|
|
#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
|
|
// Placeholder constant. The cublasSetMathMode() macro defined in this branch
// discards its arguments, so this value is not forwarded by that macro.
#define CUBLAS_TF32_TENSOR_OP_MATH 0
|
|
#define CUDA_R_16F HIPBLAS_R_16F
|
|
#define CUDA_R_32F HIPBLAS_R_32F
|
|
#define cublasCreate hipblasCreate
|
|
#define cublasGemmEx hipblasGemmEx
|
|
#define cublasHandle_t hipblasHandle_t
|
|
// Stubbed out: expands to a success status. Neither argument appears in the
// replacement, so `h` and `m` are not evaluated at the call site.
#define cublasSetMathMode(h, m) HIPBLAS_STATUS_SUCCESS
|
|
#define cublasSetStream hipblasSetStream
|
|
#define cublasSgemm hipblasSgemm
|
|
#define cublasStatus_t hipblasStatus_t
|
|
#define cudaDeviceSynchronize hipDeviceSynchronize
|
|
#define cudaError_t hipError_t
|
|
#define cudaEventCreateWithFlags hipEventCreateWithFlags
|
|
#define cudaEventDisableTiming hipEventDisableTiming
|
|
#define cudaEventRecord hipEventRecord
|
|
#define cudaEvent_t hipEvent_t
|
|
#define cudaFree hipFree
|
|
#define cudaFreeHost hipFreeHost
|
|
#define cudaGetErrorString hipGetErrorString
|
|
#define cudaGetLastError hipGetLastError
|
|
#define cudaMalloc hipMalloc
|
|
#define cudaMallocHost hipMallocHost
|
|
#define cudaMemcpy2DAsync hipMemcpy2DAsync
|
|
#define cudaMemcpyAsync hipMemcpyAsync
|
|
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
|
|
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
|
|
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
|
|
#define cudaStreamNonBlocking hipStreamNonBlocking
|
|
#define cudaStreamSynchronize hipStreamSynchronize
|
|
#define cudaStreamWaitEvent hipStreamWaitEvent
|
|
#define cudaStream_t hipStream_t
|
|
#define cudaSuccess hipSuccess
|
|
// The HIP build also defines GGML_USE_CUBLAS (presumably so code guarded by
// that macro is compiled for HIP as well -- confirm against the users of it).
// Guarded so a build that already passes -DGGML_USE_CUBLAS (which expands to
// 1) does not trigger an incompatible macro redefinition here.
#ifndef GGML_USE_CUBLAS
#define GGML_USE_CUBLAS
#endif
|
|
// GGML_USE_HIPBLAS not defined: include the native cuBLAS and CUDA runtime headers.
#else
|
|
#include <cublas_v2.h>
|
|
#include <cuda_runtime.h>
|
|
#endif
|
|
#include "ggml.h"
|
|
|
|
// Give the declarations below C linkage when this header is included from C++.
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
// Initialization entry point for the cuBLAS (or aliased hipBLAS) backend.
// Takes no arguments and returns nothing, so it cannot report failure through
// a return value. NOTE(review): what exactly is set up lives in the
// implementation, which is not visible in this header.
void ggml_init_cublas(void);
|
|
|
|
// Predicate for a matrix multiplication of src0 and src1 into dst.
// Presumably returns true when the CUDA path supports this combination of
// tensors -- the criteria are in the implementation; confirm there.
// src0 and src1 are const; dst is passed as a non-const pointer.
bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
|
// Returns a size for the given src0/src1/dst combination. Presumably this is
// the work-buffer size (wsize) that ggml_cuda_mul_mat() expects for the same
// tensors -- units and exact meaning are not shown here; TODO confirm.
size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
|
// Matrix multiplication entry point for the CUDA backend: takes the two const
// source tensors, the destination tensor, and a caller-supplied buffer
// (wdata) with its size (wsize). Returns nothing.
// NOTE(review): wdata/wsize look like scratch space sized via
// ggml_cuda_mul_mat_get_wsize() -- confirm against the implementation.
void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
|
|
|
|
// TODO: export these with GGML_API
|
|
// Allocates `size` (presumably bytes) of host memory and returns a pointer to it.
// NOTE(review): likely pinned memory obtained via cudaMallocHost, and likely
// to be released with ggml_cuda_host_free(); the failure return value is not
// visible from this header -- confirm in the implementation.
void * ggml_cuda_host_malloc(size_t size);
|
|
// Releases a host pointer; presumably the counterpart of
// ggml_cuda_host_malloc() -- behaviour for NULL or foreign pointers is not
// visible from this header.
void ggml_cuda_host_free(void * ptr);
|
|
|
|
// Close the extern "C" block opened earlier in this header (C++ only).
#ifdef __cplusplus
|
|
}
|
|
#endif
|