reinstated the q4_3 format, for backwards compatibility.

This commit is contained in:
Concedo 2023-04-29 11:42:04 +08:00
parent 0fc1772a8f
commit bb282a4ecf
12 changed files with 364 additions and 26 deletions

View file

@ -40,6 +40,7 @@ void ggml_cuda_pool_free(void * ptr, size_t size);
void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q4_3_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q5_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q5_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q8_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);