Mirror of https://github.com/kvcache-ai/ktransformers.git, synced 2025-09-10 15:29:39 +00:00.
[ADD] support multi-gpu qlen>1 q5_k

commit f5f79f5c0e
parent f293803156

63 changed files with 3271 additions and 1285 deletions
third_party/llamafile/sgemm.cpp (vendored): 19 lines changed
@@ -22,19 +22,22 @@
 #include "sgemm.h"
 // #include <cosmo.h>
-#include <cpuid.h>
+// #include <cpuid.h>
 // #include <libc/sysv/consts/hwcap.h>
 #include <stdio.h>
-#include <sys/auxv.h>
+// #include <sys/auxv.h>
 #include <cassert>
 // #include "llamafile.h"

 static const struct GemmFuncs {
-    typeof(llamafile_sgemm)* sgemm;
-    typeof(llamafile_mixmul)* mixmul;
-    typeof(llamafile_mixmul_iqk)* iqk_mixmul = iqk_mul_mat_moe_unsupported;
+    bool (*sgemm)(long, long, long, const void*, long, const void*, long, void*, long, int, int, int, int, int, int, int);
+    bool (*mixmul)(const struct ggml_compute_params*, const struct ggml_tensor*, const struct ggml_tensor*, const struct ggml_tensor*, struct ggml_tensor*);
+    bool (*iqk_mixmul)(long, long, long, int, int, const void*, const void*, float*, long, long, const void*, int, int);
+    // typeof(llamafile_sgemm)* sgemm;
+    // typeof(llamafile_mixmul)* mixmul;
+    // typeof(llamafile_mixmul_iqk)* iqk_mixmul = iqk_mul_mat_moe_unsupported;
     GemmFuncs() {
-#ifdef __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
         // if (X86_HAVE(AVX)) {
         //   if (X86_HAVE(FMA)) {
         // if (X86_HAVE(AVX2)) {
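The first hunk swaps the typeof-based member declarations for explicitly spelled-out function-pointer types and widens the architecture guard to cover MSVC (_M_X64), since typeof is a GNU extension that MSVC's C++ front end does not accept. Below is a minimal, self-contained sketch of the same dispatch-table pattern; GemmFuncsSketch, sgemm_generic, and funcs are hypothetical names, and only the sgemm pointer signature is taken from the diff.

// Sketch only: an explicit function-pointer dispatch table in the spirit of
// the GemmFuncs change above. GemmFuncsSketch, sgemm_generic and funcs are
// hypothetical names; only the sgemm pointer signature comes from the diff.
#include <cstdio>

static bool sgemm_generic(long, long, long, const void*, long, const void*,
                          long, void*, long, int, int, int, int, int, int, int) {
    return false;  // placeholder; a real build would run an actual kernel
}

struct GemmFuncsSketch {
    bool (*sgemm)(long, long, long, const void*, long, const void*, long,
                  void*, long, int, int, int, int, int, int, int) = nullptr;

    GemmFuncsSketch() {
        // Selection happens once, at static-initialization time, using
        // compile-time feature macros rather than runtime cpuid probing.
#if defined(__AVX512F__)
        std::printf("AVX-512 build detected\n");
#endif
        sgemm = sgemm_generic;
    }
};

static const GemmFuncsSketch funcs;  // the table every caller dispatches through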
|
@@ -86,10 +89,12 @@ static const struct GemmFuncs {
         // sgemm = llamafile_sgemm_unsupported;
         // mixmul = llamafile_mixmul_unsupported;
         // }
+
 #if defined(__AVX__)
-#if defined(__FMA__)
+#if defined(__FMA__) || (defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__)))
 #if defined(__AVX2__)
 #if defined(__AVX512F__)
+        printf("__AVX512F__\n");
 #if defined(__AVX512VL__) && defined(__AVX512BW__) && defined(__AVX512DQ__) && defined(__AVX512VNNI__) && defined(__AVX512BF16__)
         // AMD Zen4+ (2023-)
         sgemm = llamafile_sgemm_amd_zen4;