update kt-kernel

This commit is contained in:
ovowei 2025-11-03 15:19:52 +08:00
parent 1a925769d9
commit f854d03bd7
119 changed files with 4459 additions and 6368 deletions

View file

@ -0,0 +1,63 @@
// BOOST_STRONG_TYPEDEF(int8_t, int4_2_t);
#pragma once
#include <cstdint>
#include "llama.cpp/ggml.h"
#if !defined(CPUINFER_HAS_FLOAT16_T)
using float16_t = ggml_fp16_t;
#define CPUINFER_HAS_FLOAT16_T 1
#endif
#if !defined(CPUINFER_HAS_BFLOAT16_T)
using bfloat16_t = ggml_bf16_t;
#define CPUINFER_HAS_BFLOAT16_T 1
#endif // CPUINFER_HAS_BFLOAT16_T
const bool PACKED = true;
#if defined(__aarch64__) || defined(__arm__) || defined(CPU_USE_KML)
#ifndef CPU_USE_KML
#define CPU_USE_KML
#endif
#endif // USE_MOE_KERNEL_AMD or CPU_USE_KML
#define STRONG_TYPEDEF(T, D) \
struct D { \
T t; \
explicit D(const T &v) : t(v) {} \
D() = default; \
D(const D &) = default; \
D &operator=(const D &) = default; \
D &operator=(const T &rhs) { \
t = rhs; \
return *this; \
} \
operator const T &() const { return t; } \
operator T &() { return t; } \
bool operator==(const D &rhs) const { return t == rhs.t; } \
bool operator!=(const D &rhs) const { return t != rhs.t; } \
bool operator<(const D &rhs) const { return t < rhs.t; } \
};
STRONG_TYPEDEF(int8_t, int4_2_t)
typedef int8_t BLASINT8;
/* matrix transpose or conjugate transpose */
typedef enum KERNEL_CBLAS_TRANSPOSE {
KernelCblasNoTrans = 111,
KernelCblasTrans = 112,
KernelCblasConjTrans = 113,
KernelCblasConjNoTrans = 114
} KERNEL_CBLAS_TRANSPOSE;
/* matrix stored in rows or cols */
typedef enum KERNEL_CBLAS_ORDER { KernelCblasRowMajor = 101, KernelCblasColMajor = 102 } KERNEL_CBLAS_ORDER;
/* matrix position is left or right */
typedef enum KERNEL_CBLAS_SIDE { KernelCblasLeft = 141, KernelCblasRight = 142 } KERNEL_CBLAS_SIDE;
typedef KERNEL_CBLAS_ORDER KERNEL_CBLAS_LAYOUT;
typedef enum KERNEL_CBLAS_OFFSET {
KernelCblasRowOffset = 171,
KernelCblasColOffset = 172,
KernelCblasFixOffset = 173
} KERNEL_CBLAS_OFFSET;
enum class MatKernelVariant {
Decode,
Prefill,
};

View file

@ -0,0 +1,30 @@
#pragma once
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include "common.h"
using GemmFn = void (*)(const KERNEL_CBLAS_LAYOUT layout, const KERNEL_CBLAS_TRANSPOSE transa,
const KERNEL_CBLAS_TRANSPOSE transb, const KERNEL_CBLAS_OFFSET offsetc, const size_t m,
const size_t n, const size_t k, const float alpha, const void* a, const size_t lda,
const int8_t oa, const void* b, const size_t ldb, const int8_t ob, const float beta, int32_t* c,
const size_t ldc, const int32_t* oc);
struct MatKernelSelection {
GemmFn fn;
int divide_elements_size;
};
MatKernelSelection select_kernel_for_int4(MatKernelVariant variant);
MatKernelSelection select_kernel_for_int8(MatKernelVariant variant);
template <typename T>
MatKernelSelection select_mat_kernel(MatKernelVariant variant) {
if constexpr (std::is_same_v<typename T::dt, int4_2_t>) {
return select_kernel_for_int4(variant);
} else {
return select_kernel_for_int8(variant);
}
}