mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-28 03:39:48 +00:00
[build]: release v0.6.1
setup.py metadata + deps updated, version.py bumped to 0.6.1, ext_bindings.cpp: fix segfault (std::cout→printf in constructors) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
948c75e76a
commit
c9264e155c
3 changed files with 59 additions and 11 deletions
|
|
@ -15,6 +15,7 @@
|
|||
#include <cpptrace/cpptrace.hpp>
|
||||
#include <csignal>
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
#include "cpu_backend/cpuinfer.h"
|
||||
|
|
@ -41,9 +42,11 @@ static const bool _is_plain_ = false;
|
|||
|
||||
#if defined(__x86_64__) && defined(USE_AMX_AVX_KERNEL)
|
||||
#include "operators/amx/awq-moe.hpp"
|
||||
#include "operators/amx/bf16-moe.hpp" // Native BF16 MoE using CRTP pattern, with fallback for AVX512F
|
||||
#include "operators/amx/fp8-moe.hpp" // FP8 MoE requires AVX512 BF16 support, with fallback for AVX512F+BW
|
||||
#if defined(__AVX512BF16__)
|
||||
#include "operators/amx/bf16-moe.hpp" // Native BF16 MoE using CRTP pattern
|
||||
#include "operators/amx/fp8-moe.hpp" // FP8 MoE requires AVX512 BF16 support
|
||||
#include "operators/amx/fp8-perchannel-moe.hpp" // FP8 Per-Channel MoE for GLM-4.7-FP8
|
||||
#endif
|
||||
#include "operators/amx/k2-moe.hpp"
|
||||
#include "operators/amx/la/amx_kernels.hpp"
|
||||
#include "operators/amx/moe.hpp"
|
||||
|
|
@ -54,7 +57,6 @@ static const bool _is_plain_ = false;
|
|||
#if defined(__x86_64__)
|
||||
#include "operators/avx2/bf16-moe.hpp"
|
||||
#include "operators/avx2/fp8-moe.hpp"
|
||||
#include "operators/avx2/gptq_int4_avxvnni-moe.hpp"
|
||||
#include "operators/avx2/gptq_int4-moe.hpp"
|
||||
#endif
|
||||
|
||||
|
|
@ -74,6 +76,9 @@ static const bool _is_plain_ = false;
|
|||
namespace py = pybind11;
|
||||
using namespace pybind11::literals;
|
||||
|
||||
// Manually bump this before each rebuild so imports can confirm the loaded
|
||||
// extension is the latest build artifact.
|
||||
static constexpr int kExtBindingsVersion = 7;
|
||||
|
||||
py::object to_float_ptr(uintptr_t input_ptr, int size, ggml_type type) {
|
||||
if (type < 0 || type >= GGML_TYPE_COUNT) {
|
||||
|
|
@ -473,6 +478,7 @@ void bind_moe_module(py::module_& moe_module, const char* name) {
|
|||
}
|
||||
|
||||
PYBIND11_MODULE(kt_kernel_ext, m) {
|
||||
m.attr("__ext_bindings_version__") = py::int_(kExtBindingsVersion);
|
||||
|
||||
py::class_<WorkerPool>(m, "WorkerPool").def(py::init<int>());
|
||||
py::class_<WorkerPoolConfig>(m, "WorkerPoolConfig")
|
||||
|
|
@ -782,7 +788,7 @@ PYBIND11_MODULE(kt_kernel_ext, m) {
|
|||
bind_moe_module<AMX_MOE_TP<amx::GemmKernel224Int4_1>>(moe_module, "AMXInt4_1_MOE");
|
||||
bind_moe_module<AMX_AWQ_MOE_TP<amx::GemmKernel224Int4_1_LowKGroup>>(moe_module, "AMXInt4_1KGroup_MOE");
|
||||
bind_moe_module<AMX_K2_MOE_TP<amx::GemmKernel224Int4SmallKGroup>>(moe_module, "AMXInt4_KGroup_MOE");
|
||||
#if defined(__AVX512F__)
|
||||
#if defined(__AVX512BF16__)
|
||||
bind_moe_module<AMX_BF16_MOE_TP<amx::GemmKernel224BF16>>(moe_module, "AMXBF16_MOE");
|
||||
bind_moe_module<AMX_FP8_MOE_TP<amx::GemmKernel224FP8>>(moe_module, "AMXFP8_MOE");
|
||||
bind_moe_module<AMX_FP8_PERCHANNEL_MOE_TP<amx::GemmKernel224FP8PerChannel>>(moe_module, "AMXFP8PerChannel_MOE");
|
||||
|
|
@ -812,8 +818,6 @@ PYBIND11_MODULE(kt_kernel_ext, m) {
|
|||
bind_moe_module<AVX2_BF16_MOE_TP<avx2::GemmKernelAVX2BF16>>(moe_module, "AVX2BF16_MOE");
|
||||
bind_moe_module<AVX2_FP8_MOE_TP<avx2::GemmKernelAVX2FP8>>(moe_module, "AVX2FP8_MOE");
|
||||
bind_moe_module<AVX2_GPTQ_INT4_MOE_TP<avx2::GemmKernelAVX2GPTQInt4>>(moe_module, "AVX2GPTQInt4_MOE");
|
||||
bind_moe_module<AVXVNNI256_GPTQ_INT4_MOE_TP<avxvnni::GemmKernelAVXVNNI256GPTQInt4>>(moe_module,
|
||||
"AVXVNNI256GPTQInt4_MOE");
|
||||
#endif
|
||||
|
||||
#if defined(USE_MOE_KERNEL)
|
||||
|
|
@ -1002,3 +1006,15 @@ __attribute__((constructor)) static void install_handlers() {
|
|||
sigaction(SIGABRT, &sa, nullptr);
|
||||
}
|
||||
|
||||
#if defined(USE_AMX_AVX_KERNEL)
|
||||
__attribute__((constructor)) static void print_ext_bindings_version() {
|
||||
printf("[kt-kernel] ext_bindings version: %d, sft_moe: %d, moe_sft_tp: %d\n", kExtBindingsVersion, kSftMoeVersion,
|
||||
kMoeSftTpVersion);
|
||||
}
|
||||
#else
|
||||
__attribute__((constructor)) static void print_ext_bindings_version() {
|
||||
printf("[kt-kernel] ext_bindings version: %d\n", kExtBindingsVersion);
|
||||
}
|
||||
#endif
|
||||
|
||||
__attribute__((constructor)) static void print_pid() { printf("[kt-kernel] PID: %d\n", getpid()); }
|
||||
|
|
|
|||
38
setup.py
38
setup.py
|
|
@ -1,6 +1,14 @@
|
|||
"""Meta-package: pip install ktransformers → installs kt-kernel + sglang-kt."""
|
||||
"""KTransformers: CPU-GPU heterogeneous fine-tuning for MoE models.
|
||||
|
||||
``pip install ktransformers`` installs:
|
||||
- ``ktransformers`` — integration glue + auto-patching for HF ecosystem
|
||||
- ``kt-kernel`` — C++ AMX kernel engine (dependency)
|
||||
- ``accelerate-kt`` — accelerate fork with KT plugin support
|
||||
- ``transformers-kt`` — transformers fork with KT training integration
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from setuptools import setup
|
||||
from setuptools import find_packages, setup
|
||||
|
||||
_version_file = Path(__file__).resolve().parent / "version.py"
|
||||
_ns = {}
|
||||
|
|
@ -8,9 +16,33 @@ exec(_version_file.read_text(), _ns)
|
|||
_v = _ns["__version__"]
|
||||
|
||||
setup(
|
||||
name="ktransformers",
|
||||
version=_v,
|
||||
description="CPU-GPU heterogeneous fine-tuning for MoE models",
|
||||
long_description=open(Path(__file__).resolve().parent / "README.md", encoding="utf-8").read(),
|
||||
long_description_content_type="text/markdown",
|
||||
author="kvcache-ai",
|
||||
url="https://github.com/kvcache-ai/ktransformers",
|
||||
packages=find_packages(),
|
||||
python_requires=">=3.10",
|
||||
install_requires=[
|
||||
f"kt-kernel=={_v}",
|
||||
f"sglang-kt=={_v}",
|
||||
"transformers-kt>=5.6.0",
|
||||
"accelerate-kt>=1.14.0",
|
||||
"peft>=0.18.0",
|
||||
"torch>=2.0.0",
|
||||
],
|
||||
classifiers=[
|
||||
"Development Status :: 4 - Beta",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Science/Research",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
"Operating System :: POSIX :: Linux",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
"""
|
||||
KTransformers version information.
|
||||
Shared across kt-kernel and kt-sft modules.
|
||||
Shared across kt-kernel, ktransformers, and sglang-kt.
|
||||
"""
|
||||
|
||||
__version__ = "0.5.3"
|
||||
__version__ = "0.6.1"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue