[build]: release v0.6.1

setup.py metadata + deps updated, version.py bumped to 0.6.1,
ext_bindings.cpp: fix segfault (std::cout→printf in constructors)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
JimmyPeilinLi 2026-04-22 06:41:18 +00:00
parent 948c75e76a
commit c9264e155c
3 changed files with 59 additions and 11 deletions
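
Why the std::cout→printf change fixes the segfault: `__attribute__((constructor))` functions in a shared library run at load time, possibly before libstdc++ has finished constructing `std::cout`, so writing to the stream can touch an uninitialized object and crash. `printf` depends only on libc stdio, which is already usable that early. A minimal sketch of the hazard (illustrative only, not the actual ext_bindings.cpp code):

```cpp
#include <cstdio>    // printf: safe in early constructors
#include <iostream>  // std::cout: may not be initialized yet at load time

// Runs when the shared object is loaded (e.g. via dlopen from a Python import),
// before main() and potentially before iostream's static initialization.
__attribute__((constructor)) static void on_load() {
    printf("[demo] loaded\n");          // OK: libc is initialized first
    // std::cout << "[demo] loaded\n";  // risky: stream object may be uninitialized
}

int main() { return 0; }
```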

ext_bindings.cpp

@@ -15,6 +15,7 @@
 #include <cpptrace/cpptrace.hpp>
 #include <csignal>
 #include <cstddef>
+#include <cstdio>
 #include <cstring>
 #include "cpu_backend/cpuinfer.h"
@@ -41,9 +42,11 @@ static const bool _is_plain_ = false;
 #if defined(__x86_64__) && defined(USE_AMX_AVX_KERNEL)
 #include "operators/amx/awq-moe.hpp"
-#include "operators/amx/bf16-moe.hpp" // Native BF16 MoE using CRTP pattern, with fallback for AVX512F
-#include "operators/amx/fp8-moe.hpp" // FP8 MoE requires AVX512 BF16 support, with fallback for AVX512F+BW
+#if defined(__AVX512BF16__)
+#include "operators/amx/bf16-moe.hpp" // Native BF16 MoE using CRTP pattern
+#include "operators/amx/fp8-moe.hpp" // FP8 MoE requires AVX512 BF16 support
+#include "operators/amx/fp8-perchannel-moe.hpp" // FP8 Per-Channel MoE for GLM-4.7-FP8
+#endif
 #include "operators/amx/k2-moe.hpp"
 #include "operators/amx/la/amx_kernels.hpp"
 #include "operators/amx/moe.hpp"
@@ -54,7 +57,6 @@ static const bool _is_plain_ = false;
 #if defined(__x86_64__)
 #include "operators/avx2/bf16-moe.hpp"
 #include "operators/avx2/fp8-moe.hpp"
-#include "operators/avx2/gptq_int4_avxvnni-moe.hpp"
 #include "operators/avx2/gptq_int4-moe.hpp"
 #endif
@@ -74,6 +76,9 @@ static const bool _is_plain_ = false;
 namespace py = pybind11;
 using namespace pybind11::literals;

+// Manually bump this before each rebuild so imports can confirm the loaded
+// extension is the latest build artifact.
+static constexpr int kExtBindingsVersion = 7;

 py::object to_float_ptr(uintptr_t input_ptr, int size, ggml_type type) {
   if (type < 0 || type >= GGML_TYPE_COUNT) {
@@ -473,6 +478,7 @@ void bind_moe_module(py::module_& moe_module, const char* name) {
 }

 PYBIND11_MODULE(kt_kernel_ext, m) {
+  m.attr("__ext_bindings_version__") = py::int_(kExtBindingsVersion);
   py::class_<WorkerPool>(m, "WorkerPool").def(py::init<int>());
   py::class_<WorkerPoolConfig>(m, "WorkerPoolConfig")
@@ -782,7 +788,7 @@ PYBIND11_MODULE(kt_kernel_ext, m) {
   bind_moe_module<AMX_MOE_TP<amx::GemmKernel224Int4_1>>(moe_module, "AMXInt4_1_MOE");
   bind_moe_module<AMX_AWQ_MOE_TP<amx::GemmKernel224Int4_1_LowKGroup>>(moe_module, "AMXInt4_1KGroup_MOE");
   bind_moe_module<AMX_K2_MOE_TP<amx::GemmKernel224Int4SmallKGroup>>(moe_module, "AMXInt4_KGroup_MOE");
-#if defined(__AVX512F__)
+#if defined(__AVX512BF16__)
   bind_moe_module<AMX_BF16_MOE_TP<amx::GemmKernel224BF16>>(moe_module, "AMXBF16_MOE");
   bind_moe_module<AMX_FP8_MOE_TP<amx::GemmKernel224FP8>>(moe_module, "AMXFP8_MOE");
   bind_moe_module<AMX_FP8_PERCHANNEL_MOE_TP<amx::GemmKernel224FP8PerChannel>>(moe_module, "AMXFP8PerChannel_MOE");
@@ -812,8 +818,6 @@ PYBIND11_MODULE(kt_kernel_ext, m) {
   bind_moe_module<AVX2_BF16_MOE_TP<avx2::GemmKernelAVX2BF16>>(moe_module, "AVX2BF16_MOE");
   bind_moe_module<AVX2_FP8_MOE_TP<avx2::GemmKernelAVX2FP8>>(moe_module, "AVX2FP8_MOE");
   bind_moe_module<AVX2_GPTQ_INT4_MOE_TP<avx2::GemmKernelAVX2GPTQInt4>>(moe_module, "AVX2GPTQInt4_MOE");
-  bind_moe_module<AVXVNNI256_GPTQ_INT4_MOE_TP<avxvnni::GemmKernelAVXVNNI256GPTQInt4>>(moe_module,
-                                                                                      "AVXVNNI256GPTQInt4_MOE");
 #endif
 #if defined(USE_MOE_KERNEL)
@@ -1002,3 +1006,15 @@ __attribute__((constructor)) static void install_handlers() {
   sigaction(SIGABRT, &sa, nullptr);
 }
+
+#if defined(USE_AMX_AVX_KERNEL)
+__attribute__((constructor)) static void print_ext_bindings_version() {
+  printf("[kt-kernel] ext_bindings version: %d, sft_moe: %d, moe_sft_tp: %d\n", kExtBindingsVersion, kSftMoeVersion,
+         kMoeSftTpVersion);
+}
+#else
+__attribute__((constructor)) static void print_ext_bindings_version() {
+  printf("[kt-kernel] ext_bindings version: %d\n", kExtBindingsVersion);
+}
+#endif
+
+__attribute__((constructor)) static void print_pid() { printf("[kt-kernel] PID: %d\n", getpid()); }
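
The new `__ext_bindings_version__` module attribute mirrors `kExtBindingsVersion`, so Python-side code can confirm it loaded the freshly built .so rather than a stale artifact. A hedged sketch of such a check (the expected-version constant and the check itself are hypothetical, not part of this commit):

```python
import kt_kernel_ext

# Hypothetical constant: must be bumped in lockstep with kExtBindingsVersion in C++.
EXPECTED_EXT_BINDINGS_VERSION = 7

loaded = kt_kernel_ext.__ext_bindings_version__
if loaded != EXPECTED_EXT_BINDINGS_VERSION:
    raise ImportError(
        f"stale kt_kernel_ext build: got version {loaded}, "
        f"expected {EXPECTED_EXT_BINDINGS_VERSION}; rebuild the extension"
    )
```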

setup.py

@@ -1,6 +1,14 @@
-"""Meta-package: pip install ktransformers → installs kt-kernel + sglang-kt."""
+"""KTransformers: CPU-GPU heterogeneous fine-tuning for MoE models.
+
+``pip install ktransformers`` installs:
+
+- ``ktransformers`` integration glue + auto-patching for HF ecosystem
+- ``kt-kernel`` C++ AMX kernel engine (dependency)
+- ``accelerate-kt`` accelerate fork with KT plugin support
+- ``transformers-kt`` transformers fork with KT training integration
+"""

 from pathlib import Path

-from setuptools import setup
+from setuptools import find_packages, setup

 _version_file = Path(__file__).resolve().parent / "version.py"
 _ns = {}
@@ -8,9 +16,33 @@ exec(_version_file.read_text(), _ns)
 _v = _ns["__version__"]

 setup(
     name="ktransformers",
     version=_v,
+    description="CPU-GPU heterogeneous fine-tuning for MoE models",
+    long_description=open(Path(__file__).resolve().parent / "README.md", encoding="utf-8").read(),
+    long_description_content_type="text/markdown",
+    author="kvcache-ai",
+    url="https://github.com/kvcache-ai/ktransformers",
+    packages=find_packages(),
+    python_requires=">=3.10",
+    install_requires=[
+        f"kt-kernel=={_v}",
+        f"sglang-kt=={_v}",
+        "transformers-kt>=5.6.0",
+        "accelerate-kt>=1.14.0",
+        "peft>=0.18.0",
+        "torch>=2.0.0",
+    ],
+    classifiers=[
+        "Development Status :: 4 - Beta",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: POSIX :: Linux",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
+        "Programming Language :: Python :: 3.13",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    ],
 )
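
Note the dependency pins: kt-kernel and sglang-kt are pinned to exactly the meta-package's own version, while the forks, peft, and torch use minimum bounds. A quick sanity check of an installed environment (a sketch using importlib.metadata, not code from this commit):

```python
from importlib.metadata import version

kt = version("ktransformers")
# kt-kernel and sglang-kt are pinned with == in install_requires,
# so their installed versions must match the meta-package exactly.
for dep in ("kt-kernel", "sglang-kt"):
    assert version(dep) == kt, f"{dep}=={version(dep)} does not match ktransformers=={kt}"
```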

version.py

@@ -1,6 +1,6 @@
 """
 KTransformers version information.
-Shared across kt-kernel and kt-sft modules.
+Shared across kt-kernel, ktransformers, and sglang-kt.
 """

-__version__ = "0.5.3"
+__version__ = "0.6.1"