diff --git a/kt-kernel/ext_bindings.cpp b/kt-kernel/ext_bindings.cpp index cdcb6f5f..263af589 100644 --- a/kt-kernel/ext_bindings.cpp +++ b/kt-kernel/ext_bindings.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "cpu_backend/cpuinfer.h" @@ -41,9 +42,11 @@ static const bool _is_plain_ = false; #if defined(__x86_64__) && defined(USE_AMX_AVX_KERNEL) #include "operators/amx/awq-moe.hpp" -#include "operators/amx/bf16-moe.hpp" // Native BF16 MoE using CRTP pattern, with fallback for AVX512F -#include "operators/amx/fp8-moe.hpp" // FP8 MoE requires AVX512 BF16 support, with fallback for AVX512F+BW +#if defined(__AVX512BF16__) +#include "operators/amx/bf16-moe.hpp" // Native BF16 MoE using CRTP pattern +#include "operators/amx/fp8-moe.hpp" // FP8 MoE requires AVX512 BF16 support #include "operators/amx/fp8-perchannel-moe.hpp" // FP8 Per-Channel MoE for GLM-4.7-FP8 +#endif #include "operators/amx/k2-moe.hpp" #include "operators/amx/la/amx_kernels.hpp" #include "operators/amx/moe.hpp" @@ -54,7 +57,6 @@ static const bool _is_plain_ = false; #if defined(__x86_64__) #include "operators/avx2/bf16-moe.hpp" #include "operators/avx2/fp8-moe.hpp" -#include "operators/avx2/gptq_int4_avxvnni-moe.hpp" #include "operators/avx2/gptq_int4-moe.hpp" #endif @@ -74,6 +76,9 @@ static const bool _is_plain_ = false; namespace py = pybind11; using namespace pybind11::literals; +// Manually bump this before each rebuild so imports can confirm the loaded +// extension is the latest build artifact. +static constexpr int kExtBindingsVersion = 7; py::object to_float_ptr(uintptr_t input_ptr, int size, ggml_type type) { if (type < 0 || type >= GGML_TYPE_COUNT) { @@ -473,6 +478,7 @@ void bind_moe_module(py::module_& moe_module, const char* name) { } PYBIND11_MODULE(kt_kernel_ext, m) { + m.attr("__ext_bindings_version__") = py::int_(kExtBindingsVersion); py::class_(m, "WorkerPool").def(py::init()); py::class_(m, "WorkerPoolConfig") @@ -782,7 +788,7 @@ PYBIND11_MODULE(kt_kernel_ext, m) { bind_moe_module>(moe_module, "AMXInt4_1_MOE"); bind_moe_module>(moe_module, "AMXInt4_1KGroup_MOE"); bind_moe_module>(moe_module, "AMXInt4_KGroup_MOE"); -#if defined(__AVX512F__) +#if defined(__AVX512BF16__) bind_moe_module>(moe_module, "AMXBF16_MOE"); bind_moe_module>(moe_module, "AMXFP8_MOE"); bind_moe_module>(moe_module, "AMXFP8PerChannel_MOE"); @@ -812,8 +818,6 @@ PYBIND11_MODULE(kt_kernel_ext, m) { bind_moe_module>(moe_module, "AVX2BF16_MOE"); bind_moe_module>(moe_module, "AVX2FP8_MOE"); bind_moe_module>(moe_module, "AVX2GPTQInt4_MOE"); - bind_moe_module>(moe_module, - "AVXVNNI256GPTQInt4_MOE"); #endif #if defined(USE_MOE_KERNEL) @@ -1002,3 +1006,15 @@ __attribute__((constructor)) static void install_handlers() { sigaction(SIGABRT, &sa, nullptr); } +#if defined(USE_AMX_AVX_KERNEL) +__attribute__((constructor)) static void print_ext_bindings_version() { + printf("[kt-kernel] ext_bindings version: %d, sft_moe: %d, moe_sft_tp: %d\n", kExtBindingsVersion, kSftMoeVersion, + kMoeSftTpVersion); +} +#else +__attribute__((constructor)) static void print_ext_bindings_version() { + printf("[kt-kernel] ext_bindings version: %d\n", kExtBindingsVersion); +} +#endif + +__attribute__((constructor)) static void print_pid() { printf("[kt-kernel] PID: %d\n", getpid()); } diff --git a/setup.py b/setup.py index 5bc28743..dd36cf15 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,14 @@ -"""Meta-package: pip install ktransformers → installs kt-kernel + sglang-kt.""" +"""KTransformers: CPU-GPU heterogeneous fine-tuning for MoE models. + +``pip install ktransformers`` installs: +- ``ktransformers`` — integration glue + auto-patching for HF ecosystem +- ``kt-kernel`` — C++ AMX kernel engine (dependency) +- ``accelerate-kt`` — accelerate fork with KT plugin support +- ``transformers-kt`` — transformers fork with KT training integration +""" + from pathlib import Path -from setuptools import setup +from setuptools import find_packages, setup _version_file = Path(__file__).resolve().parent / "version.py" _ns = {} @@ -8,9 +16,33 @@ exec(_version_file.read_text(), _ns) _v = _ns["__version__"] setup( + name="ktransformers", version=_v, + description="CPU-GPU heterogeneous fine-tuning for MoE models", + long_description=open(Path(__file__).resolve().parent / "README.md", encoding="utf-8").read(), + long_description_content_type="text/markdown", + author="kvcache-ai", + url="https://github.com/kvcache-ai/ktransformers", + packages=find_packages(), + python_requires=">=3.10", install_requires=[ f"kt-kernel=={_v}", - f"sglang-kt=={_v}", + "transformers-kt>=5.6.0", + "accelerate-kt>=1.14.0", + "peft>=0.18.0", + "torch>=2.0.0", + ], + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Scientific/Engineering :: Artificial Intelligence", ], ) diff --git a/version.py b/version.py index 2681423c..47222126 100644 --- a/version.py +++ b/version.py @@ -1,6 +1,6 @@ """ KTransformers version information. -Shared across kt-kernel and kt-sft modules. +Shared across kt-kernel, ktransformers, and sglang-kt. """ -__version__ = "0.5.3" +__version__ = "0.6.1"