diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index 50537faa..4c8965cf 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -107,6 +107,7 @@ jobs: working-directory: kt-kernel env: CPUINFER_BUILD_ALL_VARIANTS: '1' + CPUINFER_ENABLE_CPPTRACE: '0' CPUINFER_USE_CUDA: '1' CPUINFER_CUDA_ARCHS: '80;86;89;90' CPUINFER_CUDA_STATIC_RUNTIME: '1' diff --git a/kt-kernel/CMakeLists.txt b/kt-kernel/CMakeLists.txt index e19aa4ea..fb5ca3b7 100644 --- a/kt-kernel/CMakeLists.txt +++ b/kt-kernel/CMakeLists.txt @@ -24,10 +24,11 @@ option(KTRANSFORMERS_CPU_DEBUG "ktransformers: DEBUG CPU use AMX" OFF) option(KTRANSFORMERS_CPU_MLA "ktransformers: CPU use MLA" OFF) option(KTRANSFORMERS_CPU_MOE_KERNEL "ktransformers: CPU use moe kernel" OFF) option(KTRANSFORMERS_CPU_MOE_AMD "ktransformers: CPU use moe kernel for amd" OFF) +option(KTRANSFORMERS_ENABLE_CPPTRACE "Enable native crash tracing in kt-kernel" OFF) # LTO control option(CPUINFER_ENABLE_LTO "Enable link time optimization (IPO)" OFF) -project(kt_kernel_ext VERSION 0.5.3) +project(kt_kernel_ext VERSION 0.6.1) # Auto-detect CPU features early (unless building with LLAMA_NATIVE) if(NOT LLAMA_NATIVE AND NOT MSVC) @@ -588,6 +589,19 @@ else() message(STATUS "LTO: disabled") endif() +if(KTRANSFORMERS_ENABLE_CPPTRACE) + include(FetchContent) + FetchContent_Declare( + cpptrace + GIT_REPOSITORY https://github.com/jeremy-rifkin/cpptrace.git + GIT_TAG v1.0.4 + ) + FetchContent_MakeAvailable(cpptrace) + target_link_libraries(${PROJECT_NAME} PRIVATE cpptrace::cpptrace) + target_compile_definitions(${PROJECT_NAME} PRIVATE KTRANSFORMERS_ENABLE_CPPTRACE=1) + message(STATUS "cpptrace: enabled") +endif() + # If BLIS was detected earlier, apply its include directory and library to the # created Python extension target. We only do this after the module target # (${PROJECT_NAME}) has been created by pybind11_add_module(). 
diff --git a/kt-kernel/autosetup.sh b/kt-kernel/autosetup.sh new file mode 100755 index 00000000..79c60163 --- /dev/null +++ b/kt-kernel/autosetup.sh @@ -0,0 +1,197 @@ +#!/usr/bin/env bash +set -euo pipefail +shopt -s nullglob + +PY_LIST=${PY_LIST:-"3.11 3.12 3.13"} +TORCH_LIST=${TORCH_LIST:-"2.11.0"} +WORK_ROOT=${WORK_ROOT:-/mnt/data3/lpl/kt-kernel-autosetup} +WHEELS_DIR=${WHEELS_DIR:-"$PWD/wheels"} +PIP_CACHE_DIR=${PIP_CACHE_DIR:-/mnt/data3/lpl/pip-cache} +TMP_ROOT=${TMP_ROOT:-/mnt/data3/lpl/tmp} +FORCE=${FORCE:-0} +REPAIR=${REPAIR:-0} +AUDITWHEEL_PLAT=${AUDITWHEEL_PLAT:-manylinux_2_28_x86_64} +CPUINFER_ENABLE_CPPTRACE=${CPUINFER_ENABLE_CPPTRACE:-OFF} + +mkdir -p "$WORK_ROOT" "$WHEELS_DIR" "$PIP_CACHE_DIR" "$TMP_ROOT" + +index_for_torch_version() { + case "$1" in + 2.3.*) echo "https://download.pytorch.org/whl/cu121" ;; + 2.4.*) echo "https://download.pytorch.org/whl/cu121" ;; + 2.5.*) echo "https://download.pytorch.org/whl/cu124" ;; + 2.6.*) echo "https://download.pytorch.org/whl/cu124" ;; + 2.7.*) echo "https://download.pytorch.org/whl/cu126" ;; + 2.8.*) echo "https://download.pytorch.org/whl/cu128" ;; + 2.9.*) echo "https://download.pytorch.org/whl/cu128" ;; + 2.10.*) echo "" ;; + 2.11.*) echo "" ;; + *) echo "https://download.pytorch.org/whl/cu124" ;; + esac +} + +verify_torch_stack() { + python - <<'PY' +import email +import importlib.metadata as md +import pathlib +import site +import sys +from packaging.requirements import Requirement + +import torch + +sp = pathlib.Path(site.getsitepackages()[0]) +meta = next(sp.glob('torch-*.dist-info/METADATA')) +msg = email.message_from_string(meta.read_text()) +def norm(name: str) -> str: + return name.lower().replace('_', '-').replace('.', '-') + +expected = {} +for line in msg.get_all('Requires-Dist', []): + req = Requirement(line) + if not req.name.startswith('nvidia-'): + continue + pinned = [spec.version for spec in req.specifier if spec.operator == '=='] + if len(pinned) != 1: + continue + 
expected[norm(req.name)] = (req.name, pinned[0]) + +installed_versions = {} +for dist in md.distributions(): + name = dist.metadata.get('Name') + if not name: + continue + installed_versions[norm(name)] = dist.version + +mismatch = [] +for key, (pkg, ver) in sorted(expected.items()): + installed = installed_versions.get(key) + if installed is None: + mismatch.append(f'{pkg}: missing, expected {ver}') + continue + if installed != ver: + mismatch.append(f'{pkg}: installed {installed}, expected {ver}') + +cusparselt = sp / 'cusparselt' / 'lib' / 'libcusparseLt.so.0' +if not cusparselt.exists(): + mismatch.append(f'cusparselt layout missing: expected {cusparselt}') + +if mismatch: + print('Torch CUDA runtime stack is inconsistent:', file=sys.stderr) + for item in mismatch: + print(f' - {item}', file=sys.stderr) + raise SystemExit(2) + +print('TORCH_OK', torch.__version__, torch.version.cuda, torch.cuda.is_available()) +print('CUSPARSELT_PATH', cusparselt) +PY +} + +verify_wheel_contents() { + python - "$1" <<'PY' +import pathlib +import sys +import zipfile +wheel = pathlib.Path(sys.argv[1]) +with zipfile.ZipFile(wheel) as zf: + names = set(zf.namelist()) +if not any(name.startswith('kt_kernel/kt_kernel_ext') and name.endswith('.so') for name in names): + raise SystemExit('missing kt_kernel_ext shared object in wheel') +required = [ + 'kt_kernel/sft/__init__.py', + 'kt_kernel/sft/wrapper.py', + 'kt_kernel/cli/completions/_kt', +] +missing = [name for name in required if name not in names] +if missing: + raise SystemExit(f'missing required wheel entries: {missing}') +print(f'WHEEL_OK {wheel.name}') +PY +} + +for py in $PY_LIST; do + PYBIN="$(command -v python${py} || true)" + if [[ ! 
-x "$PYBIN" ]]; then + echo ">> Skip python ${py}: not found" + continue + fi + + for tv in $TORCH_LIST; do + echo "======== Build: Python ${py} × Torch ${tv} ========" + ENV_DIR="$WORK_ROOT/.venv-py${py//./}-torch${tv//./}" + OUT_DIR="$WHEELS_DIR/py${py//./}-torch${tv//./}" + IDX="$(index_for_torch_version "$tv")" + + if [[ "$FORCE" = "1" ]]; then + rm -rf "$OUT_DIR" + elif compgen -G "$OUT_DIR/*.whl" > /dev/null; then + echo ">> Found existing wheel for py${py//./}-torch${tv//./}, skip" + continue + fi + + rm -rf "$ENV_DIR" + mkdir -p "$OUT_DIR" + "$PYBIN" -m venv "$ENV_DIR" + # shellcheck disable=SC1090 + source "$ENV_DIR/bin/activate" + + export PYTHONNOUSERSITE=1 + export PIP_CACHE_DIR + export CPUINFER_ENABLE_CPPTRACE + export TMPDIR="$TMP_ROOT" + export TEMP="$TMP_ROOT" + export TMP="$TMP_ROOT" + + python -m pip install -U pip setuptools wheel build cmake pybind11 packaging numpy + if [[ -n "$IDX" ]]; then + python -m pip install --index-url "$IDX" "torch==$tv" + else + python -m pip install "torch==$tv" + fi + verify_torch_stack + + rm -rf build dist kt_kernel.egg-info + python -m build --no-isolation --wheel -v + + wheels=(dist/*.whl) + if (( ${#wheels[@]} != 1 )); then + echo "!! 
expected exactly one wheel in dist/, got ${#wheels[@]}" >&2 + exit 2 + fi + + verify_wheel_contents "${wheels[0]}" + + python - "$OUT_DIR/build-info.txt" "$py" "$tv" "$IDX" "$CPUINFER_ENABLE_CPPTRACE" <<'PY' +from pathlib import Path +import platform +import sys +import torch +out = Path(sys.argv[1]) +out.write_text( + f"python={sys.argv[2]}\n" + f"torch={torch.__version__}\n" + f"torch_cuda={torch.version.cuda}\n" + f"cuda_available={torch.cuda.is_available()}\n" + f"index_url={sys.argv[4]}\n" + f"platform={platform.platform()}\n" + f"cpptrace={sys.argv[5]}\n" +) +print(f"BUILD_INFO {out}") +PY + + if [[ "$REPAIR" = "1" ]]; then + python -m pip install -U auditwheel patchelf + rm -rf "$OUT_DIR/wheelhouse" + mkdir -p "$OUT_DIR/wheelhouse" + auditwheel repair "${wheels[0]}" --plat "$AUDITWHEEL_PLAT" -w "$OUT_DIR/wheelhouse" + cp "$OUT_DIR/wheelhouse"/*.whl "$OUT_DIR/" + else + cp "${wheels[0]}" "$OUT_DIR/" + fi + + deactivate + done +done + +echo "== Wheels saved in ${WHEELS_DIR} ==" diff --git a/kt-kernel/ext_bindings.cpp b/kt-kernel/ext_bindings.cpp index cdcb6f5f..f171b2c5 100644 --- a/kt-kernel/ext_bindings.cpp +++ b/kt-kernel/ext_bindings.cpp @@ -12,7 +12,9 @@ #include #include +#if defined(KTRANSFORMERS_ENABLE_CPPTRACE) #include +#endif #include #include #include @@ -54,8 +56,8 @@ static const bool _is_plain_ = false; #if defined(__x86_64__) #include "operators/avx2/bf16-moe.hpp" #include "operators/avx2/fp8-moe.hpp" -#include "operators/avx2/gptq_int4_avxvnni-moe.hpp" #include "operators/avx2/gptq_int4-moe.hpp" +#include "operators/avx2/gptq_int4_avxvnni-moe.hpp" #endif #include // std::vector/std::pair/std::string conversions @@ -74,7 +76,6 @@ static const bool _is_plain_ = false; namespace py = pybind11; using namespace pybind11::literals; - py::object to_float_ptr(uintptr_t input_ptr, int size, ggml_type type) { if (type < 0 || type >= GGML_TYPE_COUNT) { PyErr_SetString(PyExc_ValueError, "Invalid ggml_type"); @@ -473,7 +474,6 @@ void 
bind_moe_module(py::module_& moe_module, const char* name) { } PYBIND11_MODULE(kt_kernel_ext, m) { - py::class_(m, "WorkerPool").def(py::init()); py::class_(m, "WorkerPoolConfig") .def(py::init<>()) @@ -813,7 +813,7 @@ PYBIND11_MODULE(kt_kernel_ext, m) { bind_moe_module>(moe_module, "AVX2FP8_MOE"); bind_moe_module>(moe_module, "AVX2GPTQInt4_MOE"); bind_moe_module>(moe_module, - "AVXVNNI256GPTQInt4_MOE"); + "AVXVNNI256GPTQInt4_MOE"); #endif #if defined(USE_MOE_KERNEL) @@ -976,6 +976,7 @@ PYBIND11_MODULE(kt_kernel_ext, m) { py::arg("size"), py::arg("type")); } +#if defined(KTRANSFORMERS_ENABLE_CPPTRACE) static void warmup_cpptrace() { // Warm up cpptrace once so the first real backtrace does not trigger lazy loading (malloc etc.) inside a signal handler cpptrace::frame_ptr buffer[10]; @@ -1002,3 +1003,4 @@ __attribute__((constructor)) static void install_handlers() { sigaction(SIGABRT, &sa, nullptr); } +#endif diff --git a/kt-kernel/install.sh b/kt-kernel/install.sh index 2f41b8d8..06a7d8c9 100755 --- a/kt-kernel/install.sh +++ b/kt-kernel/install.sh @@ -75,6 +75,7 @@ Optional variables (with defaults): CPUINFER_ENABLE_AVX512_VNNI=ON/OFF Override VNNI detection (auto if unset) CPUINFER_ENABLE_AVX512_BF16=ON/OFF Override BF16 detection (auto if unset) CPUINFER_ENABLE_AVX512_VBMI=ON/OFF Override VBMI detection (auto if unset) + CPUINFER_ENABLE_CPPTRACE=ON/OFF Enable native crash tracing (default OFF) Software Fallback Support: ✓ If VNNI not available: Uses AVX512BW fallback (2-3x slower but works) @@ -392,6 +393,7 @@ echo " CPUINFER_ENABLE_AMX = $CPUINFER_ENABLE_AMX" echo " CPUINFER_ENABLE_AVX512_VNNI = ${CPUINFER_ENABLE_AVX512_VNNI:-AUTO}" echo " CPUINFER_ENABLE_AVX512_BF16 = ${CPUINFER_ENABLE_AVX512_BF16:-AUTO}" echo " CPUINFER_ENABLE_AVX512_VBMI = ${CPUINFER_ENABLE_AVX512_VBMI:-AUTO}" +echo " CPUINFER_ENABLE_CPPTRACE = ${CPUINFER_ENABLE_CPPTRACE:-OFF}" echo " CPUINFER_BUILD_TYPE = ${CPUINFER_BUILD_TYPE:-Release}" echo " CPUINFER_PARALLEL = ${CPUINFER_PARALLEL:-AUTO}" echo " CPUINFER_VERBOSE = 
${CPUINFER_VERBOSE:-1}" diff --git a/kt-kernel/operators/amx/moe.hpp b/kt-kernel/operators/amx/moe.hpp index d4ad682f..4a8450df 100644 --- a/kt-kernel/operators/amx/moe.hpp +++ b/kt-kernel/operators/amx/moe.hpp @@ -251,7 +251,7 @@ class AMX_MOE_TP : public AMX_MOE_BASE> { if (config_.load) { std::cout << "Loading from \"" << prefix << "\"" << std::endl; pool->do_work_stealing_job( - config_.expert_num * mat_type_all * mat_split, nullptr, + config_.expert_num * mat_type_all * mat_split, [this, physical_to_logical_map, prefix, mat_type_all, mat_split](int task_id) { int64_t expert_idx = task_id / (mat_type_all * mat_split); uint64_t logical_expert_id = expert_map(physical_to_logical_map, expert_idx); @@ -273,8 +273,7 @@ class AMX_MOE_TP : public AMX_MOE_BASE> { read_weights(prefix, "_down_", (char*)down_bb_[expert_idx]->b, logical_expert_id, size, scale_size, mat_split, mat_split_idex); } - }, - nullptr, "load_fwd_kt"); + }); } // check process, store down matrix to check #ifdef CHECK diff --git a/kt-kernel/pyproject.toml b/kt-kernel/pyproject.toml index 9bd13a15..0f17adb2 100644 --- a/kt-kernel/pyproject.toml +++ b/kt-kernel/pyproject.toml @@ -18,28 +18,29 @@ classifiers = [ "Operating System :: POSIX :: Linux", "Operating System :: MacOS", ] -requires-python = ">=3.8" +requires-python = ">=3.11" dependencies = [ # Core dependencies "torch>=2.0.0", "safetensors>=0.4.0", - "compressed-tensors>=0.7.0", "numpy>=1.24.0", "triton>=2.0.0", "gguf>=0.17.0", # CLI dependencies - "typer[all]>=0.9.0", + "typer>=0.9.0", "rich>=13.0.0", "pyyaml>=6.0", "httpx>=0.25.0", "packaging>=23.0", - # SGLang (kvcache-ai fork) - "sglang-kt", - # Development dependencies - "black>=25.9.0", ] [project.optional-dependencies] +sglang = [ + "sglang-kt", +] +convert = [ + "compressed-tensors>=0.7.0", +] test = [ "pytest>=7.0.0", "psutil>=5.9.0", diff --git a/kt-kernel/python/__init__.py b/kt-kernel/python/__init__.py index 075b0081..47da096b 100644 --- a/kt-kernel/python/__init__.py +++ 
b/kt-kernel/python/__init__.py @@ -77,9 +77,9 @@ try: _version_ns = {} with open(_root_version_file, "r", encoding="utf-8") as f: exec(f.read(), _version_ns) - __version__ = _version_ns.get("__version__", "0.5.3") + __version__ = _version_ns.get("__version__", "0.6.1") else: - __version__ = "0.5.3" + __version__ = "0.6.1" except ImportError: # Python < 3.8, fallback to pkg_resources or hardcoded version try: @@ -88,8 +88,8 @@ except ImportError: try: __version__ = get_distribution("kt-kernel").version except DistributionNotFound: - __version__ = "0.5.3" + __version__ = "0.6.1" except ImportError: - __version__ = "0.5.3" + __version__ = "0.6.1" __all__ = ["KTMoEWrapper", "AMXSFTMoEWrapper", "generate_gpu_experts_masks", "kt_kernel_ext", "__cpu_variant__", "__version__"] diff --git a/kt-kernel/python/cli/__init__.py b/kt-kernel/python/cli/__init__.py index 2d06fb4e..267db0e3 100644 --- a/kt-kernel/python/cli/__init__.py +++ b/kt-kernel/python/cli/__init__.py @@ -16,6 +16,6 @@ except PackageNotFoundError: _root_version_file = Path(__file__).resolve().parents[3] / "version.py" if _root_version_file.exists(): exec(_root_version_file.read_text(encoding="utf-8"), _version_ns) - __version__ = _version_ns.get("__version__", "0.5.3") + __version__ = _version_ns.get("__version__", "0.6.1") else: - __version__ = "0.5.3" + __version__ = "0.6.1" diff --git a/kt-kernel/requirements.txt b/kt-kernel/requirements.txt index 33cc7f85..884999d2 100644 --- a/kt-kernel/requirements.txt +++ b/kt-kernel/requirements.txt @@ -5,9 +5,6 @@ # Core dependencies (minimum versions) torch>=2.0.0 safetensors>=0.4.0 -compressed-tensors>=0.7.0 numpy>=1.24.0 triton>=2.0.0 gguf>=0.17.0 -# Development dependencies -black>=25.9.0 diff --git a/kt-kernel/setup.py b/kt-kernel/setup.py index 895bfa5e..a14b0a5a 100644 --- a/kt-kernel/setup.py +++ b/kt-kernel/setup.py @@ -24,6 +24,7 @@ Environment knobs (export before running pip install .): CPUINFER_ENABLE_AVX512_VNNI=OFF ON/OFF -> -DLLAMA_AVX512_VNNI 
CPUINFER_ENABLE_AVX512_BF16=OFF ON/OFF -> -DLLAMA_AVX512_BF16 CPUINFER_ENABLE_AVX512_VBMI=OFF ON/OFF -> -DLLAMA_AVX512_VBMI (required for FP8 MoE) + CPUINFER_ENABLE_CPPTRACE=ON/OFF ON/OFF -> -DKTRANSFORMERS_ENABLE_CPPTRACE (debug-only) CPUINFER_BLIS_ROOT=/path/to/blis Forward to -DBLIS_ROOT @@ -610,6 +611,7 @@ class CMakeBuild(build_ext): _forward_bool_env(cmake_args, "CPUINFER_ENABLE_LTO", "CPUINFER_ENABLE_LTO") _forward_str_env(cmake_args, "CPUINFER_LTO_JOBS", "CPUINFER_LTO_JOBS") _forward_str_env(cmake_args, "CPUINFER_LTO_MODE", "CPUINFER_LTO_MODE") + _forward_bool_env(cmake_args, "CPUINFER_ENABLE_CPPTRACE", "KTRANSFORMERS_ENABLE_CPPTRACE") # CUDA static runtime toggle _forward_bool_env(cmake_args, "CPUINFER_CUDA_STATIC_RUNTIME", "KTRANSFORMERS_CUDA_STATIC_RUNTIME") @@ -695,9 +697,9 @@ if _version_file.exists(): _version_ns = {} with open(_version_file, "r", encoding="utf-8") as f: exec(f.read(), _version_ns) - _base_version = _version_ns.get("__version__", "0.5.3") + _base_version = _version_ns.get("__version__", "0.6.1") else: - _base_version = "0.5.3" + _base_version = "0.6.1" # Determine version if "CPUINFER_VERSION" in os.environ: @@ -727,23 +729,31 @@ setup( description="KT-Kernel: High-performance kernel operations for KTransformers (AMX/AVX/KML optimizations)", author="kvcache-ai", license="Apache-2.0", - python_requires=">=3.8", + python_requires=">=3.11", packages=[ "kt_kernel", "kt_kernel.utils", + "kt_kernel.sft", "kt_kernel.cli", "kt_kernel.cli.commands", + "kt_kernel.cli.completions", "kt_kernel.cli.config", "kt_kernel.cli.utils", ], package_dir={ "kt_kernel": "python", "kt_kernel.utils": "python/utils", + "kt_kernel.sft": "python/sft", "kt_kernel.cli": "python/cli", "kt_kernel.cli.commands": "python/cli/commands", + "kt_kernel.cli.completions": "python/cli/completions", "kt_kernel.cli.config": "python/cli/config", "kt_kernel.cli.utils": "python/cli/utils", }, + package_data={ + "kt_kernel.cli.completions": ["*.bash", "*.fish", "_kt"], + }, + 
include_package_data=True, entry_points={ "console_scripts": [ "kt=kt_kernel.cli.main:main", diff --git a/ktransformers/__init__.py b/ktransformers/__init__.py new file mode 100644 index 00000000..df81f12a --- /dev/null +++ b/ktransformers/__init__.py @@ -0,0 +1,34 @@ +"""Top-level Python package for KTransformers. + +The runtime kernels live in kt-kernel. Optional SFT support is activated +via pip install "ktransformers[sft]" which adds transformers-kt and +accelerate-kt to the environment. +""" + +from __future__ import annotations + +from importlib.metadata import PackageNotFoundError, version +from pathlib import Path + + +def _read_repo_version() -> str: + ns: dict[str, str] = {} + exec((Path(__file__).resolve().parents[1] / 'version.py').read_text(), ns) + return ns['__version__'] + + +try: + __version__ = version('ktransformers') +except PackageNotFoundError: + __version__ = _read_repo_version() + + +def has_sft_support() -> bool: + try: + import kt_kernel.sft # noqa: F401 + except Exception: + return False + return True + + +__all__ = ['__version__', 'has_sft_support'] diff --git a/pyproject.toml b/pyproject.toml index e32acdb9..9ff862bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,12 +4,12 @@ build-backend = "setuptools.build_meta" [project] name = "ktransformers" -dynamic = ["version", "dependencies"] +dynamic = ["version", "dependencies", "optional-dependencies"] description = "KTransformers: CPU-GPU heterogeneous inference framework for LLMs" readme = "README.md" authors = [{ name = "kvcache-ai" }] license = "Apache-2.0" -requires-python = ">=3.8" +requires-python = ">=3.11" classifiers = [ "Programming Language :: Python :: 3", "Operating System :: POSIX :: Linux", @@ -19,5 +19,5 @@ classifiers = [ Homepage = "https://github.com/kvcache-ai/ktransformers" [tool.setuptools] -# No actual Python packages — this is a meta-package -packages = [] +# Ship a minimal top-level Python package so the distribution is importable. 
+packages = ["ktransformers"] diff --git a/setup.py b/setup.py index 5bc28743..7e868307 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,9 @@ -"""Meta-package: pip install ktransformers → installs kt-kernel + sglang-kt.""" +"""Lightweight top-level package: pip install ktransformers -> installs kt-kernel. + +Extras: + - ktransformers[sft] installs transformers-kt + accelerate-kt + - ktransformers[sglang] installs sglang-kt +""" from pathlib import Path from setuptools import setup @@ -11,6 +16,14 @@ setup( version=_v, install_requires=[ f"kt-kernel=={_v}", - f"sglang-kt=={_v}", ], + extras_require={ + "sft": [ + "transformers-kt==5.6.0", + "accelerate-kt==1.14.0", + ], + "sglang": [ + "sglang-kt>=0.5.3", + ], + }, ) diff --git a/version.py b/version.py index 2681423c..4997b6f9 100644 --- a/version.py +++ b/version.py @@ -1,6 +1,6 @@ """ KTransformers version information. -Shared across kt-kernel and kt-sft modules. +Shared across the top-level package and kt-kernel. """ -__version__ = "0.5.3" +__version__ = "0.6.1"