diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index 50537faa..4c8965cf 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -107,6 +107,7 @@ jobs: working-directory: kt-kernel env: CPUINFER_BUILD_ALL_VARIANTS: '1' + CPUINFER_ENABLE_CPPTRACE: '0' CPUINFER_USE_CUDA: '1' CPUINFER_CUDA_ARCHS: '80;86;89;90' CPUINFER_CUDA_STATIC_RUNTIME: '1' diff --git a/kt-kernel/CMakeLists.txt b/kt-kernel/CMakeLists.txt index e19aa4ea..fb5ca3b7 100644 --- a/kt-kernel/CMakeLists.txt +++ b/kt-kernel/CMakeLists.txt @@ -24,10 +24,11 @@ option(KTRANSFORMERS_CPU_DEBUG "ktransformers: DEBUG CPU use AMX" OFF) option(KTRANSFORMERS_CPU_MLA "ktransformers: CPU use MLA" OFF) option(KTRANSFORMERS_CPU_MOE_KERNEL "ktransformers: CPU use moe kernel" OFF) option(KTRANSFORMERS_CPU_MOE_AMD "ktransformers: CPU use moe kernel for amd" OFF) +option(KTRANSFORMERS_ENABLE_CPPTRACE "Enable native crash tracing in kt-kernel" OFF) # LTO control option(CPUINFER_ENABLE_LTO "Enable link time optimization (IPO)" OFF) -project(kt_kernel_ext VERSION 0.5.3) +project(kt_kernel_ext VERSION 0.6.1) # Auto-detect CPU features early (unless building with LLAMA_NATIVE) if(NOT LLAMA_NATIVE AND NOT MSVC) @@ -588,6 +589,19 @@ else() message(STATUS "LTO: disabled") endif() +if(KTRANSFORMERS_ENABLE_CPPTRACE) + include(FetchContent) + FetchContent_Declare( + cpptrace + GIT_REPOSITORY https://github.com/jeremy-rifkin/cpptrace.git + GIT_TAG v1.0.4 + ) + FetchContent_MakeAvailable(cpptrace) + target_link_libraries(${PROJECT_NAME} PRIVATE cpptrace::cpptrace) + target_compile_definitions(${PROJECT_NAME} PRIVATE KTRANSFORMERS_ENABLE_CPPTRACE=1) + message(STATUS "cpptrace: enabled") +endif() + # If BLIS was detected earlier, apply its include directory and library to the # created Python extension target. We only do this after the module target # (${PROJECT_NAME}) has been created by pybind11_add_module(). 
diff --git a/kt-kernel/autosetup.sh b/kt-kernel/autosetup.sh new file mode 100755 index 00000000..79c60163 --- /dev/null +++ b/kt-kernel/autosetup.sh @@ -0,0 +1,197 @@ +#!/usr/bin/env bash +set -euo pipefail +shopt -s nullglob + +PY_LIST=${PY_LIST:-"3.11 3.12 3.13"} +TORCH_LIST=${TORCH_LIST:-"2.11.0"} +WORK_ROOT=${WORK_ROOT:-/mnt/data3/lpl/kt-kernel-autosetup} +WHEELS_DIR=${WHEELS_DIR:-"$PWD/wheels"} +PIP_CACHE_DIR=${PIP_CACHE_DIR:-/mnt/data3/lpl/pip-cache} +TMP_ROOT=${TMP_ROOT:-/mnt/data3/lpl/tmp} +FORCE=${FORCE:-0} +REPAIR=${REPAIR:-0} +AUDITWHEEL_PLAT=${AUDITWHEEL_PLAT:-manylinux_2_28_x86_64} +CPUINFER_ENABLE_CPPTRACE=${CPUINFER_ENABLE_CPPTRACE:-OFF} + +mkdir -p "$WORK_ROOT" "$WHEELS_DIR" "$PIP_CACHE_DIR" "$TMP_ROOT" + +index_for_torch_version() { + case "$1" in + 2.3.*) echo "https://download.pytorch.org/whl/cu121" ;; + 2.4.*) echo "https://download.pytorch.org/whl/cu121" ;; + 2.5.*) echo "https://download.pytorch.org/whl/cu124" ;; + 2.6.*) echo "https://download.pytorch.org/whl/cu124" ;; + 2.7.*) echo "https://download.pytorch.org/whl/cu126" ;; + 2.8.*) echo "https://download.pytorch.org/whl/cu128" ;; + 2.9.*) echo "https://download.pytorch.org/whl/cu128" ;; + 2.10.*) echo "" ;; + 2.11.*) echo "" ;; + *) echo "https://download.pytorch.org/whl/cu124" ;; + esac +} + +verify_torch_stack() { + python - <<'PY' +import email +import importlib.metadata as md +import pathlib +import site +import sys +from packaging.requirements import Requirement + +import torch + +sp = pathlib.Path(site.getsitepackages()[0]) +meta = next(sp.glob('torch-*.dist-info/METADATA')) +msg = email.message_from_string(meta.read_text()) +def norm(name: str) -> str: + return name.lower().replace('_', '-').replace('.', '-') + +expected = {} +for line in msg.get_all('Requires-Dist', []): + req = Requirement(line) + if not req.name.startswith('nvidia-'): + continue + pinned = [spec.version for spec in req.specifier if spec.operator == '=='] + if len(pinned) != 1: + continue + 
expected[norm(req.name)] = (req.name, pinned[0]) + +installed_versions = {} +for dist in md.distributions(): + name = dist.metadata.get('Name') + if not name: + continue + installed_versions[norm(name)] = dist.version + +mismatch = [] +for key, (pkg, ver) in sorted(expected.items()): + installed = installed_versions.get(key) + if installed is None: + mismatch.append(f'{pkg}: missing, expected {ver}') + continue + if installed != ver: + mismatch.append(f'{pkg}: installed {installed}, expected {ver}') + +cusparselt = sp / 'cusparselt' / 'lib' / 'libcusparseLt.so.0' +if not cusparselt.exists(): + mismatch.append(f'cusparselt layout missing: expected {cusparselt}') + +if mismatch: + print('Torch CUDA runtime stack is inconsistent:', file=sys.stderr) + for item in mismatch: + print(f' - {item}', file=sys.stderr) + raise SystemExit(2) + +print('TORCH_OK', torch.__version__, torch.version.cuda, torch.cuda.is_available()) +print('CUSPARSELT_PATH', cusparselt) +PY +} + +verify_wheel_contents() { + python - "$1" <<'PY' +import pathlib +import sys +import zipfile +wheel = pathlib.Path(sys.argv[1]) +with zipfile.ZipFile(wheel) as zf: + names = set(zf.namelist()) +if not any(name.startswith('kt_kernel/kt_kernel_ext') and name.endswith('.so') for name in names): + raise SystemExit('missing kt_kernel_ext shared object in wheel') +required = [ + 'kt_kernel/sft/__init__.py', + 'kt_kernel/sft/wrapper.py', + 'kt_kernel/cli/completions/_kt', +] +missing = [name for name in required if name not in names] +if missing: + raise SystemExit(f'missing required wheel entries: {missing}') +print(f'WHEEL_OK {wheel.name}') +PY +} + +for py in $PY_LIST; do + PYBIN="$(command -v python${py} || true)" + if [[ ! 
-x "$PYBIN" ]]; then + echo ">> Skip python ${py}: not found" + continue + fi + + for tv in $TORCH_LIST; do + echo "======== Build: Python ${py} × Torch ${tv} ========" + ENV_DIR="$WORK_ROOT/.venv-py${py//./}-torch${tv//./}" + OUT_DIR="$WHEELS_DIR/py${py//./}-torch${tv//./}" + IDX="$(index_for_torch_version "$tv")" + + if [[ "$FORCE" = "1" ]]; then + rm -rf "$OUT_DIR" + elif compgen -G "$OUT_DIR/*.whl" > /dev/null; then + echo ">> Found existing wheel for py${py//./}-torch${tv//./}, skip" + continue + fi + + rm -rf "$ENV_DIR" + mkdir -p "$OUT_DIR" + "$PYBIN" -m venv "$ENV_DIR" + # shellcheck disable=SC1090 + source "$ENV_DIR/bin/activate" + + export PYTHONNOUSERSITE=1 + export PIP_CACHE_DIR + export CPUINFER_ENABLE_CPPTRACE + export TMPDIR="$TMP_ROOT" + export TEMP="$TMP_ROOT" + export TMP="$TMP_ROOT" + + python -m pip install -U pip setuptools wheel build cmake pybind11 packaging numpy + if [[ -n "$IDX" ]]; then + python -m pip install --index-url "$IDX" "torch==$tv" + else + python -m pip install "torch==$tv" + fi + verify_torch_stack + + rm -rf build dist kt_kernel.egg-info + python -m build --no-isolation --wheel -v + + wheels=(dist/*.whl) + if (( ${#wheels[@]} != 1 )); then + echo "!! 
expected exactly one wheel in dist/, got ${#wheels[@]}" >&2 + exit 2 + fi + + verify_wheel_contents "${wheels[0]}" + + python - "$OUT_DIR/build-info.txt" "$py" "$tv" "$IDX" "$CPUINFER_ENABLE_CPPTRACE" <<'PY' +from pathlib import Path +import platform +import sys +import torch +out = Path(sys.argv[1]) +out.write_text( + f"python={sys.argv[2]}\n" + f"torch={torch.__version__}\n" + f"torch_cuda={torch.version.cuda}\n" + f"cuda_available={torch.cuda.is_available()}\n" + f"index_url={sys.argv[4]}\n" + f"platform={platform.platform()}\n" + f"cpptrace={sys.argv[5]}\n" +) +print(f"BUILD_INFO {out}") +PY + + if [[ "$REPAIR" = "1" ]]; then + python -m pip install -U auditwheel patchelf + rm -rf "$OUT_DIR/wheelhouse" + mkdir -p "$OUT_DIR/wheelhouse" + auditwheel repair "${wheels[0]}" --plat "$AUDITWHEEL_PLAT" -w "$OUT_DIR/wheelhouse" + cp "$OUT_DIR/wheelhouse"/*.whl "$OUT_DIR/" + else + cp "${wheels[0]}" "$OUT_DIR/" + fi + + deactivate + done +done + +echo "== Wheels saved in ${WHEELS_DIR} ==" diff --git a/kt-kernel/ext_bindings.cpp b/kt-kernel/ext_bindings.cpp index cdcb6f5f..f171b2c5 100644 --- a/kt-kernel/ext_bindings.cpp +++ b/kt-kernel/ext_bindings.cpp @@ -12,7 +12,9 @@ #include #include +#if defined(KTRANSFORMERS_ENABLE_CPPTRACE) #include +#endif #include #include #include @@ -54,8 +56,8 @@ static const bool _is_plain_ = false; #if defined(__x86_64__) #include "operators/avx2/bf16-moe.hpp" #include "operators/avx2/fp8-moe.hpp" -#include "operators/avx2/gptq_int4_avxvnni-moe.hpp" #include "operators/avx2/gptq_int4-moe.hpp" +#include "operators/avx2/gptq_int4_avxvnni-moe.hpp" #endif #include // std::vector/std::pair/std::string conversions @@ -74,7 +76,6 @@ static const bool _is_plain_ = false; namespace py = pybind11; using namespace pybind11::literals; - py::object to_float_ptr(uintptr_t input_ptr, int size, ggml_type type) { if (type < 0 || type >= GGML_TYPE_COUNT) { PyErr_SetString(PyExc_ValueError, "Invalid ggml_type"); @@ -473,7 +474,6 @@ void 
bind_moe_module(py::module_& moe_module, const char* name) { } PYBIND11_MODULE(kt_kernel_ext, m) { - py::class_(m, "WorkerPool").def(py::init()); py::class_(m, "WorkerPoolConfig") .def(py::init<>()) @@ -813,7 +813,7 @@ PYBIND11_MODULE(kt_kernel_ext, m) { bind_moe_module>(moe_module, "AVX2FP8_MOE"); bind_moe_module>(moe_module, "AVX2GPTQInt4_MOE"); bind_moe_module>(moe_module, - "AVXVNNI256GPTQInt4_MOE"); + "AVXVNNI256GPTQInt4_MOE"); #endif #if defined(USE_MOE_KERNEL) @@ -976,6 +976,7 @@ PYBIND11_MODULE(kt_kernel_ext, m) { py::arg("size"), py::arg("type")); } +#if defined(KTRANSFORMERS_ENABLE_CPPTRACE) static void warmup_cpptrace() { // Warm up cpptrace once so the first real backtrace does not trigger lazy loading (malloc etc.) inside a signal handler cpptrace::frame_ptr buffer[10]; @@ -1002,3 +1003,4 @@ __attribute__((constructor)) static void install_handlers() { sigaction(SIGABRT, &sa, nullptr); } +#endif diff --git a/kt-kernel/install.sh b/kt-kernel/install.sh index 2f41b8d8..06a7d8c9 100755 --- a/kt-kernel/install.sh +++ b/kt-kernel/install.sh @@ -75,6 +75,7 @@ Optional variables (with defaults): CPUINFER_ENABLE_AVX512_VNNI=ON/OFF Override VNNI detection (auto if unset) CPUINFER_ENABLE_AVX512_BF16=ON/OFF Override BF16 detection (auto if unset) CPUINFER_ENABLE_AVX512_VBMI=ON/OFF Override VBMI detection (auto if unset) + CPUINFER_ENABLE_CPPTRACE=ON/OFF Enable native crash tracing (default OFF) Software Fallback Support: ✓ If VNNI not available: Uses AVX512BW fallback (2-3x slower but works) @@ -392,6 +393,7 @@ echo " CPUINFER_ENABLE_AMX = $CPUINFER_ENABLE_AMX" echo " CPUINFER_ENABLE_AVX512_VNNI = ${CPUINFER_ENABLE_AVX512_VNNI:-AUTO}" echo " CPUINFER_ENABLE_AVX512_BF16 = ${CPUINFER_ENABLE_AVX512_BF16:-AUTO}" echo " CPUINFER_ENABLE_AVX512_VBMI = ${CPUINFER_ENABLE_AVX512_VBMI:-AUTO}" +echo " CPUINFER_ENABLE_CPPTRACE = ${CPUINFER_ENABLE_CPPTRACE:-OFF}" echo " CPUINFER_BUILD_TYPE = ${CPUINFER_BUILD_TYPE:-Release}" echo " CPUINFER_PARALLEL = ${CPUINFER_PARALLEL:-AUTO}" echo " CPUINFER_VERBOSE = 
${CPUINFER_VERBOSE:-1}" diff --git a/kt-kernel/operators/amx/moe.hpp b/kt-kernel/operators/amx/moe.hpp index d4ad682f..4a8450df 100644 --- a/kt-kernel/operators/amx/moe.hpp +++ b/kt-kernel/operators/amx/moe.hpp @@ -251,7 +251,7 @@ class AMX_MOE_TP : public AMX_MOE_BASE> { if (config_.load) { std::cout << "Loading from \"" << prefix << "\"" << std::endl; pool->do_work_stealing_job( - config_.expert_num * mat_type_all * mat_split, nullptr, + config_.expert_num * mat_type_all * mat_split, [this, physical_to_logical_map, prefix, mat_type_all, mat_split](int task_id) { int64_t expert_idx = task_id / (mat_type_all * mat_split); uint64_t logical_expert_id = expert_map(physical_to_logical_map, expert_idx); @@ -273,8 +273,7 @@ class AMX_MOE_TP : public AMX_MOE_BASE> { read_weights(prefix, "_down_", (char*)down_bb_[expert_idx]->b, logical_expert_id, size, scale_size, mat_split, mat_split_idex); } - }, - nullptr, "load_fwd_kt"); + }); } // check process, store down matrix to check #ifdef CHECK diff --git a/kt-kernel/pyproject.toml b/kt-kernel/pyproject.toml index 9bd13a15..0f17adb2 100644 --- a/kt-kernel/pyproject.toml +++ b/kt-kernel/pyproject.toml @@ -18,28 +18,29 @@ classifiers = [ "Operating System :: POSIX :: Linux", "Operating System :: MacOS", ] -requires-python = ">=3.8" +requires-python = ">=3.11" dependencies = [ # Core dependencies "torch>=2.0.0", "safetensors>=0.4.0", - "compressed-tensors>=0.7.0", "numpy>=1.24.0", "triton>=2.0.0", "gguf>=0.17.0", # CLI dependencies - "typer[all]>=0.9.0", + "typer>=0.9.0", "rich>=13.0.0", "pyyaml>=6.0", "httpx>=0.25.0", "packaging>=23.0", - # SGLang (kvcache-ai fork) - "sglang-kt", - # Development dependencies - "black>=25.9.0", ] [project.optional-dependencies] +sglang = [ + "sglang-kt", +] +convert = [ + "compressed-tensors>=0.7.0", +] test = [ "pytest>=7.0.0", "psutil>=5.9.0", diff --git a/kt-kernel/python/__init__.py b/kt-kernel/python/__init__.py index 075b0081..47da096b 100644 --- a/kt-kernel/python/__init__.py +++ 
b/kt-kernel/python/__init__.py @@ -77,9 +77,9 @@ try: _version_ns = {} with open(_root_version_file, "r", encoding="utf-8") as f: exec(f.read(), _version_ns) - __version__ = _version_ns.get("__version__", "0.5.3") + __version__ = _version_ns.get("__version__", "0.6.1") else: - __version__ = "0.5.3" + __version__ = "0.6.1" except ImportError: # Python < 3.8, fallback to pkg_resources or hardcoded version try: @@ -88,8 +88,8 @@ except ImportError: try: __version__ = get_distribution("kt-kernel").version except DistributionNotFound: - __version__ = "0.5.3" + __version__ = "0.6.1" except ImportError: - __version__ = "0.5.3" + __version__ = "0.6.1" __all__ = ["KTMoEWrapper", "AMXSFTMoEWrapper", "generate_gpu_experts_masks", "kt_kernel_ext", "__cpu_variant__", "__version__"] diff --git a/kt-kernel/python/cli/__init__.py b/kt-kernel/python/cli/__init__.py index 2d06fb4e..267db0e3 100644 --- a/kt-kernel/python/cli/__init__.py +++ b/kt-kernel/python/cli/__init__.py @@ -16,6 +16,6 @@ except PackageNotFoundError: _root_version_file = Path(__file__).resolve().parents[3] / "version.py" if _root_version_file.exists(): exec(_root_version_file.read_text(encoding="utf-8"), _version_ns) - __version__ = _version_ns.get("__version__", "0.5.3") + __version__ = _version_ns.get("__version__", "0.6.1") else: - __version__ = "0.5.3" + __version__ = "0.6.1" diff --git a/kt-kernel/requirements.txt b/kt-kernel/requirements.txt index 33cc7f85..884999d2 100644 --- a/kt-kernel/requirements.txt +++ b/kt-kernel/requirements.txt @@ -5,9 +5,6 @@ # Core dependencies (minimum versions) torch>=2.0.0 safetensors>=0.4.0 -compressed-tensors>=0.7.0 numpy>=1.24.0 triton>=2.0.0 gguf>=0.17.0 -# Development dependencies -black>=25.9.0 diff --git a/kt-kernel/setup.py b/kt-kernel/setup.py index 895bfa5e..a14b0a5a 100644 --- a/kt-kernel/setup.py +++ b/kt-kernel/setup.py @@ -24,6 +24,7 @@ Environment knobs (export before running pip install .): CPUINFER_ENABLE_AVX512_VNNI=OFF ON/OFF -> -DLLAMA_AVX512_VNNI 
CPUINFER_ENABLE_AVX512_BF16=OFF ON/OFF -> -DLLAMA_AVX512_BF16 CPUINFER_ENABLE_AVX512_VBMI=OFF ON/OFF -> -DLLAMA_AVX512_VBMI (required for FP8 MoE) + CPUINFER_ENABLE_CPPTRACE=ON/OFF ON/OFF -> -DKTRANSFORMERS_ENABLE_CPPTRACE (debug-only) CPUINFER_BLIS_ROOT=/path/to/blis Forward to -DBLIS_ROOT @@ -610,6 +611,7 @@ class CMakeBuild(build_ext): _forward_bool_env(cmake_args, "CPUINFER_ENABLE_LTO", "CPUINFER_ENABLE_LTO") _forward_str_env(cmake_args, "CPUINFER_LTO_JOBS", "CPUINFER_LTO_JOBS") _forward_str_env(cmake_args, "CPUINFER_LTO_MODE", "CPUINFER_LTO_MODE") + _forward_bool_env(cmake_args, "CPUINFER_ENABLE_CPPTRACE", "KTRANSFORMERS_ENABLE_CPPTRACE") # CUDA static runtime toggle _forward_bool_env(cmake_args, "CPUINFER_CUDA_STATIC_RUNTIME", "KTRANSFORMERS_CUDA_STATIC_RUNTIME") @@ -695,9 +697,9 @@ if _version_file.exists(): _version_ns = {} with open(_version_file, "r", encoding="utf-8") as f: exec(f.read(), _version_ns) - _base_version = _version_ns.get("__version__", "0.5.3") + _base_version = _version_ns.get("__version__", "0.6.1") else: - _base_version = "0.5.3" + _base_version = "0.6.1" # Determine version if "CPUINFER_VERSION" in os.environ: @@ -727,23 +729,31 @@ setup( description="KT-Kernel: High-performance kernel operations for KTransformers (AMX/AVX/KML optimizations)", author="kvcache-ai", license="Apache-2.0", - python_requires=">=3.8", + python_requires=">=3.11", packages=[ "kt_kernel", "kt_kernel.utils", + "kt_kernel.sft", "kt_kernel.cli", "kt_kernel.cli.commands", + "kt_kernel.cli.completions", "kt_kernel.cli.config", "kt_kernel.cli.utils", ], package_dir={ "kt_kernel": "python", "kt_kernel.utils": "python/utils", + "kt_kernel.sft": "python/sft", "kt_kernel.cli": "python/cli", "kt_kernel.cli.commands": "python/cli/commands", + "kt_kernel.cli.completions": "python/cli/completions", "kt_kernel.cli.config": "python/cli/config", "kt_kernel.cli.utils": "python/cli/utils", }, + package_data={ + "kt_kernel.cli.completions": ["*.bash", "*.fish", "_kt"], + }, + 
include_package_data=True, entry_points={ "console_scripts": [ "kt=kt_kernel.cli.main:main", diff --git a/ktransformers/__init__.py b/ktransformers/__init__.py new file mode 100644 index 00000000..df81f12a --- /dev/null +++ b/ktransformers/__init__.py @@ -0,0 +1,34 @@ +"""Top-level Python package for KTransformers. + +The runtime kernels live in kt-kernel. Optional SFT support is activated +via pip install "ktransformers[sft]" which adds transformers-kt and +accelerate-kt to the environment. +""" + +from __future__ import annotations + +from importlib.metadata import PackageNotFoundError, version +from pathlib import Path + + +def _read_repo_version() -> str: + ns: dict[str, str] = {} + exec((Path(__file__).resolve().parents[1] / 'version.py').read_text(), ns) + return ns['__version__'] + + +try: + __version__ = version('ktransformers') +except PackageNotFoundError: + __version__ = _read_repo_version() + + +def has_sft_support() -> bool: + try: + import kt_kernel.sft # noqa: F401 + except Exception: + return False + return True + + +__all__ = ['__version__', 'has_sft_support'] diff --git a/pyproject.toml b/pyproject.toml index e32acdb9..9ff862bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,12 +4,12 @@ build-backend = "setuptools.build_meta" [project] name = "ktransformers" -dynamic = ["version", "dependencies"] +dynamic = ["version", "dependencies", "optional-dependencies"] description = "KTransformers: CPU-GPU heterogeneous inference framework for LLMs" readme = "README.md" authors = [{ name = "kvcache-ai" }] license = "Apache-2.0" -requires-python = ">=3.8" +requires-python = ">=3.11" classifiers = [ "Programming Language :: Python :: 3", "Operating System :: POSIX :: Linux", @@ -19,5 +19,5 @@ classifiers = [ Homepage = "https://github.com/kvcache-ai/ktransformers" [tool.setuptools] -# No actual Python packages — this is a meta-package -packages = [] +# Ship a minimal top-level Python package so the distribution is importable. 
+packages = ["ktransformers"] diff --git a/setup.py b/setup.py index 5bc28743..7e868307 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,9 @@ -"""Meta-package: pip install ktransformers → installs kt-kernel + sglang-kt.""" +"""Lightweight top-level package: pip install ktransformers -> installs kt-kernel. + +Extras: + - ktransformers[sft] installs transformers-kt + accelerate-kt + - ktransformers[sglang] installs sglang-kt +""" from pathlib import Path from setuptools import setup @@ -11,6 +16,14 @@ setup( version=_v, install_requires=[ f"kt-kernel=={_v}", - f"sglang-kt=={_v}", ], + extras_require={ + "sft": [ + "transformers-kt==5.6.0", + "accelerate-kt==1.14.0", + ], + "sglang": [ + "sglang-kt>=0.5.3", + ], + }, ) diff --git a/version.py b/version.py index 2681423c..4997b6f9 100644 --- a/version.py +++ b/version.py @@ -1,6 +1,6 @@ """ KTransformers version information. -Shared across kt-kernel and kt-sft modules. +Shared across the top-level package and kt-kernel. """ -__version__ = "0.5.3" +__version__ = "0.6.1"