From db9326302babfdc408dfd454d06aeecdbb91ea9a Mon Sep 17 00:00:00 2001 From: Jianwei Dong Date: Wed, 1 Apr 2026 18:58:48 +0800 Subject: [PATCH] chore: bump version to 0.5.3 (#1909) --- docker/Dockerfile | 4 ++-- docker/README-packaging.md | 24 ++++++++++++------------ docker/docker-utils.sh | 4 ++-- docker/push-to-dockerhub.sh | 12 ++++++------ kt-kernel/CMakeLists.txt | 3 +-- kt-kernel/python/__init__.py | 8 ++++---- kt-kernel/python/cli/__init__.py | 15 ++++++++++++++- kt-kernel/setup.py | 4 ++-- version.py | 2 +- 9 files changed, 44 insertions(+), 32 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0e2f92f4..fe574edd 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -30,8 +30,8 @@ ARG GITHUB_ARTIFACTORY=github.com ARG FLASHINFER_VERSION=0.5.3 # ktransformers wheel version (cu128torch28 for CUDA 12.8 + PyTorch 2.8) -ARG KTRANSFORMERS_VERSION=0.4.2 -ARG KTRANSFORMERS_WHEEL=ktransformers-0.4.2+cu128torch28fancy-cp312-cp312-linux_x86_64.whl +ARG KTRANSFORMERS_VERSION=0.5.3 +ARG KTRANSFORMERS_WHEEL=ktransformers-0.5.3+cu128torch28fancy-cp312-cp312-linux_x86_64.whl # flash_attn wheel for fine-tune env ARG FLASH_ATTN_WHEEL=flash_attn-2.8.3+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl diff --git a/docker/README-packaging.md b/docker/README-packaging.md index afe7e772..81e16ecb 100644 --- a/docker/README-packaging.md +++ b/docker/README-packaging.md @@ -25,16 +25,16 @@ sglang-v{sglang版本}_ktransformers-v{ktransformers版本}_{cpu信息}_{gpu信 **Tar file:** ``` -sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar +sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar ``` **DockerHub tags:** ``` Full tag: -kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 +kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 Simplified tag: -kvcache/ktransformers:v0.4.3-cu128 +kvcache/ktransformers:v0.5.3-cu128 ``` ### Name Components @@ -42,7 +42,7 @@ kvcache/ktransformers:v0.4.3-cu128 | Component | Description | Example | |-----------|-------------|---------| | sglang version | SGLang package version | `v0.5.6` | -| ktransformers version | KTransformers version | `v0.4.3` | +| ktransformers version | KTransformers version | `v0.5.3` | | cpu info | CPU instruction set support | `x86-intel-multi` (includes AMX/AVX512/AVX2) | | gpu info | CUDA version | `cu128` (CUDA 12.8) | | functionality | Feature mode | `sft_llamafactory-v0.9.3` or `infer` | @@ -197,8 +197,8 @@ docker login ``` This creates two tags: -- Full: `kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022` -- Simplified: `kvcache/ktransformers:v0.4.3-cu128` +- Full: `kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022` +- Simplified: `kvcache/ktransformers:v0.5.3-cu128` ### Example 4: Dry Run @@ -225,12 +225,12 @@ Pass additional Docker build arguments: ```bash # Load the image -docker load -i sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar +docker load -i sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar # Run the container docker run -it --rm \ --gpus all \ - sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 \ + sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 \ /bin/bash ``` @@ -238,15 +238,15 @@ docker run -it --rm \ ```bash # Pull with full tag -docker pull kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 +docker pull kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 # Or pull with simplified tag -docker pull kvcache/ktransformers:v0.4.3-cu128 +docker pull kvcache/ktransformers:v0.5.3-cu128 # Run the container docker run -it --rm \ --gpus all \ - kvcache/ktransformers:v0.4.3-cu128 \ + kvcache/ktransformers:v0.5.3-cu128 \ /bin/bash ``` @@ -301,7 +301,7 @@ cat /workspace/versions.env # Output: SGLANG_VERSION=0.5.6 -KTRANSFORMERS_VERSION=0.4.3 +KTRANSFORMERS_VERSION=0.5.3 LLAMAFACTORY_VERSION=0.9.3 ``` diff --git a/docker/docker-utils.sh b/docker/docker-utils.sh index 3277668a..036e907b 100755 --- a/docker/docker-utils.sh +++ b/docker/docker-utils.sh @@ -210,9 +210,9 @@ generate_image_name() { # Generate simplified tag for DockerHub # Input: -# $1: ktransformers_version (e.g., 0.4.3) +# $1: ktransformers_version (e.g., 0.5.3) # $2: cuda_version (e.g., 12.8.1) -# Output: Simplified tag (e.g., v0.4.3-cu128) +# Output: Simplified tag (e.g., v0.5.3-cu128) generate_simplified_tag() { local ktrans_ver="$1" local cuda_version="$2" diff --git a/docker/push-to-dockerhub.sh b/docker/push-to-dockerhub.sh index 0a749ee7..f63c923e 100755 --- a/docker/push-to-dockerhub.sh +++ b/docker/push-to-dockerhub.sh @@ -137,13 +137,13 @@ OUTPUT: {registry}/{repository}:sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp} Example: - docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 + docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 Simplified tag (if --also-push-simplified): {registry}/{repository}:v{ktransformers-ver}-{cuda} Example: - docker.io/kvcache/ktransformers:v0.4.3-cu128 + docker.io/kvcache/ktransformers:v0.5.3-cu128 EOF exit 0 @@ -372,7 +372,7 @@ generate_tags() { log_warning "DRY RUN: Using placeholder versions" # Use placeholder versions for dry run local versions="SGLANG_VERSION=0.5.6 -KTRANSFORMERS_VERSION=0.4.3 +KTRANSFORMERS_VERSION=0.5.3 LLAMAFACTORY_VERSION=0.9.3" else # Extract versions from image @@ -709,13 +709,13 @@ OUTPUT: {registry}/{repository}:sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp} Example: - docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 + docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 Simplified tag (if --also-push-simplified): {registry}/{repository}:v{ktransformers-ver}-{cuda} Example: - docker.io/kvcache/ktransformers:v0.4.3-cu128 + docker.io/kvcache/ktransformers:v0.5.3-cu128 EOF exit 0 @@ -944,7 +944,7 @@ generate_tags() { log_warning "DRY RUN: Using placeholder versions" # Use placeholder versions for dry run local versions="SGLANG_VERSION=0.5.6 -KTRANSFORMERS_VERSION=0.4.3 +KTRANSFORMERS_VERSION=0.5.3 LLAMAFACTORY_VERSION=0.9.3" else # Extract versions from image diff --git a/kt-kernel/CMakeLists.txt b/kt-kernel/CMakeLists.txt index c671386a..e19aa4ea 100644 --- a/kt-kernel/CMakeLists.txt +++ b/kt-kernel/CMakeLists.txt @@ -27,7 +27,7 @@ option(KTRANSFORMERS_CPU_MOE_AMD "ktransformers: CPU use moe kernel for amd" OFF # LTO control option(CPUINFER_ENABLE_LTO "Enable link time optimization (IPO)" OFF) -project(kt_kernel_ext VERSION 0.5.0) +project(kt_kernel_ext VERSION 0.5.3) # Auto-detect CPU features early (unless building with LLAMA_NATIVE) if(NOT LLAMA_NATIVE AND NOT MSVC) @@ -692,4 +692,3 @@ if(NUMA_LIBRARY) else() message(FATAL_ERROR "NUMA library not found, please install NUMA, sudo apt install libnuma-dev") endif() - diff --git a/kt-kernel/python/__init__.py b/kt-kernel/python/__init__.py index a01086bf..f80181e2 100644 --- a/kt-kernel/python/__init__.py +++ b/kt-kernel/python/__init__.py @@ -68,9 +68,9 @@ try: _version_ns = {} with open(_root_version_file, "r", encoding="utf-8") as f: exec(f.read(), _version_ns) - __version__ = _version_ns.get("__version__", "0.4.3") + __version__ = _version_ns.get("__version__", "0.5.3") else: - __version__ = "0.4.3" + __version__ = "0.5.3" except ImportError: # Python < 3.8, fallback to pkg_resources or hardcoded version try: @@ -79,8 +79,8 @@ except ImportError: try: __version__ = get_distribution("kt-kernel").version except DistributionNotFound: - __version__ = "0.4.3" + __version__ = "0.5.3" except ImportError: - __version__ = "0.4.3" + __version__ = "0.5.3" __all__ = ["KTMoEWrapper", "generate_gpu_experts_masks", "kt_kernel_ext", "__cpu_variant__", "__version__"] diff --git a/kt-kernel/python/cli/__init__.py b/kt-kernel/python/cli/__init__.py index c3af5ed7..2d06fb4e 100644 --- a/kt-kernel/python/cli/__init__.py +++ b/kt-kernel/python/cli/__init__.py @@ -5,4 +5,17 @@ This CLI provides a user-friendly interface to all KTransformers functionality, including model inference, fine-tuning, benchmarking, and more. """ -__version__ = "0.1.0" +from importlib.metadata import PackageNotFoundError, version +from pathlib import Path + + +try: + __version__ = version("kt-kernel") +except PackageNotFoundError: + _version_ns = {} + _root_version_file = Path(__file__).resolve().parents[3] / "version.py" + if _root_version_file.exists(): + exec(_root_version_file.read_text(encoding="utf-8"), _version_ns) + __version__ = _version_ns.get("__version__", "0.5.3") + else: + __version__ = "0.5.3" diff --git a/kt-kernel/setup.py b/kt-kernel/setup.py index b269f828..895bfa5e 100644 --- a/kt-kernel/setup.py +++ b/kt-kernel/setup.py @@ -695,9 +695,9 @@ if _version_file.exists(): _version_ns = {} with open(_version_file, "r", encoding="utf-8") as f: exec(f.read(), _version_ns) - _base_version = _version_ns.get("__version__", "0.5.0") + _base_version = _version_ns.get("__version__", "0.5.3") else: - _base_version = "0.5.0" + _base_version = "0.5.3" # Determine version if "CPUINFER_VERSION" in os.environ: diff --git a/version.py b/version.py index a294ce1d..2681423c 100644 --- a/version.py +++ b/version.py @@ -3,4 +3,4 @@ KTransformers version information. Shared across kt-kernel and kt-sft modules. """ -__version__ = "0.5.2.post1" +__version__ = "0.5.3"