mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-26 10:50:59 +00:00
chore: bump version to 0.5.3 (#1909)
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
Release Fake Tag / publish (push) Has been cancelled
Release to PyPI / Build & publish sglang-kt (push) Has been cancelled
Release to PyPI / Build kt-kernel (Python 3.11) (push) Has been cancelled
Release to PyPI / Build kt-kernel (Python 3.12) (push) Has been cancelled
Release sglang-kt to PyPI / Build sglang-kt wheel (push) Has been cancelled
Release to PyPI / Publish kt-kernel to PyPI (push) Has been cancelled
Release sglang-kt to PyPI / Publish sglang-kt to PyPI (push) Has been cancelled
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
Release Fake Tag / publish (push) Has been cancelled
Release to PyPI / Build & publish sglang-kt (push) Has been cancelled
Release to PyPI / Build kt-kernel (Python 3.11) (push) Has been cancelled
Release to PyPI / Build kt-kernel (Python 3.12) (push) Has been cancelled
Release sglang-kt to PyPI / Build sglang-kt wheel (push) Has been cancelled
Release to PyPI / Publish kt-kernel to PyPI (push) Has been cancelled
Release sglang-kt to PyPI / Publish sglang-kt to PyPI (push) Has been cancelled
This commit is contained in:
parent
9e6484a538
commit
db9326302b
9 changed files with 44 additions and 32 deletions
|
|
@ -30,8 +30,8 @@ ARG GITHUB_ARTIFACTORY=github.com
|
|||
ARG FLASHINFER_VERSION=0.5.3
|
||||
|
||||
# ktransformers wheel version (cu128torch28 for CUDA 12.8 + PyTorch 2.8)
|
||||
ARG KTRANSFORMERS_VERSION=0.4.2
|
||||
ARG KTRANSFORMERS_WHEEL=ktransformers-0.4.2+cu128torch28fancy-cp312-cp312-linux_x86_64.whl
|
||||
ARG KTRANSFORMERS_VERSION=0.5.3
|
||||
ARG KTRANSFORMERS_WHEEL=ktransformers-0.5.3+cu128torch28fancy-cp312-cp312-linux_x86_64.whl
|
||||
|
||||
# flash_attn wheel for fine-tune env
|
||||
ARG FLASH_ATTN_WHEEL=flash_attn-2.8.3+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
|
||||
|
|
|
|||
|
|
@ -25,16 +25,16 @@ sglang-v{sglang版本}_ktransformers-v{ktransformers版本}_{cpu信息}_{gpu信
|
|||
|
||||
**Tar file:**
|
||||
```
|
||||
sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
|
||||
sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
|
||||
```
|
||||
|
||||
**DockerHub tags:**
|
||||
```
|
||||
Full tag:
|
||||
kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
|
||||
Simplified tag:
|
||||
kvcache/ktransformers:v0.4.3-cu128
|
||||
kvcache/ktransformers:v0.5.3-cu128
|
||||
```
|
||||
|
||||
### Name Components
|
||||
|
|
@ -42,7 +42,7 @@ kvcache/ktransformers:v0.4.3-cu128
|
|||
| Component | Description | Example |
|
||||
|-----------|-------------|---------|
|
||||
| sglang version | SGLang package version | `v0.5.6` |
|
||||
| ktransformers version | KTransformers version | `v0.4.3` |
|
||||
| ktransformers version | KTransformers version | `v0.5.3` |
|
||||
| cpu info | CPU instruction set support | `x86-intel-multi` (includes AMX/AVX512/AVX2) |
|
||||
| gpu info | CUDA version | `cu128` (CUDA 12.8) |
|
||||
| functionality | Feature mode | `sft_llamafactory-v0.9.3` or `infer` |
|
||||
|
|
@ -197,8 +197,8 @@ docker login
|
|||
```
|
||||
|
||||
This creates two tags:
|
||||
- Full: `kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022`
|
||||
- Simplified: `kvcache/ktransformers:v0.4.3-cu128`
|
||||
- Full: `kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022`
|
||||
- Simplified: `kvcache/ktransformers:v0.5.3-cu128`
|
||||
|
||||
### Example 4: Dry Run
|
||||
|
||||
|
|
@ -225,12 +225,12 @@ Pass additional Docker build arguments:
|
|||
|
||||
```bash
|
||||
# Load the image
|
||||
docker load -i sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
|
||||
docker load -i sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022.tar
|
||||
|
||||
# Run the container
|
||||
docker run -it --rm \
|
||||
--gpus all \
|
||||
sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 \
|
||||
sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022 \
|
||||
/bin/bash
|
||||
```
|
||||
|
||||
|
|
@ -238,15 +238,15 @@ docker run -it --rm \
|
|||
|
||||
```bash
|
||||
# Pull with full tag
|
||||
docker pull kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
docker pull kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
|
||||
# Or pull with simplified tag
|
||||
docker pull kvcache/ktransformers:v0.4.3-cu128
|
||||
docker pull kvcache/ktransformers:v0.5.3-cu128
|
||||
|
||||
# Run the container
|
||||
docker run -it --rm \
|
||||
--gpus all \
|
||||
kvcache/ktransformers:v0.4.3-cu128 \
|
||||
kvcache/ktransformers:v0.5.3-cu128 \
|
||||
/bin/bash
|
||||
```
|
||||
|
||||
|
|
@ -301,7 +301,7 @@ cat /workspace/versions.env
|
|||
|
||||
# Output:
|
||||
SGLANG_VERSION=0.5.6
|
||||
KTRANSFORMERS_VERSION=0.4.3
|
||||
KTRANSFORMERS_VERSION=0.5.3
|
||||
LLAMAFACTORY_VERSION=0.9.3
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -210,9 +210,9 @@ generate_image_name() {
|
|||
|
||||
# Generate simplified tag for DockerHub
|
||||
# Input:
|
||||
# $1: ktransformers_version (e.g., 0.4.3)
|
||||
# $1: ktransformers_version (e.g., 0.5.3)
|
||||
# $2: cuda_version (e.g., 12.8.1)
|
||||
# Output: Simplified tag (e.g., v0.4.3-cu128)
|
||||
# Output: Simplified tag (e.g., v0.5.3-cu128)
|
||||
generate_simplified_tag() {
|
||||
local ktrans_ver="$1"
|
||||
local cuda_version="$2"
|
||||
|
|
|
|||
|
|
@ -137,13 +137,13 @@ OUTPUT:
|
|||
{registry}/{repository}:sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp}
|
||||
|
||||
Example:
|
||||
docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
|
||||
Simplified tag (if --also-push-simplified):
|
||||
{registry}/{repository}:v{ktransformers-ver}-{cuda}
|
||||
|
||||
Example:
|
||||
docker.io/kvcache/ktransformers:v0.4.3-cu128
|
||||
docker.io/kvcache/ktransformers:v0.5.3-cu128
|
||||
|
||||
EOF
|
||||
exit 0
|
||||
|
|
@ -372,7 +372,7 @@ generate_tags() {
|
|||
log_warning "DRY RUN: Using placeholder versions"
|
||||
# Use placeholder versions for dry run
|
||||
local versions="SGLANG_VERSION=0.5.6
|
||||
KTRANSFORMERS_VERSION=0.4.3
|
||||
KTRANSFORMERS_VERSION=0.5.3
|
||||
LLAMAFACTORY_VERSION=0.9.3"
|
||||
else
|
||||
# Extract versions from image
|
||||
|
|
@ -709,13 +709,13 @@ OUTPUT:
|
|||
{registry}/{repository}:sglang-v{ver}_ktransformers-v{ver}_{cpu}_{gpu}_{func}_{timestamp}
|
||||
|
||||
Example:
|
||||
docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.4.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
docker.io/kvcache/ktransformers:sglang-v0.5.6_ktransformers-v0.5.3_x86-intel-multi_cu128_sft_llamafactory-v0.9.3_20241212143022
|
||||
|
||||
Simplified tag (if --also-push-simplified):
|
||||
{registry}/{repository}:v{ktransformers-ver}-{cuda}
|
||||
|
||||
Example:
|
||||
docker.io/kvcache/ktransformers:v0.4.3-cu128
|
||||
docker.io/kvcache/ktransformers:v0.5.3-cu128
|
||||
|
||||
EOF
|
||||
exit 0
|
||||
|
|
@ -944,7 +944,7 @@ generate_tags() {
|
|||
log_warning "DRY RUN: Using placeholder versions"
|
||||
# Use placeholder versions for dry run
|
||||
local versions="SGLANG_VERSION=0.5.6
|
||||
KTRANSFORMERS_VERSION=0.4.3
|
||||
KTRANSFORMERS_VERSION=0.5.3
|
||||
LLAMAFACTORY_VERSION=0.9.3"
|
||||
else
|
||||
# Extract versions from image
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ option(KTRANSFORMERS_CPU_MOE_AMD "ktransformers: CPU use moe kernel for amd" OFF
|
|||
# LTO control
|
||||
option(CPUINFER_ENABLE_LTO "Enable link time optimization (IPO)" OFF)
|
||||
|
||||
project(kt_kernel_ext VERSION 0.5.0)
|
||||
project(kt_kernel_ext VERSION 0.5.3)
|
||||
|
||||
# Auto-detect CPU features early (unless building with LLAMA_NATIVE)
|
||||
if(NOT LLAMA_NATIVE AND NOT MSVC)
|
||||
|
|
@ -692,4 +692,3 @@ if(NUMA_LIBRARY)
|
|||
else()
|
||||
message(FATAL_ERROR "NUMA library not found, please install NUMA, sudo apt install libnuma-dev")
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -68,9 +68,9 @@ try:
|
|||
_version_ns = {}
|
||||
with open(_root_version_file, "r", encoding="utf-8") as f:
|
||||
exec(f.read(), _version_ns)
|
||||
__version__ = _version_ns.get("__version__", "0.4.3")
|
||||
__version__ = _version_ns.get("__version__", "0.5.3")
|
||||
else:
|
||||
__version__ = "0.4.3"
|
||||
__version__ = "0.5.3"
|
||||
except ImportError:
|
||||
# Python < 3.8, fallback to pkg_resources or hardcoded version
|
||||
try:
|
||||
|
|
@ -79,8 +79,8 @@ except ImportError:
|
|||
try:
|
||||
__version__ = get_distribution("kt-kernel").version
|
||||
except DistributionNotFound:
|
||||
__version__ = "0.4.3"
|
||||
__version__ = "0.5.3"
|
||||
except ImportError:
|
||||
__version__ = "0.4.3"
|
||||
__version__ = "0.5.3"
|
||||
|
||||
__all__ = ["KTMoEWrapper", "generate_gpu_experts_masks", "kt_kernel_ext", "__cpu_variant__", "__version__"]
|
||||
|
|
|
|||
|
|
@ -5,4 +5,17 @@ This CLI provides a user-friendly interface to all KTransformers functionality,
|
|||
including model inference, fine-tuning, benchmarking, and more.
|
||||
"""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
from importlib.metadata import PackageNotFoundError, version
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
try:
|
||||
__version__ = version("kt-kernel")
|
||||
except PackageNotFoundError:
|
||||
_version_ns = {}
|
||||
_root_version_file = Path(__file__).resolve().parents[3] / "version.py"
|
||||
if _root_version_file.exists():
|
||||
exec(_root_version_file.read_text(encoding="utf-8"), _version_ns)
|
||||
__version__ = _version_ns.get("__version__", "0.5.3")
|
||||
else:
|
||||
__version__ = "0.5.3"
|
||||
|
|
|
|||
|
|
@ -695,9 +695,9 @@ if _version_file.exists():
|
|||
_version_ns = {}
|
||||
with open(_version_file, "r", encoding="utf-8") as f:
|
||||
exec(f.read(), _version_ns)
|
||||
_base_version = _version_ns.get("__version__", "0.5.0")
|
||||
_base_version = _version_ns.get("__version__", "0.5.3")
|
||||
else:
|
||||
_base_version = "0.5.0"
|
||||
_base_version = "0.5.3"
|
||||
|
||||
# Determine version
|
||||
if "CPUINFER_VERSION" in os.environ:
|
||||
|
|
|
|||
|
|
@ -3,4 +3,4 @@ KTransformers version information.
|
|||
Shared across kt-kernel and kt-sft modules.
|
||||
"""
|
||||
|
||||
__version__ = "0.5.2.post1"
|
||||
__version__ = "0.5.3"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue