mirror of https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-28 03:39:48 +00:00
Compare commits
62 commits
9f34ef46e6 0656e01ac1 07e274467a bfbd0e9352 85f1ab530b bc7afff13b
eeaeb7bfd7 85308615b9 9544a8960d 22e9915ec9 e327db58be a9f28d495b
06ee9f62f3 a9411f1d72 f42e94a527 279c920a69 1dd0a78899 9b2d3b687b
ad19a3e653 891c5c0a13 8a427c9321 db9326302b 9e6484a538 cdc867c864
24cd4fc055 9c18b60556 3903c9afcc bdf4bb76c5 7a02daa694 7a9daf0cd4
8561a71dd1 7a4b9b0e87 15c624dcae 9e69fccb02 19887e4363 20262b2743
786987a95f 16a8b98f3e 411b69bec0 7d9943365a a3d5d53605 f0e4fc612b
1c72b3f5bd 7f7aeaeff6 061fb56382 d342fb1df6 56cbd69ac4 4f64665758
c28cfcb26e 794c04fae4 ccbb5b1cf8 2e6506535b db82d99fa6 edc48aba37
8321d00cc5 2f6f7f1921 1da075a3fa a368140d76 5bd5c8f750 bf4c8a690b
8652346e69 b0f827d2a9
515 changed files with 41062 additions and 1786 deletions
57  .github/workflows/release-pypi.yml  (vendored)

```diff
@@ -21,6 +21,58 @@ permissions:
   contents: read
 
 jobs:
+  # ── sglang-kt (must be on PyPI before users can pip install kt-kernel) ──
+  build-and-publish-sglang-kt:
+    name: Build & publish sglang-kt
+    runs-on: [self-hosted, linux, x64]
+    if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
+    environment: prod
+    permissions:
+      id-token: write
+      contents: read
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      - name: Install build tools
+        run: |
+          python -m pip install --upgrade pip
+          pip install build wheel setuptools twine
+
+      - name: Build sglang-kt wheel
+        working-directory: third_party/sglang/python
+        run: |
+          KT_VERSION=$(python3 -c "exec(open('${{ github.workspace }}/version.py').read()); print(__version__)")
+          export SGLANG_KT_VERSION="$KT_VERSION"
+          echo "Building sglang-kt v${KT_VERSION} wheel..."
+          python -m build --wheel -v
+          ls dist/ | grep -q "sglang_kt" || (echo "ERROR: Wheel name does not contain sglang_kt" && exit 1)
+
+      - name: Publish sglang-kt to PyPI
+        if: github.event.inputs.test_pypi != 'true'
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+        run: |
+          python -m twine upload --skip-existing --verbose third_party/sglang/python/dist/*.whl
+
+      - name: Publish sglang-kt to TestPyPI (if requested)
+        if: github.event.inputs.test_pypi == 'true'
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
+        run: |
+          python -m twine upload --repository testpypi --skip-existing --verbose third_party/sglang/python/dist/*.whl
+
+  # ── kt-kernel ──
   build-kt-kernel:
     name: Build kt-kernel (Python ${{ matrix.python-version }})
     runs-on: [self-hosted, linux, x64, gpu]
@@ -55,6 +107,7 @@
       working-directory: kt-kernel
       env:
         CPUINFER_BUILD_ALL_VARIANTS: '1'
         CPUINFER_ENABLE_CPPTRACE: '0'
         CPUINFER_USE_CUDA: '1'
         CPUINFER_CUDA_ARCHS: '80;86;89;90'
         CPUINFER_CUDA_STATIC_RUNTIME: '1'
@@ -124,8 +177,8 @@
       retention-days: 7
 
   publish-pypi:
-    name: Publish to PyPI
-    needs: [build-kt-kernel]
+    name: Publish kt-kernel to PyPI
+    needs: [build-and-publish-sglang-kt, build-kt-kernel]
     runs-on: [self-hosted, linux, x64]
     if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
     environment: prod
```
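Both release workflows pin the sglang-kt wheel version to the ktransformers version by exec-ing the top-level `version.py`, which avoids importing a package that may not be installed on the runner. A minimal Python sketch of that one-liner in function form (it assumes, as the workflow does, that `version.py` defines a module-level `__version__` string):

```python
# Read __version__ from a version.py without importing the package itself.
# Assumes version.py defines a module-level __version__ string, as the
# workflow's python3 -c one-liner does; the default path is illustrative.
import os

def read_version(path: str = "version.py") -> str:
    namespace: dict = {}
    with open(path, "r", encoding="utf-8") as f:
        exec(f.read(), namespace)  # defines __version__ inside `namespace`
    return namespace["__version__"]

if __name__ == "__main__":
    version = read_version()
    os.environ["SGLANG_KT_VERSION"] = version  # mirrors the workflow's export
    print(f"Building sglang-kt v{version} wheel...")
```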
130  .github/workflows/release-sglang-kt.yml  (vendored, new file)

````yaml
name: Release sglang-kt to PyPI

on:
  push:
    branches:
      - main
    paths:
      - "third_party/sglang"
      - "version.py"
  workflow_dispatch:
    inputs:
      test_pypi:
        description: 'Publish to TestPyPI instead of PyPI (for testing)'
        required: false
        default: 'false'
        type: choice
        options:
          - 'true'
          - 'false'

permissions:
  contents: read

jobs:
  build-sglang-kt:
    name: Build sglang-kt wheel
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install build tools
        run: |
          python -m pip install --upgrade pip
          pip install build wheel setuptools

      - name: Build sglang-kt wheel
        working-directory: third_party/sglang/python
        run: |
          # Read version from ktransformers version.py
          KT_VERSION=$(python3 -c "exec(open('${{ github.workspace }}/version.py').read()); print(__version__)")
          export SGLANG_KT_VERSION="$KT_VERSION"
          echo "Building sglang-kt v${KT_VERSION} wheel..."
          python -m build --wheel -v

      - name: Verify wheel
        working-directory: third_party/sglang/python
        run: |
          echo "Generated wheel:"
          ls -lh dist/
          # Verify the wheel has the correct package name
          ls dist/ | grep -q "sglang_kt" || (echo "ERROR: Wheel name does not contain sglang_kt" && exit 1)
          echo "Wheel name verified."

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: sglang-kt-wheel
          path: third_party/sglang/python/dist/*.whl
          retention-days: 7

  publish-pypi:
    name: Publish sglang-kt to PyPI
    needs: [build-sglang-kt]
    runs-on: ubuntu-latest
    if: github.repository == 'kvcache-ai/ktransformers' && github.ref == 'refs/heads/main'
    environment: prod
    permissions:
      id-token: write
      contents: read

    steps:
      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: sglang-kt-wheel
          path: dist/

      - name: Display wheels
        run: |
          echo "Wheels to publish:"
          ls -lh dist/

      - name: Install twine
        run: |
          python -m pip install --upgrade pip
          pip install twine

      - name: Publish to TestPyPI (if requested)
        if: github.event.inputs.test_pypi == 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
        run: |
          python -m twine upload \
            --repository testpypi \
            --skip-existing \
            --verbose \
            dist/*.whl

      - name: Publish to PyPI
        if: github.event.inputs.test_pypi != 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          python -m twine upload \
            --skip-existing \
            --verbose \
            dist/*.whl

      - name: Create release summary
        run: |
          echo "## sglang-kt Published to PyPI" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Installation" >> $GITHUB_STEP_SUMMARY
          echo '```bash' >> $GITHUB_STEP_SUMMARY
          echo "pip install sglang-kt" >> $GITHUB_STEP_SUMMARY
          echo '```' >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "This is the kvcache-ai fork of SGLang with kt-kernel support." >> $GITHUB_STEP_SUMMARY
          echo "PyPI link: https://pypi.org/project/sglang-kt/" >> $GITHUB_STEP_SUMMARY
````
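The verify step is a plain name check: if `SGLANG_KT_VERSION` were not picked up by the build, the wheel would not carry the `sglang_kt` distribution name. A pathlib sketch equivalent to the workflow's `ls dist/ | grep -q "sglang_kt"` gate, with the `dist/` layout assumed from the workflow above:

```python
# Fail unless at least one wheel in dist/ carries the sglang_kt
# distribution name (mirrors `ls dist/ | grep -q "sglang_kt"`).
import sys
from pathlib import Path

def verify_wheel_name(dist_dir: str = "dist") -> None:
    wheels = list(Path(dist_dir).glob("*.whl"))
    if not any("sglang_kt" in w.name for w in wheels):
        print("ERROR: Wheel name does not contain sglang_kt", file=sys.stderr)
        sys.exit(1)
    print("Wheel name verified:", ", ".join(w.name for w in wheels))

if __name__ == "__main__":
    verify_wheel_name()
```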
81  .github/workflows/sync-sglang-submodule.yml  (vendored, new file)

````yaml
name: Sync sglang submodule

on:
  schedule:
    # Run daily at 08:00 UTC
    - cron: "0 8 * * *"
  workflow_dispatch:

permissions:
  contents: write
  pull-requests: write

jobs:
  sync:
    name: Check for sglang-kt updates
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Update sglang submodule to latest main
        id: update
        run: |
          OLD_SHA=$(git -C third_party/sglang rev-parse HEAD)
          git submodule update --remote third_party/sglang
          NEW_SHA=$(git -C third_party/sglang rev-parse HEAD)

          echo "old_sha=$OLD_SHA" >> "$GITHUB_OUTPUT"
          echo "new_sha=$NEW_SHA" >> "$GITHUB_OUTPUT"

          if [ "$OLD_SHA" = "$NEW_SHA" ]; then
            echo "changed=false" >> "$GITHUB_OUTPUT"
            echo "sglang submodule is already up to date ($OLD_SHA)"
          else
            echo "changed=true" >> "$GITHUB_OUTPUT"

            # Collect commit log between old and new
            COMMITS=$(git -C third_party/sglang log --oneline "$OLD_SHA..$NEW_SHA" | head -20)
            echo "commits<<EOF" >> "$GITHUB_OUTPUT"
            echo "$COMMITS" >> "$GITHUB_OUTPUT"
            echo "EOF" >> "$GITHUB_OUTPUT"

            # sglang-kt version = ktransformers version (from version.py)
            VERSION=$(python3 -c "exec(open('version.py').read()); print(__version__)" 2>/dev/null || echo "unknown")
            echo "version=$VERSION" >> "$GITHUB_OUTPUT"

            echo "sglang submodule updated: $OLD_SHA -> $NEW_SHA (v$VERSION)"
          fi

      - name: Create pull request
        if: steps.update.outputs.changed == 'true'
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: |
            [build]: sync sglang submodule to ${{ steps.update.outputs.new_sha }}
          branch: auto/sync-sglang
          delete-branch: true
          title: "[build] Sync sglang-kt submodule (v${{ steps.update.outputs.version }})"
          body: |
            Automated sync of `third_party/sglang` submodule to latest `main`.

            **Old ref:** `${{ steps.update.outputs.old_sha }}`
            **New ref:** `${{ steps.update.outputs.new_sha }}`
            **sglang-kt version:** `${{ steps.update.outputs.version }}`

            ### Commits included
            ```
            ${{ steps.update.outputs.commits }}
            ```

            ---
            *This PR was created automatically by the [sync-sglang-submodule](${{ github.server_url }}/${{ github.repository }}/actions/workflows/sync-sglang-submodule.yml) workflow.*
          labels: |
            dependencies
            automated
````
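The core of the sync job is two `rev-parse` calls around `git submodule update --remote`; the rest plumbs step outputs into the pull request body through `$GITHUB_OUTPUT` heredocs. A local Python sketch of the same drift check, assuming it runs from the repository root with the submodule path used in the workflow:

```python
# Check whether third_party/sglang has drifted from its pinned commit,
# mirroring the workflow's OLD_SHA/NEW_SHA comparison.
import subprocess

def git(*args: str) -> str:
    return subprocess.run(["git", *args], check=True,
                          capture_output=True, text=True).stdout.strip()

def sync_submodule(path: str = "third_party/sglang") -> bool:
    old_sha = git("-C", path, "rev-parse", "HEAD")
    git("submodule", "update", "--remote", path)
    new_sha = git("-C", path, "rev-parse", "HEAD")
    if old_sha == new_sha:
        print(f"{path} is already up to date ({old_sha})")
        return False
    commits = git("-C", path, "log", "--oneline", f"{old_sha}..{new_sha}")
    print(f"{path} updated: {old_sha} -> {new_sha}")
    print("\n".join(commits.splitlines()[:20]))  # first 20, like the workflow
    return True

if __name__ == "__main__":
    sync_submodule()
```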
4  .gitmodules  (vendored)

```diff
@@ -8,3 +8,7 @@
 path = third_party/custom_flashinfer
 url = https://github.com/kvcache-ai/custom_flashinfer.git
 branch = fix-precision-mla-merge-main
+[submodule "third_party/sglang"]
+path = third_party/sglang
+url = https://github.com/kvcache-ai/sglang.git
+branch = main
```
34  MAINTAINERS.md  (new file)

```markdown
# Maintainers

This document lists the current maintainers and outlines their responsibilities.

## Current Maintainers

| Name | GitHub | Role | Affiliation | Email |
|------|--------|------|-------------|-------|
| Weiyu Xie | [@ErvinXie](https://github.com/ErvinXie) | Maintainer | [MADSys Lab](https://madsys.cs.tsinghua.edu.cn/) @ Tsinghua University | xwy21@mails.tsinghua.edu.cn |
| Hongtao Chen | [@chenht2022](https://github.com/chenht2022) | Maintainer | [MADSys Lab](https://madsys.cs.tsinghua.edu.cn/) @ Tsinghua University | cht22@mails.tsinghua.edu.cn |
| Jianwei Dong | [@ovowei](https://github.com/ovowei) | Maintainer | [MADSys Lab](https://madsys.cs.tsinghua.edu.cn/) @ Tsinghua University | dongjw24@mails.tsinghua.edu.cn |
| Ziwei Yuan | [@KMSorSMS](https://github.com/KMSorSMS) | Maintainer | [Approaching.AI](http://approaching.ai/) | 2022090910005@std.uestc.edu.cn |
| Qingliang Ou | [@ouqingliang](https://github.com/ouqingliang) | Maintainer | [MADSys Lab](https://madsys.cs.tsinghua.edu.cn/) @ Tsinghua University | oql@bupt.edu.cn |
| Jiaqi Liao | [@SkqLiao](https://github.com/SkqLiao) | Maintainer | [Approaching.AI](http://approaching.ai/) | jiaqi.liao@bit.edu.cn |
| Peilin Li | [@JimmyPeilinLi](https://github.com/JimmyPeilinLi) | Maintainer | [Approaching.AI](http://approaching.ai/) | lipeilin@mail.nwpu.edu.cn |
| Xingxing Hao | [@mrhaoxx](https://github.com/mrhaoxx) | Maintainer | [Approaching.AI](http://approaching.ai/) | mr.haoxx@gmail.com |
| Boxin Zhang | [@Atream](https://github.com/Atream) | Maintainer | [MADSys Lab](https://madsys.cs.tsinghua.edu.cn/) @ Tsinghua University | zhangbx24@mails.tsinghua.edu.cn |
| Jingqi Tang | [@Azure-Tang](https://github.com/Azure-Tang) | Maintainer | [MADSys Lab](https://madsys.cs.tsinghua.edu.cn/) @ Tsinghua University | tangjq25@mails.tsinghua.edu.cn |
| Jiahao Wang | [@qiyuxinlin](https://github.com/qiyuxinlin) | Maintainer | [Approaching.AI](http://approaching.ai/) | 202241050020@hdu.edu.cn |

## Responsibilities

Maintainers steward the project and keep it healthy for users and contributors.

- Review and approve pull requests; ensure changes meet quality, testing, and documentation standards.
- Triage issues, keep labels organized, and respond to questions in a timely manner.
- Uphold the project's code of conduct and report violations when needed.
- Maintain CI reliability and address regressions promptly.
- Oversee releases and keep compatibility with supported dependency versions.
- Protect project security and follow the security disclosure process.

## Becoming a Maintainer

We welcome contributors who show sustained, high-quality contributions and collaborative behavior. If you are interested, please contact an existing maintainer and share your recent contributions and areas of focus.
```
24  README.md

````diff
@@ -8,7 +8,7 @@
 
 </p>
 <h3>A Flexible Framework for Experiencing Cutting-edge LLM Inference/Fine-tune Optimizations</h3>
-<strong><a href="#-overview">🎯 Overview</a> | <a href="#-kt-kernel---high-performance-inference-kernels">🚀 kt-kernel</a> | <a href="#-kt-sft---fine-tuning-framework">🎓 kt-sft</a> | <a href="#-citation">🔥 Citation</a> | <a href="https://github.com/kvcache-ai/ktransformers/issues/1582">🚀 Roadmap(2025Q4)</a> </strong>
+<strong><a href="#-overview">🎯 Overview</a> | <a href="#-kt-kernel---high-performance-inference-kernels">🚀 kt-kernel</a> | <a href="#-kt-sft---fine-tuning-framework">🎓 kt-sft</a> | <a href="#-citation">🔥 Citation</a> | <a href="https://github.com/kvcache-ai/ktransformers/issues/1921">🚀 Roadmap(2026Q2)</a> </strong>
 </div>
 
 ## 🎯 Overview
@@ -16,12 +16,17 @@
 KTransformers is a research project focused on efficient inference and fine-tuning of large language models through CPU-GPU heterogeneous computing. The project has evolved into **two core modules**: [kt-kernel](https://github.com/kvcache-ai/ktransformers/tree/main/kt-kernel/) and [kt-sft](https://github.com/kvcache-ai/ktransformers/tree/main/kt-sft).
 
 ## 🔥 Updates
 
+* **May 6, 2026**: KTransformers at [GOSIM Paris 2026](https://paris2026.gosim.org/zh/schedule/) — "Agentic AI on Edge" track. We'll present KT's inference performance on consumer hardware.
+* **Mar 26, 2026**: Support AVX2-only CPU backend for KT-Kernel inference. ([Tutorial](./doc/en/kt-kernel/AVX2-Tutorial.md))
+* **Feb 13, 2026**: MiniMax-M2.5 Day0 Support! ([Tutorial](./doc/en/MiniMax-M2.5.md))
+* **Feb 12, 2026**: GLM-5 Day0 Support! ([Tutorial](./doc/en/kt-kernel/GLM-5-Tutorial.md))
+* **Jan 27, 2026**: Kimi-K2.5 Day0 Support! ([Tutorial](./doc/en/Kimi-K2.5.md)) ([SFT Tutorial](./doc/en/SFT_Installation_Guide_KimiK2.5.md))
+* **Jan 22, 2026**: Support [CPU-GPU Expert Scheduling](./doc/en/kt-kernel/experts-sched-Tutorial.md), [Native BF16 and FP8 per channel Precision](./doc/en/kt-kernel/Native-Precision-Tutorial.md) and [AutoDL unified fine-tuning and inference](./doc/zh/【云端低价训推】%20KTransformers%2BAutoDL%2BLlamaFactory:随用随租的低成本超大模型「微调%2B推理」一体化流程.pdf)
 * **Dec 24, 2025**: Support Native MiniMax-M2.1 inference. ([Tutorial](./doc/en/kt-kernel/MiniMax-M2.1-Tutorial.md))
 * **Dec 22, 2025**: Support RL-DPO fine-tuning with LLaMA-Factory. ([Tutorial](./doc/en/SFT/DPO_tutorial.md))
 * **Dec 5, 2025**: Support Native Kimi-K2-Thinking inference ([Tutorial](./doc/en/kt-kernel/Kimi-K2-Thinking-Native.md))
 * **Nov 6, 2025**: Support Kimi-K2-Thinking inference ([Tutorial](./doc/en/Kimi-K2-Thinking.md)) and fine-tune ([Tutorial](./doc/en/SFT_Installation_Guide_KimiK2.md))
-* **Nov 4, 2025**: KTransformers Fine-Tuning × LLaMA-Factory Integration. ([Tutorial](./doc/en/KTransformers-Fine-Tuning_User-Guide.md))
+* **Nov 4, 2025**: KTransformers Fine-Tuning × LLaMA-Factory Integration. ([Tutorial](./doc/en/SFT/KTransformers-Fine-Tuning_User-Guide.md))
 * **Oct 27, 2025**: Support Ascend NPU. ([Tutorial](./doc/zh/DeepseekR1_V3_tutorial_zh_for_Ascend_NPU.md))
 * **Oct 10, 2025**: Integrating into SGLang. ([Roadmap](https://github.com/sgl-project/sglang/issues/11425), [Blog](https://lmsys.org/blog/2025-10-22-KTransformers/))
 * **Sept 11, 2025**: Support Qwen3-Next. ([Tutorial](./doc/en/Qwen3-Next.md))
@@ -82,7 +87,7 @@ pip install .
 
 ---
 
-### 🎓 [kt-sft](./kt-sft/) - Fine-Tuning Framework
+### 🎓 [kt-sft](./doc/en/SFT/KTransformers-Fine-Tuning_User-Guide.md) - Fine-Tuning Framework
 
 KTransformers × LLaMA-Factory integration for ultra-large MoE model fine-tuning.
 
@@ -104,12 +109,15 @@ KTransformers × LLaMA-Factory integration for ultra-large MoE model fine-tuning
 
 **Quick Start:**
 ```bash
-cd kt-sft
-# Install environment following kt-sft/README.md
-USE_KT=1 llamafactory-cli train examples/train_lora/deepseek3_lora_sft_kt.yaml
+cd /path/to/LLaMA-Factory
+pip install -e .
+pip install "ktransformers[sft]"
+USE_KT=1 ACCELERATE_USE_KT=true \
+accelerate launch --config_file examples/ktransformers/accelerate/fsdp2_kt_bf16.yaml \
+-m llamafactory.cli train examples/ktransformers/train_lora/deepseek_v3_lora_sft_kt.yaml
 ```
 
-👉 **[Full Documentation →](./kt-sft/README.md)**
+👉 **[Full Documentation →](./doc/en/SFT/KTransformers-Fine-Tuning_User-Guide.md)**
 
 ---
 
````
19  README_ZH.md

````diff
@@ -13,13 +13,13 @@
 
 ## 🎯 Overview
 
-KTransformers is a research project focused on efficient inference and fine-tuning of large language models through CPU-GPU heterogeneous computing. The project has evolved into **two core modules**: [kt-kernel](./kt-kernel/) and [kt-sft](./kt-sft/).
+KTransformers is a research project focused on efficient inference and fine-tuning of large language models through CPU-GPU heterogeneous computing. The project has evolved into **two core modules**: [kt-kernel](./kt-kernel/) and [kt-sft](./doc/en/SFT/KTransformers-Fine-Tuning_User-Guide.md).
 
 ## 🔥 Updates
 
-* **Dec 5, 2025**: Support native Kimi-K2-Thinking inference ([Tutorial](./doc/en/Kimi-K2-Thinking-Native.md))
+* **Dec 5, 2025**: Support native Kimi-K2-Thinking inference ([Tutorial](./doc/en/kt-kernel/Kimi-K2-Thinking-Native.md))
 * **Nov 6, 2025**: Support Kimi-K2-Thinking inference ([Tutorial](./doc/en/Kimi-K2-Thinking.md)) and fine-tuning ([Tutorial](./doc/en/SFT_Installation_Guide_KimiK2.md))
-* **Nov 4, 2025**: KTransformers Fine-Tuning × LLaMA-Factory integration ([Tutorial](./doc/en/KTransformers-Fine-Tuning_User-Guide.md))
+* **Nov 4, 2025**: KTransformers Fine-Tuning × LLaMA-Factory integration ([Tutorial](./doc/en/SFT/KTransformers-Fine-Tuning_User-Guide.md))
 * **Oct 27, 2025**: Support Ascend NPU ([Tutorial](./doc/zh/DeepseekR1_V3_tutorial_zh_for_Ascend_NPU.md))
 * **Oct 10, 2025**: Integrating into SGLang ([Roadmap](https://github.com/sgl-project/sglang/issues/11425), [Blog](https://lmsys.org/blog/2025-10-22-KTransformers/))
 * **Sept 11, 2025**: Support Qwen3-Next ([Tutorial](./doc/en/Qwen3-Next.md))
@@ -79,7 +79,7 @@ pip install .
 
 ---
 
-### 🎓 [kt-sft](./kt-sft/) - Fine-Tuning Framework
+### 🎓 [kt-sft](./doc/en/SFT/KTransformers-Fine-Tuning_User-Guide.md) - Fine-Tuning Framework
 
 KTransformers × LLaMA-Factory integration for fine-tuning ultra-large MoE models.
 
@@ -101,12 +101,15 @@ KTransformers × LLaMA-Factory integration for fine-tuning ultra-large MoE models
 
 **Quick Start:**
 ```bash
-cd kt-sft
-# Set up the environment following kt-sft/README.md
-USE_KT=1 llamafactory-cli train examples/train_lora/deepseek3_lora_sft_kt.yaml
+cd /path/to/LLaMA-Factory
+pip install -e .
+pip install "ktransformers[sft]"
+USE_KT=1 ACCELERATE_USE_KT=true \
+accelerate launch --config_file examples/ktransformers/accelerate/fsdp2_kt_bf16.yaml \
+-m llamafactory.cli train examples/ktransformers/train_lora/deepseek_v3_lora_sft_kt.yaml
 ```
 
-👉 **[Full Documentation →](./kt-sft/README.md)**
+👉 **[Full Documentation →](./doc/en/SFT/KTransformers-Fine-Tuning_User-Guide.md)**
 
 ---
 
````
Image changed (size: 1.1 MiB before, 1.1 MiB after).
```diff
@@ -671,7 +671,7 @@ torch::Tensor dequantize_q8_0(const int8_t* data, const int num_bytes, const int
     //data_gpu.copy_(data, false);
 
     // Create output tensor
-    auto output = torch::zeros({ num_blocks, 32 }, torch::dtype(target_dtype).device(device));
+    auto output = torch::zeros({ num_blocks, ele_per_blk }, torch::dtype(target_dtype).device(device));
 
     switch (target_dtype) {
     case torch::kFloat16:
@@ -705,7 +705,7 @@ torch::Tensor dequantize_q6_k(const int8_t* data, const int num_bytes, const int
     //data_gpu.copy_(data, false);
 
     // Create output tensor
-    auto output = torch::zeros({num_blocks, 256}, torch::dtype(target_dtype).device(device));
+    auto output = torch::zeros({num_blocks, ele_per_blk}, torch::dtype(target_dtype).device(device));
 
     switch (target_dtype) {
     case torch::kFloat16:
@@ -736,7 +736,7 @@ torch::Tensor dequantize_q5_k(const int8_t* data, const int num_bytes, const int
     //data_gpu.copy_(data, false);
 
     // Create output tensor
-    auto output = torch::zeros({num_blocks, 256}, torch::dtype(target_dtype).device(device));
+    auto output = torch::zeros({num_blocks, ele_per_blk}, torch::dtype(target_dtype).device(device));
 
     switch (target_dtype) {
     case torch::kFloat16:
@@ -768,7 +768,7 @@ torch::Tensor dequantize_q4_k(const int8_t* data, const int num_bytes, const int
     //data_gpu.copy_(data, false);
 
     // Create output tensor
-    auto output = torch::zeros({num_blocks, 256}, torch::dtype(target_dtype).device(device));
+    auto output = torch::zeros({num_blocks, ele_per_blk}, torch::dtype(target_dtype).device(device));
 
     switch (target_dtype) {
     case torch::kFloat16:
@@ -799,7 +799,7 @@ torch::Tensor dequantize_q3_k(const int8_t* data, const int num_bytes, const int
     //data_gpu.copy_(data, false);
 
     // Create output tensor
-    auto output = torch::zeros({num_blocks, 256}, torch::dtype(target_dtype).device(device));
+    auto output = torch::zeros({num_blocks, ele_per_blk}, torch::dtype(target_dtype).device(device));
 
     switch (target_dtype) {
     case torch::kFloat16:
@@ -830,7 +830,7 @@ torch::Tensor dequantize_q2_k(const int8_t* data, const int num_bytes, const int
     //data_gpu.copy_(data, false);
 
     // Create output tensor
-    auto output = torch::zeros({num_blocks, 256}, torch::dtype(target_dtype).device(device));
+    auto output = torch::zeros({num_blocks, ele_per_blk}, torch::dtype(target_dtype).device(device));
 
     switch (target_dtype) {
     case torch::kFloat16:
@@ -861,7 +861,7 @@ torch::Tensor dequantize_iq4_xs(const int8_t* data, const int num_bytes, const i
     //data_gpu.copy_(data, false);
 
     // Create output tensor
-    auto output = torch::zeros({num_blocks, 256}, torch::dtype(target_dtype).device(device));
+    auto output = torch::zeros({num_blocks, ele_per_blk}, torch::dtype(target_dtype).device(device));
 
     switch (target_dtype) {
     case torch::kFloat16:
```
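All seven hunks make the same fix: the output tensor's second dimension now comes from the `ele_per_blk` argument rather than a hard-coded 32 (Q8_0) or 256 (the K-quants and IQ4_XS), so the allocation always tracks the block size the caller passes. A Python sketch of the shape computation; the bytes-per-block values are taken from llama.cpp's standard GGML block layouts and are this sketch's assumption, not something shown in the diff:

```python
# Output-shape computation behind the dequantize fix: the tensor is
# (num_blocks, ele_per_blk), so the element count per block must come
# from the quant format rather than a hard-coded 32 or 256.
GGML_BLOCKS = {
    # type: (bytes_per_block, elements_per_block)
    "q8_0":   (34, 32),    # 2-byte scale + 32 int8 weights
    "q6_k":   (210, 256),  # K-quants pack QK_K = 256 elements per block
    "q5_k":   (176, 256),
    "q4_k":   (144, 256),
    "q3_k":   (110, 256),
    "q2_k":   (84, 256),
    "iq4_xs": (136, 256),
}

def output_shape(quant_type: str, num_bytes: int) -> tuple[int, int]:
    bytes_per_blk, ele_per_blk = GGML_BLOCKS[quant_type]
    assert num_bytes % bytes_per_blk == 0, "buffer is not a whole number of blocks"
    return (num_bytes // bytes_per_blk, ele_per_blk)

# With the parametric width, the allocation agrees with the caller's
# ele_per_blk instead of duplicating the format constant inline:
print(output_shape("q8_0", 34 * 4))   # (4, 32)
print(output_shape("q4_k", 144 * 2))  # (2, 256)
```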
Some files were not shown because too many files have changed in this diff.