mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-05 20:19:51 +00:00
[feature] support for pypi install
This commit is contained in:
parent
a25320b703
commit
dd18a11cab
4 changed files with 185 additions and 124 deletions
44
README.md
44
README.md
|
@ -74,24 +74,37 @@ Some preparation:
|
||||||
conda activate ktransformers # you may need to run ‘conda init’ and reopen shell first
|
conda activate ktransformers # you may need to run ‘conda init’ and reopen shell first
|
||||||
```
|
```
|
||||||
|
|
||||||
Download source code:
|
- Make sure that PyTorch, packaging, and ninja are installed
|
||||||
|
```
|
||||||
|
pip install torch packaging ninja
|
||||||
|
```
|
||||||
|
|
||||||
|
<h3>Installation</h3>
|
||||||
|
You can install from PyPI:
|
||||||
|
|
||||||
|
```
|
||||||
|
pip install ktransformers --no-build-isolation
|
||||||
|
```
|
||||||
|
|
||||||
|
Or download source code and compile:
|
||||||
|
- init source code
|
||||||
```sh
|
```sh
|
||||||
git clone https://github.com/kvcache-ai/ktransformers.git
|
git clone https://github.com/kvcache-ai/ktransformers.git
|
||||||
cd ktransformers
|
cd ktransformers
|
||||||
git submodule init
|
git submodule init
|
||||||
git submodule update
|
git submodule update
|
||||||
```
|
```
|
||||||
|
- [Optional] If you want to run with the website, please [compile the website](./doc/en/api/server/website.md) before executing ```bash install.sh```
|
||||||
|
- Compile and install
|
||||||
|
```
|
||||||
|
bash install.sh
|
||||||
|
```
|
||||||
|
|
||||||
<h3>Local Chat</h3>
|
<h3>Local Chat</h3>
|
||||||
We provide a simple command-line local chat Python script that you can run for testing.
|
We provide a simple command-line local chat Python script that you can run for testing.
|
||||||
|
|
||||||
> Note that this is a very simple test tool that only supports single-round chat, without any memory of the previous input. If you want to try the full ability of the model, you may go to [RESTful API and Web UI](#id_666). We use the DeepSeek-V2-Lite-Chat-GGUF model as an example here, but we also support other models; you can replace it with any other model that you want to test.
|
> Note that this is a very simple test tool that only supports single-round chat, without any memory of the previous input. If you want to try the full ability of the model, you may go to [RESTful API and Web UI](#id_666). We use the DeepSeek-V2-Lite-Chat-GGUF model as an example here, but we also support other models; you can replace it with any other model that you want to test.
|
||||||
|
|
||||||
<h4>Install</h4>
|
|
||||||
|
|
||||||
```sh
|
|
||||||
bash install.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
<h4>Run Example</h4>
|
<h4>Run Example</h4>
|
||||||
|
|
||||||
|
@ -109,11 +122,11 @@ wget https://huggingface.co/mzwing/DeepSeek-V2-Lite-Chat-GGUF/resolve/main/DeepS
|
||||||
cd .. # Move to repo's root dir
|
cd .. # Move to repo's root dir
|
||||||
|
|
||||||
# Start local chat
|
# Start local chat
|
||||||
python ktransformers/local_chat.py --model_path deepseek-ai/DeepSeek-V2-Lite-Chat --gguf_path ./DeepSeek-V2-Lite-Chat-GGUF
|
python -m ktransformers.local_chat --model_path deepseek-ai/DeepSeek-V2-Lite-Chat --gguf_path ./DeepSeek-V2-Lite-Chat-GGUF
|
||||||
|
|
||||||
# If you see “OSError: We couldn't connect to 'https://huggingface.co' to load this file”, try:
|
# If you see “OSError: We couldn't connect to 'https://huggingface.co' to load this file”, try:
|
||||||
# GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite
|
# GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite
|
||||||
# python ktransformers/local_chat.py --model_path ./DeepSeek-V2-Lite --gguf_path ./DeepSeek-V2-Lite-Chat-GGUF
|
# python -m ktransformers.local_chat --model_path ./DeepSeek-V2-Lite --gguf_path ./DeepSeek-V2-Lite-Chat-GGUF
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
@ -154,7 +167,7 @@ wget https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct-GGUF/resolve/main/qwen2
|
||||||
|
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
python ktransformers/local_chat.py --model_name Qwen/Qwen2-57B-A14B-Instruct --gguf_path ./Qwen2-57B-GGUF
|
python -m ktransformers.local_chat --model_name Qwen/Qwen2-57B-A14B-Instruct --gguf_path ./Qwen2-57B-GGUF
|
||||||
|
|
||||||
# If you see “OSError: We couldn't connect to 'https://huggingface.co' to load this file”, try:
|
# If you see “OSError: We couldn't connect to 'https://huggingface.co' to load this file”, try:
|
||||||
# GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct
|
# GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct
|
||||||
|
@ -172,11 +185,11 @@ wget https://huggingface.co/bartowski/DeepSeek-V2-Chat-0628-GGUF/resolve/main/De
|
||||||
|
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
python ktransformers/local_chat.py --model_name deepseek-ai/DeepSeek-V2-Chat-0628 --gguf_path ./DeepSeek-V2-Chat-0628-GGUF
|
python -m ktransformers.local_chat --model_name deepseek-ai/DeepSeek-V2-Chat-0628 --gguf_path ./DeepSeek-V2-Chat-0628-GGUF
|
||||||
|
|
||||||
# If you see “OSError: We couldn't connect to 'https://huggingface.co' to load this file”, try:
|
# If you see “OSError: We couldn't connect to 'https://huggingface.co' to load this file”, try:
|
||||||
# GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat-0628
|
# GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat-0628
|
||||||
# python ktransformers/local_chat.py --model_path ./DeepSeek-V2-Chat-0628 --gguf_path ./DeepSeek-V2-Chat-0628-GGUF
|
# python -m ktransformers.local_chat --model_path ./DeepSeek-V2-Chat-0628 --gguf_path ./DeepSeek-V2-Chat-0628-GGUF
|
||||||
```
|
```
|
||||||
|
|
||||||
| model name | weights download link |
|
| model name | weights download link |
|
||||||
|
@ -193,15 +206,6 @@ python ktransformers/local_chat.py --model_name deepseek-ai/DeepSeek-V2-Chat-062
|
||||||
|
|
||||||
<h3>RESTful API and Web UI</h3>
|
<h3>RESTful API and Web UI</h3>
|
||||||
|
|
||||||
<h4>Install</h4>
|
|
||||||
|
|
||||||
[Optional] If you want to run with website, please [compile the website](./doc/en/api/server/website.md) before execute ```pip install .```
|
|
||||||
|
|
||||||
Install ktransformers with source.
|
|
||||||
```
|
|
||||||
pip install -r requirements-local_chat.txt
|
|
||||||
pip install . --no-build-isolation
|
|
||||||
```
|
|
||||||
|
|
||||||
Start without website:
|
Start without website:
|
||||||
|
|
||||||
|
|
14
install.sh
14
install.sh
|
@ -10,16 +10,6 @@ rm -rf ktransformers/ktransformers_ext/cuda/*.egg-info
|
||||||
echo "Installing python dependencies from requirements.txt"
|
echo "Installing python dependencies from requirements.txt"
|
||||||
pip install -r requirements-local_chat.txt
|
pip install -r requirements-local_chat.txt
|
||||||
|
|
||||||
echo "Installing ktransformers cpuinfer"
|
echo "Installing ktransformers"
|
||||||
mkdir -p ktransformers/ktransformers_ext/build
|
pip install . --no-build-isolation
|
||||||
cd ktransformers/ktransformers_ext/build
|
|
||||||
cmake ..
|
|
||||||
cmake --build . --config Release
|
|
||||||
|
|
||||||
echo "Installing ktransformers gpu kernel, this may take for a while, please wait"
|
|
||||||
sleep 3
|
|
||||||
|
|
||||||
cd ../cuda
|
|
||||||
python setup.py install
|
|
||||||
cd ../../..
|
|
||||||
echo "Installation completed successfully"
|
echo "Installation completed successfully"
|
|
@ -6,3 +6,63 @@ requires = [
|
||||||
"packaging"
|
"packaging"
|
||||||
]
|
]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
|
||||||
|
name = "ktransformers"
|
||||||
|
|
||||||
|
dynamic = ["version"]
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
"torch >= 2.3.0",
|
||||||
|
"transformers == 4.43.2",
|
||||||
|
"fastapi >= 0.111.0",
|
||||||
|
"langchain >= 0.2.0",
|
||||||
|
"blessed >= 1.20.0",
|
||||||
|
"accelerate >= 0.31.0",
|
||||||
|
"sentencepiece >= 0.1.97",
|
||||||
|
"setuptools",
|
||||||
|
"ninja",
|
||||||
|
"wheel",
|
||||||
|
"colorlog",
|
||||||
|
"build",
|
||||||
|
"fire"
|
||||||
|
]
|
||||||
|
|
||||||
|
requires-python = ">=3.11"
|
||||||
|
|
||||||
|
authors = [
|
||||||
|
{name = "KVCache.AI", email = "zhang.mingxing@outlook.com"}
|
||||||
|
]
|
||||||
|
|
||||||
|
maintainers = [
|
||||||
|
{name = "james0zan", email = "zhang.mingxing@outlook.com"},
|
||||||
|
{name = "awake", email = "awake@approaching.ai"},
|
||||||
|
{name = "unicorn chan", email = "nl@approaching.ai"}
|
||||||
|
]
|
||||||
|
|
||||||
|
description = "KTransformers, pronounced as Quick Transformers, is designed to enhance your Transformers experience with advanced kernel optimizations and placement/parallelism strategies."
|
||||||
|
|
||||||
|
readme = "README.md"
|
||||||
|
license = {file = "LICENSE"}
|
||||||
|
|
||||||
|
keywords = ["ktransformers", "llm"]
|
||||||
|
|
||||||
|
classifiers = [
|
||||||
|
"Development Status :: 4 - Beta",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12"
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
Homepage = "https://kvcache.ai"
|
||||||
|
Repository = "https://github.com/kvcache-ai/ktransformers.git"
|
||||||
|
Issues = "https://github.com/kvcache-ai/ktransformers/issues"
|
||||||
|
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
ktransformers = "ktransformers.server.main:main"
|
||||||
|
|
||||||
|
[tool.setuptools.packages.find]
|
||||||
|
where = ["./", ]
|
||||||
|
include = ["ktransformers"]
|
159
setup.py
159
setup.py
|
@ -3,33 +3,43 @@
|
||||||
'''
|
'''
|
||||||
Description :
|
Description :
|
||||||
Author : chenxl
|
Author : chenxl
|
||||||
Date : 2024-07-12 07:25:42
|
Date : 2024-07-27 16:15:27
|
||||||
Version : 1.0.0
|
Version : 1.0.0
|
||||||
LastEditors : chenxl
|
LastEditors : chenxl
|
||||||
LastEditTime : 2024-07-27 04:31:03
|
LastEditTime : 2024-07-29 09:40:24
|
||||||
|
Adapted from:
|
||||||
|
https://github.com/Dao-AILab/flash-attention/blob/v2.6.3/setup.py
|
||||||
|
Copyright (c) 2023, Tri Dao.
|
||||||
|
Copyright (c) 2024 by KVCache.AI, All Rights Reserved.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import shutil
|
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
import ast
|
import ast
|
||||||
import subprocess
|
import subprocess
|
||||||
import platform
|
import platform
|
||||||
import io
|
import urllib.request
|
||||||
|
import urllib.error
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from packaging.version import parse
|
from packaging.version import parse
|
||||||
import torch.version
|
import torch.version
|
||||||
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
|
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
|
||||||
from setuptools import setup, Extension
|
from setuptools import setup, Extension
|
||||||
import torch
|
|
||||||
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME
|
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME
|
||||||
|
|
||||||
ROOT_DIR = os.path.dirname(__file__)
|
|
||||||
class VersionInfo:
|
class VersionInfo:
|
||||||
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
|
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
PACKAGE_NAME = "ktransformers"
|
PACKAGE_NAME = "ktransformers"
|
||||||
|
BASE_WHEEL_URL:str = (
|
||||||
|
"https://github.com/kvcache-ai/ktransformers/releases/download/{tag_name}/{wheel_filename}"
|
||||||
|
)
|
||||||
|
FORCE_BUILD = os.getenv("KTRANSFORMERS_FORCE_BUILD", "FALSE") == "TRUE"
|
||||||
|
|
||||||
def get_cuda_bare_metal_version(self, cuda_dir):
|
def get_cuda_bare_metal_version(self, cuda_dir):
|
||||||
raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True)
|
raw_output = subprocess.check_output(
|
||||||
|
[cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True)
|
||||||
output = raw_output.split()
|
output = raw_output.split()
|
||||||
release_idx = output.index("release") + 1
|
release_idx = output.index("release") + 1
|
||||||
bare_metal_version = parse(output[release_idx].split(",")[0])
|
bare_metal_version = parse(output[release_idx].split(",")[0])
|
||||||
|
@ -52,10 +62,10 @@ class VersionInfo:
|
||||||
|
|
||||||
def get_cpu_instruct(self,):
|
def get_cpu_instruct(self,):
|
||||||
if sys.platform.startswith("linux"):
|
if sys.platform.startswith("linux"):
|
||||||
with open('/proc/cpuinfo', 'r') as cpu_f:
|
with open('/proc/cpuinfo', 'r', encoding="utf-8") as cpu_f:
|
||||||
cpuinfo = cpu_f.read()
|
cpuinfo = cpu_f.read()
|
||||||
|
flags_line = [line for line in cpuinfo.split(
|
||||||
flags_line = [line for line in cpuinfo.split('\n') if line.startswith('flags')][0]
|
'\n') if line.startswith('flags')][0]
|
||||||
flags = flags_line.split(':')[1].strip().split(' ')
|
flags = flags_line.split(':')[1].strip().split(' ')
|
||||||
for flag in flags:
|
for flag in flags:
|
||||||
if 'avx512' in flag:
|
if 'avx512' in flag:
|
||||||
|
@ -63,37 +73,69 @@ class VersionInfo:
|
||||||
for flag in flags:
|
for flag in flags:
|
||||||
if 'avx2' in flag:
|
if 'avx2' in flag:
|
||||||
return 'avx2'
|
return 'avx2'
|
||||||
raise ValueError("Unsupported cpu Instructions: {}".format(flags_line))
|
raise ValueError(
|
||||||
|
"Unsupported cpu Instructions: {}".format(flags_line))
|
||||||
|
else:
|
||||||
|
raise ValueError("Unsupported platform: {}".format(sys.platform))
|
||||||
|
|
||||||
def get_torch_version(self,):
|
def get_torch_version(self,):
|
||||||
torch_version_raw = parse(torch.__version__)
|
torch_version_raw = parse(torch.__version__)
|
||||||
torch_version = f"{torch_version_raw.major}{torch_version_raw.minor}"
|
torch_version = f"{torch_version_raw.major}{torch_version_raw.minor}"
|
||||||
return torch_version
|
return torch_version
|
||||||
|
|
||||||
def get_package_version(self,):
|
def get_flash_version(self,):
|
||||||
version_file = os.path.join(Path(VersionInfo.THIS_DIR), VersionInfo.PACKAGE_NAME, "__init__.py")
|
version_file = os.path.join(
|
||||||
|
Path(VersionInfo.THIS_DIR), VersionInfo.PACKAGE_NAME, "__init__.py")
|
||||||
with open(version_file, "r", encoding="utf-8") as f:
|
with open(version_file, "r", encoding="utf-8") as f:
|
||||||
version_match = re.search(r"^__version__\s*=\s*(.*)$", f.read(), re.MULTILINE)
|
version_match = re.search(
|
||||||
public_version = ast.literal_eval(version_match.group(1))
|
r"^__version__\s*=\s*(.*)$", f.read(), re.MULTILINE)
|
||||||
package_version = f"{str(public_version)}+cu{self.get_cuda_bare_metal_version(CUDA_HOME)}torch{self.get_torch_version()}{self.get_cpu_instruct()}"
|
flash_version = ast.literal_eval(version_match.group(1))
|
||||||
|
return flash_version
|
||||||
|
|
||||||
|
def get_package_version(self, full_version=False):
|
||||||
|
flash_version = self.get_flash_version()
|
||||||
|
package_version = f"{str(flash_version)}+cu{self.get_cuda_bare_metal_version(CUDA_HOME)}torch{self.get_torch_version()}{self.get_cpu_instruct()}"
|
||||||
|
if full_version:
|
||||||
|
return package_version
|
||||||
|
if not VersionInfo.FORCE_BUILD:
|
||||||
|
return str(flash_version)
|
||||||
return package_version
|
return package_version
|
||||||
|
|
||||||
|
|
||||||
class BuildWheelsCommand(_bdist_wheel):
|
class BuildWheelsCommand(_bdist_wheel):
|
||||||
def get_wheel_name(self,):
|
def get_wheel_name(self,):
|
||||||
version_info = VersionInfo()
|
version_info = VersionInfo()
|
||||||
|
package_version = version_info.get_package_version(full_version=True)
|
||||||
|
flash_version = version_info.get_flash_version()
|
||||||
python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
|
python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
|
||||||
wheel_filename = f"{VersionInfo.PACKAGE_NAME}-{version_info.get_package_version()}-{python_version}-{python_version}-{version_info.get_platform()}.whl"
|
wheel_filename = f"{VersionInfo.PACKAGE_NAME}-{package_version}-{python_version}-{python_version}-{version_info.get_platform()}.whl"
|
||||||
return wheel_filename
|
wheel_url = VersionInfo.BASE_WHEEL_URL.format(tag_name=f"v{flash_version}", wheel_filename=wheel_filename)
|
||||||
|
return wheel_filename, wheel_url
|
||||||
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
if VersionInfo.FORCE_BUILD:
|
||||||
super().run()
|
super().run()
|
||||||
|
wheel_filename, wheel_url = self.get_wheel_name()
|
||||||
|
print("Guessing wheel URL: ", wheel_url)
|
||||||
|
try:
|
||||||
|
urllib.request.urlretrieve(wheel_url, wheel_filename)
|
||||||
|
# Make the archive
|
||||||
|
# Lifted from the root wheel processing command
|
||||||
|
# https://github.com/pypa/wheel/blob/cf71108ff9f6ffc36978069acb28824b44ae028e/src/wheel/bdist_wheel.py#LL381C9-L381C85
|
||||||
|
if not os.path.exists(self.dist_dir):
|
||||||
|
os.makedirs(self.dist_dir)
|
||||||
|
|
||||||
impl_tag, abi_tag, plat_tag = self.get_tag()
|
impl_tag, abi_tag, plat_tag = self.get_tag()
|
||||||
archive_basename = f"{self.wheel_dist_name}-{impl_tag}-{abi_tag}-{plat_tag}"
|
archive_basename = f"{self.wheel_dist_name}-{impl_tag}-{abi_tag}-{plat_tag}"
|
||||||
|
|
||||||
wheel_path = os.path.join(self.dist_dir, archive_basename + ".whl")
|
wheel_path = os.path.join(self.dist_dir, archive_basename + ".whl")
|
||||||
wheel_name_with_platform = os.path.join(self.dist_dir, self.get_wheel_name())
|
print("Raw wheel path", wheel_path)
|
||||||
os.rename(wheel_path, wheel_name_with_platform)
|
os.rename(wheel_filename, wheel_path)
|
||||||
|
except (urllib.error.HTTPError, urllib.error.URLError):
|
||||||
|
print("Precompiled wheel not found. Building from source...")
|
||||||
|
# If the wheel could not be downloaded, build from source
|
||||||
|
super().run()
|
||||||
|
|
||||||
|
|
||||||
# Convert distutils Windows platform specifiers to CMake -A arguments
|
# Convert distutils Windows platform specifiers to CMake -A arguments
|
||||||
|
@ -104,22 +146,17 @@ PLAT_TO_CMAKE = {
|
||||||
"win-arm64": "ARM64",
|
"win-arm64": "ARM64",
|
||||||
}
|
}
|
||||||
|
|
||||||
class CopyExtension(Extension):
|
|
||||||
def __init__(self, name: str, sourcedir: str = "", copy_file_source="") -> None:
|
|
||||||
super().__init__(name, sources=[])
|
|
||||||
self.sourcedir = os.fspath(Path(sourcedir).resolve())
|
|
||||||
self.source_file = copy_file_source
|
|
||||||
class CMakeExtension(Extension):
|
class CMakeExtension(Extension):
|
||||||
def __init__(self, name: str, sourcedir: str = "") -> None:
|
def __init__(self, name: str, sourcedir: str = "") -> None:
|
||||||
super().__init__(name, sources=[])
|
super().__init__(name, sources=[])
|
||||||
self.sourcedir = os.fspath(Path(sourcedir).resolve() / "ktransformers/ktransformers_ext")
|
self.sourcedir = os.fspath(
|
||||||
|
Path(sourcedir).resolve() / "ktransformers" / "ktransformers_ext")
|
||||||
|
|
||||||
|
|
||||||
class CMakeBuild(BuildExtension):
|
class CMakeBuild(BuildExtension):
|
||||||
|
|
||||||
def build_extension(self, ext) -> None:
|
def build_extension(self, ext) -> None:
|
||||||
if isinstance(ext, CopyExtension):
|
|
||||||
ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name)
|
|
||||||
extdir = ext_fullpath.parent.resolve()
|
|
||||||
shutil.copy(ext.source_file, extdir)
|
|
||||||
return
|
|
||||||
if not isinstance(ext, CMakeExtension):
|
if not isinstance(ext, CMakeExtension):
|
||||||
super().build_extension(ext)
|
super().build_extension(ext)
|
||||||
return
|
return
|
||||||
|
@ -129,7 +166,8 @@ class CMakeBuild(BuildExtension):
|
||||||
# Using this requires trailing slash for auto-detection & inclusion of
|
# Using this requires trailing slash for auto-detection & inclusion of
|
||||||
# auxiliary "native" libs
|
# auxiliary "native" libs
|
||||||
|
|
||||||
debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug
|
debug = int(os.environ.get("DEBUG", 0)
|
||||||
|
) if self.debug is None else self.debug
|
||||||
cfg = "Debug" if debug else "Release"
|
cfg = "Debug" if debug else "Release"
|
||||||
|
|
||||||
# CMake lets you override the generator - we need to check this.
|
# CMake lets you override the generator - we need to check this.
|
||||||
|
@ -146,10 +184,12 @@ class CMakeBuild(BuildExtension):
|
||||||
]
|
]
|
||||||
build_args = []
|
build_args = []
|
||||||
if "CMAKE_ARGS" in os.environ:
|
if "CMAKE_ARGS" in os.environ:
|
||||||
cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]
|
cmake_args += [
|
||||||
|
item for item in os.environ["CMAKE_ARGS"].split(" ") if item]
|
||||||
|
|
||||||
# In this example, we pass in the version to C++. You might not need to.
|
# In this example, we pass in the version to C++. You might not need to.
|
||||||
cmake_args += [f"-DEXAMPLE_VERSION_INFO={self.distribution.get_version()}"]
|
cmake_args += [
|
||||||
|
f"-DEXAMPLE_VERSION_INFO={self.distribution.get_version()}"]
|
||||||
if self.compiler.compiler_type != "msvc":
|
if self.compiler.compiler_type != "msvc":
|
||||||
if not cmake_generator or cmake_generator == "Ninja":
|
if not cmake_generator or cmake_generator == "Ninja":
|
||||||
try:
|
try:
|
||||||
|
@ -165,7 +205,8 @@ class CMakeBuild(BuildExtension):
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Single config generators are handled "normally"
|
# Single config generators are handled "normally"
|
||||||
single_config = any(x in cmake_generator for x in {"NMake", "Ninja"})
|
single_config = any(
|
||||||
|
x in cmake_generator for x in {"NMake", "Ninja"})
|
||||||
|
|
||||||
# CMake allows an arch-in-generator style for backward compatibility
|
# CMake allows an arch-in-generator style for backward compatibility
|
||||||
contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"})
|
contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"})
|
||||||
|
@ -183,7 +224,8 @@ class CMakeBuild(BuildExtension):
|
||||||
# Cross-compile support for macOS - respect ARCHFLAGS if set
|
# Cross-compile support for macOS - respect ARCHFLAGS if set
|
||||||
archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", ""))
|
archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", ""))
|
||||||
if archs:
|
if archs:
|
||||||
cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))]
|
cmake_args += [
|
||||||
|
"-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))]
|
||||||
|
|
||||||
if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
|
if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
|
||||||
if hasattr(self, "parallel") and self.parallel:
|
if hasattr(self, "parallel") and self.parallel:
|
||||||
|
@ -199,51 +241,16 @@ class CMakeBuild(BuildExtension):
|
||||||
["cmake", "--build", ".", *build_args], cwd=build_temp, check=True
|
["cmake", "--build", ".", *build_args], cwd=build_temp, check=True
|
||||||
)
|
)
|
||||||
|
|
||||||
def read_readme() -> str:
|
|
||||||
p = os.path.join(ROOT_DIR, "README.md")
|
|
||||||
if os.path.isfile(p):
|
|
||||||
return io.open(p, "r", encoding="utf-8").read()
|
|
||||||
else:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="ktransformers",
|
|
||||||
version=VersionInfo().get_package_version(),
|
version=VersionInfo().get_package_version(),
|
||||||
author="KVCache.ai",
|
cmdclass={"bdist_wheel":BuildWheelsCommand ,"build_ext": CMakeBuild},
|
||||||
license="Apache 2.0",
|
|
||||||
description = "KTransformers, pronounced as Quick Transformers, is designed to enhance your Transformers experience with advanced kernel optimizations and placement/parallelism strategies.",
|
|
||||||
long_description=read_readme(),
|
|
||||||
long_description_content_type="text/markdown",
|
|
||||||
cmdclass={"build_ext": CMakeBuild},
|
|
||||||
install_requires = [
|
|
||||||
"torch >= 2.3.0",
|
|
||||||
"transformers == 4.43.2",
|
|
||||||
"fastapi >= 0.111.0",
|
|
||||||
"langchain >= 0.2.0",
|
|
||||||
"blessed >= 1.20.0",
|
|
||||||
"accelerate >= 0.31.0",
|
|
||||||
"sentencepiece >= 0.1.97",
|
|
||||||
"setuptools",
|
|
||||||
"ninja",
|
|
||||||
"wheel",
|
|
||||||
"colorlog",
|
|
||||||
"build",
|
|
||||||
"packaging",
|
|
||||||
"fire"
|
|
||||||
],
|
|
||||||
python_requires=">=3.10",
|
|
||||||
entry_points={
|
|
||||||
"console_scripts": [
|
|
||||||
"ktransformers=ktransformers.server.main:main",
|
|
||||||
],
|
|
||||||
},
|
|
||||||
packages=["ktransformers"],
|
|
||||||
include_package_data=True,
|
|
||||||
ext_modules=[
|
ext_modules=[
|
||||||
|
CMakeExtension("cpuinfer_ext"),
|
||||||
CUDAExtension('KTransformersOps', [
|
CUDAExtension('KTransformersOps', [
|
||||||
'ktransformers/ktransformers_ext/cuda/custom_gguf/dequant.cu',
|
'ktransformers/ktransformers_ext/cuda/custom_gguf/dequant.cu',
|
||||||
'ktransformers/ktransformers_ext/cuda/binding.cpp',
|
'ktransformers/ktransformers_ext/cuda/binding.cpp',
|
||||||
'ktransformers/ktransformers_ext/cuda/gptq_marlin/gptq_marlin.cu',
|
'ktransformers/ktransformers_ext/cuda/gptq_marlin/gptq_marlin.cu'
|
||||||
]),
|
])
|
||||||
CMakeExtension("cpuinfer_ext")]
|
]
|
||||||
)
|
)
|
Loading…
Add table
Reference in a new issue