Mirror of https://github.com/kvcache-ai/ktransformers.git (synced 2025-09-09 22:05:30 +00:00)
Initial commit

Commit 18c42e67df: 247 changed files with 53775 additions and 0 deletions
setup.py, 249 lines (new file)

@@ -0,0 +1,249 @@
#!/usr/bin/env python
# coding=utf-8
'''
Description  :
Author       : chenxl
Date         : 2024-07-12 07:25:42
Version      : 1.0.0
LastEditors  : chenxl
LastEditTime : 2024-07-27 04:31:03
'''
import os
import shutil
import sys
import re
import ast
import subprocess
import platform
import io
from pathlib import Path
from packaging.version import parse
import torch.version
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
from setuptools import setup, Extension
import torch
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME

ROOT_DIR = os.path.dirname(__file__)

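# Gathers local toolchain information (CUDA release, torch version, host
# platform, CPU SIMD level) and folds it into the package version tag.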
class VersionInfo:
    THIS_DIR = os.path.dirname(os.path.abspath(__file__))
    PACKAGE_NAME = "ktransformers"

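    # Parse the CUDA release reported by `nvcc -V` under cuda_dir and collapse
    # it to a short tag, e.g. release 12.1 becomes "121" (number illustrative).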
    def get_cuda_bare_metal_version(self, cuda_dir):
        raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True)
        output = raw_output.split()
        release_idx = output.index("release") + 1
        bare_metal_version = parse(output[release_idx].split(",")[0])
        cuda_version = f"{bare_metal_version.major}{bare_metal_version.minor}"
        return cuda_version

    def get_cuda_version_of_torch(self,):
        torch_cuda_version = parse(torch.version.cuda)
        cuda_version = f"{torch_cuda_version.major}{torch_cuda_version.minor}"
        return cuda_version

    def get_platform(self,):
        """
        Returns the platform name as used in wheel filenames.
        """
        if sys.platform.startswith("linux"):
            return f'linux_{platform.uname().machine}'
        else:
            raise ValueError("Unsupported platform: {}".format(sys.platform))

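    # Linux-only: scan the CPU flags in /proc/cpuinfo and report the best
    # supported SIMD level, preferring avx512 over avx2.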
    def get_cpu_instruct(self,):
        if sys.platform.startswith("linux"):
            with open('/proc/cpuinfo', 'r') as cpu_f:
                cpuinfo = cpu_f.read()

            flags_line = [line for line in cpuinfo.split('\n') if line.startswith('flags')][0]
            flags = flags_line.split(':')[1].strip().split(' ')
            for flag in flags:
                if 'avx512' in flag:
                    return 'avx512'
            for flag in flags:
                if 'avx2' in flag:
                    return 'avx2'
            raise ValueError("Unsupported cpu Instructions: {}".format(flags_line))

    def get_torch_version(self,):
        torch_version_raw = parse(torch.__version__)
        torch_version = f"{torch_version_raw.major}{torch_version_raw.minor}"
        return torch_version

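    # Read __version__ from ktransformers/__init__.py and append the local
    # build tags, producing e.g. "<version>+cu121torch23avx512" (the concrete
    # numbers depend on the local CUDA, torch, and CPU feature set).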
    def get_package_version(self,):
        version_file = os.path.join(Path(VersionInfo.THIS_DIR), VersionInfo.PACKAGE_NAME, "__init__.py")
        with open(version_file, "r", encoding="utf-8") as f:
            version_match = re.search(r"^__version__\s*=\s*(.*)$", f.read(), re.MULTILINE)
        public_version = ast.literal_eval(version_match.group(1))
        package_version = f"{str(public_version)}+cu{self.get_cuda_bare_metal_version(CUDA_HOME)}torch{self.get_torch_version()}{self.get_cpu_instruct()}"
        return package_version


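# bdist_wheel subclass that renames the finished wheel so its filename carries
# the CUDA/torch/CPU tags from VersionInfo.  As written, setup() below only
# registers CMakeBuild for build_ext, so this command takes effect only when
# wired into cmdclass.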
class BuildWheelsCommand(_bdist_wheel):
    def get_wheel_name(self,):
        version_info = VersionInfo()
        python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
        wheel_filename = f"{VersionInfo.PACKAGE_NAME}-{version_info.get_package_version()}-{python_version}-{python_version}-{version_info.get_platform()}.whl"
        return wheel_filename

    def run(self):
        super().run()
        impl_tag, abi_tag, plat_tag = self.get_tag()
        archive_basename = f"{self.wheel_dist_name}-{impl_tag}-{abi_tag}-{plat_tag}"
        wheel_path = os.path.join(self.dist_dir, archive_basename + ".whl")
        wheel_name_with_platform = os.path.join(self.dist_dir, self.get_wheel_name())
        os.rename(wheel_path, wheel_name_with_platform)


# Convert distutils Windows platform specifiers to CMake -A arguments
PLAT_TO_CMAKE = {
    "win32": "Win32",
    "win-amd64": "x64",
    "win-arm32": "ARM",
    "win-arm64": "ARM64",
}

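# An Extension with no sources to compile: build_extension() simply copies a
# prebuilt file (copy_file_source) into the package's output directory.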
class CopyExtension(Extension):
    def __init__(self, name: str, sourcedir: str = "", copy_file_source="") -> None:
        super().__init__(name, sources=[])
        self.sourcedir = os.fspath(Path(sourcedir).resolve())
        self.source_file = copy_file_source

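# Placeholder Extension whose C++ sources live under
# ktransformers/ktransformers_ext and are configured and built by CMake.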
class CMakeExtension(Extension):
    def __init__(self, name: str, sourcedir: str = "") -> None:
        super().__init__(name, sources=[])
        self.sourcedir = os.fspath(Path(sourcedir).resolve() / "ktransformers/ktransformers_ext")

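# build_ext driver: copies CopyExtension payloads as-is, hands CMakeExtension
# off to a CMake configure-and-build step, and falls back to torch's
# BuildExtension for everything else (e.g. the CUDAExtension below).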
class CMakeBuild(BuildExtension):
    def build_extension(self, ext) -> None:
        if isinstance(ext, CopyExtension):
            ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name)
            extdir = ext_fullpath.parent.resolve()
            shutil.copy(ext.source_file, extdir)
            return
        if not isinstance(ext, CMakeExtension):
            super().build_extension(ext)
            return
        ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name)
        extdir = ext_fullpath.parent.resolve()

        # Using this requires trailing slash for auto-detection & inclusion of
        # auxiliary "native" libs

        debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug
        cfg = "Debug" if debug else "Release"

        # CMake lets you override the generator - we need to check this.
        # Can be set with Conda-Build, for example.
        cmake_generator = os.environ.get("CMAKE_GENERATOR", "")

        # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON
        # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code
        # from Python.
        cmake_args = [
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}",
            f"-DPYTHON_EXECUTABLE={sys.executable}",
            f"-DCMAKE_BUILD_TYPE={cfg}",  # not used on MSVC, but no harm
        ]
        build_args = []
        if "CMAKE_ARGS" in os.environ:
            cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]

        # In this example, we pass in the version to C++. You might not need to.
        cmake_args += [f"-DEXAMPLE_VERSION_INFO={self.distribution.get_version()}"]
        if self.compiler.compiler_type != "msvc":
            if not cmake_generator or cmake_generator == "Ninja":
                try:
                    import ninja

                    ninja_executable_path = Path(ninja.BIN_DIR) / "ninja"
                    cmake_args += [
                        "-GNinja",
                        f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}",
                    ]
                except ImportError:
                    pass

        else:
            # Single config generators are handled "normally"
            single_config = any(x in cmake_generator for x in {"NMake", "Ninja"})

            # CMake allows an arch-in-generator style for backward compatibility
            contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"})
            if not single_config and not contains_arch:
                cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]]

            # Multi-config generators have a different way to specify configs
            if not single_config:
                cmake_args += [
                    f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"
                ]
                build_args += ["--config", cfg]

        if sys.platform.startswith("darwin"):
            # Cross-compile support for macOS - respect ARCHFLAGS if set
            archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", ""))
            if archs:
                cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))]

        if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
            if hasattr(self, "parallel") and self.parallel:
                build_args += [f"-j{self.parallel}"]

        build_temp = Path(ext.sourcedir) / "build"
        if not build_temp.exists():
            build_temp.mkdir(parents=True)
        subprocess.run(
            ["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True
        )
        subprocess.run(
            ["cmake", "--build", ".", *build_args], cwd=build_temp, check=True
        )


def read_readme() -> str:
    p = os.path.join(ROOT_DIR, "README.md")
    if os.path.isfile(p):
        return io.open(p, "r", encoding="utf-8").read()
    else:
        return ""

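# Package metadata and build wiring.  A minimal usage sketch (assuming a CUDA
# toolkit at CUDA_HOME and the build dependencies listed below are installed):
#     pip install .             # compiles KTransformersOps and cpuinfer_ext
#     python -m build --wheel   # produces a wheel under dist/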
setup(
    name="ktransformers",
    version=VersionInfo().get_package_version(),
    author="KVCache.ai",
    license="Apache 2.0",
    description="KTransformers, pronounced as Quick Transformers, is designed to enhance your Transformers experience with advanced kernel optimizations and placement/parallelism strategies.",
    long_description=read_readme(),
    long_description_content_type="text/markdown",
    cmdclass={"build_ext": CMakeBuild},
    install_requires=[
        "torch >= 2.3.0",
        "transformers == 4.43.2",
        "fastapi >= 0.111.0",
        "langchain >= 0.2.0",
        "blessed >= 1.20.0",
        "accelerate >= 0.31.0",
        "sentencepiece >= 0.1.97",
        "setuptools",
        "ninja",
        "wheel",
        "colorlog",
        "build",
        "packaging",
        "fire"
    ],
    python_requires=">=3.10",
    entry_points={
        "console_scripts": [
            "ktransformers=ktransformers.server.main:main",
        ],
    },
    packages=["ktransformers"],
    include_package_data=True,
    ext_modules=[
        CUDAExtension('KTransformersOps', [
            'ktransformers/ktransformers_ext/cuda/custom_gguf/dequant.cu',
            'ktransformers/ktransformers_ext/cuda/binding.cpp',
            'ktransformers/ktransformers_ext/cuda/gptq_marlin/gptq_marlin.cu',
        ]),
        CMakeExtension("cpuinfer_ext"),
    ]
)