mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-09 13:55:27 +00:00
Enable support for Intel XPU devices, add support for DeepSeek V2/V3 first
This commit is contained in:
parent
333351c7c8
commit
142fb7ce6c
22 changed files with 673 additions and 81 deletions
54
setup.py
54
setup.py
|
@ -39,7 +39,8 @@ try:
|
|||
from torch_musa.utils.musa_extension import BuildExtension, MUSAExtension, MUSA_HOME
|
||||
except ImportError:
|
||||
MUSA_HOME=None
|
||||
|
||||
KTRANSFORMERS_BUILD_XPU = torch.xpu.is_available()
|
||||
|
||||
with_balance = os.environ.get("USE_BALANCE_SERVE", "0") == "1"
|
||||
|
||||
class CpuInstructInfo:
|
||||
|
@ -225,6 +226,8 @@ class VersionInfo:
|
|||
backend_version = f"mu{self.get_musa_bare_metal_version(MUSA_HOME)}"
|
||||
elif ROCM_HOME is not None:
|
||||
backend_version = f"rocm{self.get_rocm_bare_metal_version(ROCM_HOME)}"
|
||||
elif torch.xpu.is_available():
|
||||
backend_version = f"xpu"
|
||||
else:
|
||||
raise ValueError("Unsupported backend: CUDA_HOME MUSA_HOME ROCM_HOME all not set.")
|
||||
package_version = f"{flash_version}+{backend_version}torch{torch_version}{cpu_instruct}"
|
||||
|
@ -495,6 +498,8 @@ class CMakeBuild(BuildExtension):
|
|||
cmake_args += ["-DKTRANSFORMERS_USE_MUSA=ON"]
|
||||
elif ROCM_HOME is not None:
|
||||
cmake_args += ["-DKTRANSFORMERS_USE_ROCM=ON"]
|
||||
elif KTRANSFORMERS_BUILD_XPU:
|
||||
cmake_args += ["-DKTRANSFORMERS_USE_XPU=ON", "-DKTRANSFORMERS_USE_CUDA=OFF"]
|
||||
else:
|
||||
raise ValueError("Unsupported backend: CUDA_HOME, MUSA_HOME, and ROCM_HOME are not set.")
|
||||
|
||||
|
@ -620,29 +625,36 @@ elif MUSA_HOME is not None:
|
|||
]
|
||||
}
|
||||
)
|
||||
elif torch.xpu.is_available(): #XPUExtension is not available now.
|
||||
ops_module = None
|
||||
else:
|
||||
raise ValueError("Unsupported backend: CUDA_HOME and MUSA_HOME are not set.")
|
||||
|
||||
ext_modules = [
|
||||
CMakeExtension("cpuinfer_ext", os.fspath(Path("").resolve() / "csrc" / "ktransformers_ext")),
|
||||
ops_module,
|
||||
CUDAExtension(
|
||||
'vLLMMarlin', [
|
||||
'csrc/custom_marlin/binding.cpp',
|
||||
'csrc/custom_marlin/gptq_marlin/gptq_marlin.cu',
|
||||
'csrc/custom_marlin/gptq_marlin/gptq_marlin_repack.cu',
|
||||
],
|
||||
extra_compile_args={
|
||||
'cxx': ['-O3'],
|
||||
'nvcc': ['-O3', '-Xcompiler', '-fPIC'],
|
||||
},
|
||||
)
|
||||
]
|
||||
if with_balance:
|
||||
print("using balance_serve")
|
||||
ext_modules.append(
|
||||
CMakeExtension("balance_serve", os.fspath(Path("").resolve()/ "csrc"/ "balance_serve"))
|
||||
)
|
||||
if not torch.xpu.is_available():
|
||||
ext_modules = [
|
||||
CMakeExtension("cpuinfer_ext", os.fspath(Path("").resolve() / "csrc" / "ktransformers_ext")),
|
||||
ops_module,
|
||||
CUDAExtension(
|
||||
'vLLMMarlin', [
|
||||
'csrc/custom_marlin/binding.cpp',
|
||||
'csrc/custom_marlin/gptq_marlin/gptq_marlin.cu',
|
||||
'csrc/custom_marlin/gptq_marlin/gptq_marlin_repack.cu',
|
||||
],
|
||||
extra_compile_args={
|
||||
'cxx': ['-O3'],
|
||||
'nvcc': ['-O3', '-Xcompiler', '-fPIC'],
|
||||
},
|
||||
)
|
||||
]
|
||||
if with_balance:
|
||||
print("using balance_serve")
|
||||
ext_modules.append(
|
||||
CMakeExtension("balance_serve", os.fspath(Path("").resolve()/ "csrc"/ "balance_serve"))
|
||||
)
|
||||
else:
|
||||
ext_modules = [
|
||||
CMakeExtension("cpuinfer_ext", os.fspath(Path("").resolve() / "csrc" / "ktransformers_ext")),
|
||||
]
|
||||
|
||||
setup(
|
||||
name=VersionInfo.PACKAGE_NAME,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue