mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 23:34:35 +00:00
add balance-serve, support concurrence
This commit is contained in:
parent
8d0292aa44
commit
25cee5810e
196 changed files with 22077 additions and 565 deletions
25
csrc/custom_marlin/setup.py
Normal file
25
csrc/custom_marlin/setup.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
from setuptools import setup, Extension
|
||||
from torch.utils import cpp_extension
|
||||
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
|
||||
# Build script body for the `vLLMMarlin` CUDA extension.
#
# Sources compiled into the extension, in build order.
# NOTE: 'custom_gguf/dequant.cu' is intentionally not listed; it was disabled
# (commented out) in the original source list.
_MARLIN_SOURCES = [
    'binding.cpp',
    'gptq_marlin/gptq_marlin.cu',
    'gptq_marlin/gptq_marlin_repack.cu',
]

# Flags handed to the extension builder under the 'cxx' and 'nvcc' keys of
# `extra_compile_args`, respectively.
_MARLIN_CXX_FLAGS = ['-O3']
_MARLIN_NVCC_FLAGS = [
    '-O3',
    '--use_fast_math',
    '-Xcompiler', '-fPIC',
]

# The single extension module produced by this package.
_marlin_extension = CUDAExtension(
    'vLLMMarlin',
    _MARLIN_SOURCES,
    extra_compile_args={
        'cxx': _MARLIN_CXX_FLAGS,
        'nvcc': _MARLIN_NVCC_FLAGS,
    },
)

setup(
    name='vLLMMarlin',
    ext_modules=[_marlin_extension],
    # Use torch's BuildExtension for the `build_ext` command.
    cmdclass={'build_ext': BuildExtension},
)
|
Loading…
Add table
Add a link
Reference in a new issue