[ADD] support multi-gpu qlen>1 q5_k

This commit is contained in:
chenxl 2024-08-12 11:17:29 +00:00
parent f293803156
commit f5f79f5c0e
63 changed files with 3271 additions and 1285 deletions

View file

@ -2,17 +2,25 @@
from setuptools import setup, Extension
from torch.utils import cpp_extension
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
# setup marlin gemm
setup(name='KTransformersOps',
ext_modules=[
CUDAExtension('KTransformersOps', [
setup(
name='KTransformersOps',
ext_modules=[
CUDAExtension(
'KTransformersOps', [
'custom_gguf/dequant.cu',
'binding.cpp',
'gptq_marlin/gptq_marlin.cu',
# 'gptq_marlin_repack.cu',
])
],
cmdclass={'build_ext': BuildExtension
})
],
extra_compile_args={
'cxx': ['-O3'],
'nvcc': [
'-O3',
'--use_fast_math',
'-Xcompiler', '-fPIC',
]
},
)
],
cmdclass={'build_ext': BuildExtension}
)