mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 06:14:58 +00:00
[ADD] support multi-gpu qlen>1 q5_k
This commit is contained in:
parent
f293803156
commit
f5f79f5c0e
63 changed files with 3271 additions and 1285 deletions
|
@ -2,17 +2,25 @@
|
|||
from setuptools import setup, Extension
|
||||
from torch.utils import cpp_extension
|
||||
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
|
||||
|
||||
# setup marlin gemm
|
||||
setup(name='KTransformersOps',
|
||||
ext_modules=[
|
||||
CUDAExtension('KTransformersOps', [
|
||||
setup(
|
||||
name='KTransformersOps',
|
||||
ext_modules=[
|
||||
CUDAExtension(
|
||||
'KTransformersOps', [
|
||||
'custom_gguf/dequant.cu',
|
||||
'binding.cpp',
|
||||
'gptq_marlin/gptq_marlin.cu',
|
||||
# 'gptq_marlin_repack.cu',
|
||||
])
|
||||
],
|
||||
cmdclass={'build_ext': BuildExtension
|
||||
})
|
||||
|
||||
],
|
||||
extra_compile_args={
|
||||
'cxx': ['-O3'],
|
||||
'nvcc': [
|
||||
'-O3',
|
||||
'--use_fast_math',
|
||||
'-Xcompiler', '-fPIC',
|
||||
]
|
||||
},
|
||||
)
|
||||
],
|
||||
cmdclass={'build_ext': BuildExtension}
|
||||
)
|
Loading…
Add table
Add a link
Reference in a new issue