mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-09 13:55:27 +00:00
[ADD] support multi-gpu qlen>1 q5_k
This commit is contained in:
parent
f293803156
commit
f5f79f5c0e
63 changed files with 3271 additions and 1285 deletions
|
@ -11,7 +11,7 @@ from ktransformers.operators.linear import KTransformerLinear, QuantizedLinearMa
|
|||
from ktransformers.operators.experts import KTransformersMLPExpert, MLPExpertsTorch
|
||||
from ktransformers.util.custom_gguf import GGUFLoader, dequantize_q4_k_gpu, dequantize_q4_k
|
||||
import torch
|
||||
import CudaOps
|
||||
import KTransformersOps
|
||||
torch.set_default_dtype(torch.bfloat16)
|
||||
import time
|
||||
from transformers import (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue