mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 06:14:58 +00:00
use compile for gate, slight performance improvement
This commit is contained in:
parent
6c4ed59175
commit
a889288fc1
9 changed files with 155 additions and 37 deletions
|
@ -477,7 +477,6 @@ class KTransformersLinear(BaseInjectedModule, KLinearBase):
|
|||
gguf_loader: GGUFLoader,
|
||||
config: PretrainedConfig,
|
||||
orig_module: nn.Module,
|
||||
# device: str = "cuda",
|
||||
generate_device: str = "cuda",
|
||||
generate_op: str| None = "KLinearMarlin",
|
||||
prefill_device: str = "cuda",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue