Use compile for the gate; slight performance improvement

2025-09-10 06:14:58 +00:00 · 2025-03-14 12:43:28 +00:00 · 2025-03-14 12:43:28 +00:00 · a889288fc1
commit a889288fc1
parent 6c4ed59175
9 changed files with 155 additions and 37 deletions
--- a/ktransformers/operators/linear.py
+++ b/ktransformers/operators/linear.py
@@ -477,7 +477,6 @@ class KTransformersLinear(BaseInjectedModule, KLinearBase):
        gguf_loader: GGUFLoader,
        config: PretrainedConfig,
        orig_module: nn.Module,
-        # device: str = "cuda",
        generate_device: str = "cuda",
        generate_op: str| None = "KLinearMarlin",
        prefill_device: str = "cuda",