support absorb for prefill long context

This commit is contained in:
Atream 2025-02-25 08:52:02 +00:00
parent e9b1216a9a
commit f4c198bd42
8 changed files with 93 additions and 33 deletions

View file

@ -60,6 +60,7 @@
kwargs:
generate_device: "cuda"
prefill_device: "cuda"
absorb_for_prefill: False # Set this to True to enable long-context support (prefill may be slower).
- match:
name: "^model$"
replace: