mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-29 04:09:52 +00:00
refactor(sft): share_backward_bb default True, share_cache_pool auto-derived
- kt_share_backward_bb now defaults to True (it always saves memory).
- kt_share_cache_pool no longer reads from an env var; it defaults to False and is auto-set to True by trainer_config_process when gradient checkpointing is enabled.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
020eb929f7
commit
5bfcb5f784
1 changed file with 4 additions and 4 deletions
|
|
@ -64,8 +64,8 @@ class KTConfig:
|
|||
kt_expert_checkpoint_path: str | None = None
|
||||
kt_num_gpu_experts: int | None = None
|
||||
kt_skip_expert_loading: bool | None = None
|
||||
kt_share_backward_bb: bool | None = None
|
||||
kt_share_cache_pool: bool | None = None
|
||||
kt_share_backward_bb: bool | None = None # default True — always saves memory
|
||||
kt_share_cache_pool: bool | None = None # auto-set by trainer_config_process, not user-facing
|
||||
|
||||
# Cache
|
||||
kt_max_cache_depth: int | None = None
|
||||
|
|
@ -117,9 +117,9 @@ class KTConfig:
|
|||
if self.kt_max_cache_depth is None:
|
||||
self.kt_max_cache_depth = _env_int("ACCELERATE_KT_MAX_CACHE_DEPTH", 2)
|
||||
if self.kt_share_backward_bb is None:
|
||||
self.kt_share_backward_bb = _env_bool("ACCELERATE_KT_SHARE_BACKWARD_BB", False)
|
||||
self.kt_share_backward_bb = _env_bool("ACCELERATE_KT_SHARE_BACKWARD_BB", True)
|
||||
if self.kt_share_cache_pool is None:
|
||||
self.kt_share_cache_pool = _env_bool("ACCELERATE_KT_SHARE_CACHE_POOL", False)
|
||||
self.kt_share_cache_pool = False
|
||||
if self.kt_use_lora_experts is None:
|
||||
self.kt_use_lora_experts = _env_bool("ACCELERATE_KT_USE_LORA_EXPERTS", False)
|
||||
if self.kt_lora_expert_num is None:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue