[feat](kt-kernel): Add resume arg to CPU weight conversion (#1630)
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled

* [feat]: kt-kernel: Add resume arg to CPU weight conversion

* [docs]: kt-kernel: Document resume arg for CPU weight conversion

* [fix]: kt-kernel: Only print resume layer if in use

* [fix]: kt-kernel: Don't log skipped layers when using resume_layer
This commit is contained in:
DocShotgun 2025-11-21 20:00:15 -08:00 committed by GitHub
parent e69c67713f
commit e72a4fb880
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 32 additions and 3 deletions

View file

@ -330,11 +330,18 @@ class ConverterBase:
"""
raise NotImplementedError("Subclasses must implement _convert_layer_experts")
def convert(self):
"""Convert all expert layers using subclass-specific logic."""
def convert(self, resume_layer: int = 0):
"""Convert all expert layers using subclass-specific logic.
Args:
resume_layer (int, optional): The layer index to resume conversion from.
Layers with an index lower than this will be skipped. Defaults to 0.
"""
print("Starting conversion...")
print(f"Input: {self.input_path}")
print(f"Output: {self.output_path}")
if resume_layer > 0:
print(f"Resuming from layer: {resume_layer}")
# Create output directory
os.makedirs(self.output_path, exist_ok=True)
@ -355,6 +362,8 @@ class ConverterBase:
# Process layers with memory cleanup
for i, (layer_idx, expert_ids) in enumerate(sorted(expert_layers.items())):
if layer_idx < resume_layer:
continue
print(f"Processing layer {layer_idx} ({i+1}/{len(expert_layers)})...")
layer_tensors = self._convert_layer_experts(layer_idx, expert_ids)
@ -840,6 +849,12 @@ def main():
default=False,
help="Keep layer folders without merging to safetensor files (default: False)",
)
parser.add_argument(
"--resume-layer",
type=int,
default=0,
help="Resume conversion starting at this layer index (default: 0)",
)
args = parser.parse_args()
@ -893,7 +908,7 @@ def main():
)
# Run conversion
converter.convert()
converter.convert(resume_layer=args.resume_layer)
# Cleanup
converter.close()