kt-cli enhancement (#1834)

* [feat]: redesign kt run interactive configuration with i18n support - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port) - Add configuration save/load system (~/.ktransformers/run_configs.yaml) - Add i18n support for kt chat (en/zh translations) - Add universal input validators with auto-retry and Chinese comma support - Add port availability checker with auto-suggestion - Add parser configuration (--tool-call-parser, --reasoning-parser) - Remove tuna command and clean up redundant files - Fix: variable reference bug in run.py, filter to show only MoE models * [feat]: unify model selection UI and enable shared experts fusion by default - Unify kt run model selection table with kt model list display * Add Total size, MoE Size, Repo, and SHA256 status columns * Use consistent formatting and styling * Improve user decision-making with more information - Enable --disable-shared-experts-fusion by default * Change default value from False to True * Users can still override with --enable-shared-experts-fusion * [feat]: improve kt chat with performance metrics and better CJK support - Add performance metrics display after each response * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token) * Accurate input/output token counts using model tokenizer * Fallback to estimation if tokenizer unavailable * Metrics shown in dim style (not prominent) - Fix Chinese character input issues * Replace Prompt.ask() with console.input() for better CJK support * Fixes backspace deletion showing half-characters - Suppress NumPy subnormal warnings * Filter "The value of the smallest subnormal" warnings * Cleaner CLI output on certain hardware environments * [fix]: correct TTFT measurement in kt chat - Move start_time initialization before API call - Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms - Now correctly measures time from request sent to 
first token received * [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案 * [docs]: 强调推荐使用 Kimi K2.5 作为核心模型，突出企业级推理能力 * [docs]: 添加 Clawdbot 飞书接入教程链接 * [feat]: improve CLI table display, model verification, and chat experience - Add sequence number (#) column to all model tables by default - Filter kt edit to show only MoE GPU models (exclude AMX) - Extend kt model verify to check *.json and *.py files in addition to weights - Fix re-verification bug where repaired files caused false failures - Suppress tokenizer debug output in kt chat token counting * [fix]: fix cpu cores. --------- Co-authored-by: skqliao <skqliao@gmail.com>
2026-04-28 20:00:06 +00:00 · 2026-02-04 16:44:54 +08:00 · 2026-02-04 16:44:54 +08:00 · 56cbd69ac4
commit 56cbd69ac4
parent 4f64665758
23 changed files with 10327 additions and 781 deletions
--- a/kt-kernel/python/cli/utils/model_discovery.py
+++ b/kt-kernel/python/cli/utils/model_discovery.py
@@ -0,0 +1,250 @@
+"""
+Model Discovery Utilities
+
+Shared functions for discovering and registering new models across different commands.
+"""
+
+from typing import List, Optional, Tuple
+from pathlib import Path
+from rich.console import Console
+
+from kt_kernel.cli.utils.model_scanner import (
+    discover_models,
+    scan_directory_for_models,
+    ScannedModel,
+)
+from kt_kernel.cli.utils.user_model_registry import UserModelRegistry, UserModel
+
+
+# Module-level Rich console; the functions in this module print through it.
+console = Console()
+
+
+def discover_and_register_global(
+    min_size_gb: float = 2.0, max_depth: int = 6, show_progress: bool = True, lang: str = "en"
+) -> Tuple[int, int, List[UserModel]]:
+    """
+    Perform global model discovery and register new models.
+
+    Args:
+        min_size_gb: Minimum model size in GB
+        max_depth: Maximum search depth
+        show_progress: Whether to show progress messages
+        lang: Language for messages ("en" or "zh")
+
+    Returns:
+        Tuple of (total_found, new_found, registered_models)
+    """
+    registry = UserModelRegistry()
+
+    if show_progress:
+        if lang == "zh":
+            console.print("[dim]正在扫描系统中的模型权重，这可能需要30-60秒...[/dim]")
+        else:
+            console.print("[dim]Scanning system for model weights, this may take 30-60 seconds...[/dim]")
+
+    # Global scan
+    all_models = discover_models(mount_points=None, min_size_gb=min_size_gb, max_depth=max_depth)
+
+    # Filter out existing models
+    new_models = []
+    for model in all_models:
+        if not registry.find_by_path(model.path):
+            new_models.append(model)
+
+    # Register new models
+    registered = []
+    for model in new_models:
+        user_model = _create_and_register_model(registry, model)
+        if user_model:
+            registered.append(user_model)
+
+    return len(all_models), len(new_models), registered
+
+
+def discover_and_register_path(
+    path: str,
+    min_size_gb: float = 2.0,
+    existing_paths: Optional[set] = None,
+    show_progress: bool = True,
+    lang: str = "en",
+) -> Tuple[int, int, List[UserModel]]:
+    """
+    Discover models in a specific path and register new ones.
+
+    Args:
+        path: Directory path to scan
+        min_size_gb: Minimum model file size in GB
+        existing_paths: Set of already discovered paths in this session (optional)
+        show_progress: Whether to show progress messages
+        lang: Language for messages ("en" or "zh")
+
+    Returns:
+        Tuple of (total_found, new_found, registered_models)
+    """
+    registry = UserModelRegistry()
+
+    if show_progress:
+        if lang == "zh":
+            console.print(f"[dim]正在扫描 {path}...[/dim]")
+        else:
+            console.print(f"[dim]Scanning {path}...[/dim]")
+
+    # Scan directory
+    model_info = scan_directory_for_models(path, min_file_size_gb=min_size_gb)
+
+    if not model_info:
+        return 0, 0, []
+
+    # Convert to ScannedModel and filter
+    new_models = []
+    for dir_path, (format_type, size_bytes, file_count, files) in model_info.items():
+        # Check if already in registry
+        if registry.find_by_path(dir_path):
+            continue
+
+        # Check if already discovered in this session
+        if existing_paths and dir_path in existing_paths:
+            continue
+
+        model = ScannedModel(
+            path=dir_path, format=format_type, size_bytes=size_bytes, file_count=file_count, files=files
+        )
+        new_models.append(model)
+
+    # Register new models
+    registered = []
+    for model in new_models:
+        user_model = _create_and_register_model(registry, model)
+        if user_model:
+            registered.append(user_model)
+
+    return len(model_info), len(new_models), registered
+
+
+def _create_and_register_model(registry: UserModelRegistry, scanned_model: ScannedModel) -> Optional[UserModel]:
+    """
+    Create a UserModel from ScannedModel and register it.
+
+    Handles name conflicts by suggesting a unique name (e.g., model-2, model-3).
+    Automatically detects repo_id from README.md YAML frontmatter.
+    Automatically detects and caches MoE information for safetensors models.
+
+    Args:
+        registry: UserModelRegistry instance
+        scanned_model: ScannedModel to register
+
+    Returns:
+        Registered UserModel or None if failed
+    """
+    # Use suggest_name to get a unique name (adds -2, -3, etc. if needed)
+    unique_name = registry.suggest_name(scanned_model.folder_name)
+
+    user_model = UserModel(name=unique_name, path=scanned_model.path, format=scanned_model.format)
+
+    # Auto-detect repo_id from README.md (only YAML frontmatter)
+    try:
+        from kt_kernel.cli.utils.repo_detector import detect_repo_for_model
+
+        repo_info = detect_repo_for_model(scanned_model.path)
+        if repo_info:
+            repo_id, repo_type = repo_info
+            user_model.repo_id = repo_id
+            user_model.repo_type = repo_type
+    except Exception:
+        # Silently continue if detection fails
+        pass
+
+    # Auto-detect MoE information for safetensors models
+    if scanned_model.format == "safetensors":
+        try:
+            from kt_kernel.cli.utils.analyze_moe_model import analyze_moe_model
+
+            moe_result = analyze_moe_model(scanned_model.path, use_cache=True)
+            if moe_result and moe_result.get("is_moe"):
+                user_model.is_moe = True
+                user_model.moe_num_experts = moe_result.get("num_experts")
+                user_model.moe_num_experts_per_tok = moe_result.get("num_experts_per_tok")
+            else:
+                user_model.is_moe = False
+        except Exception:
+            # Silently continue if MoE detection fails
+            # is_moe will remain None
+            pass
+
+    try:
+        registry.add_model(user_model)
+        return user_model
+    except Exception:
+        # Should not happen since we used suggest_name, but handle gracefully
+        return None
+
+
+def format_discovery_summary(
+    total_found: int,
+    new_found: int,
+    registered: List[UserModel],
+    lang: str = "en",
+    show_models: bool = True,
+    max_show: int = 10,
+) -> None:
+    """
+    Print formatted discovery summary.
+
+    Args:
+        total_found: Total models found
+        new_found: New models found
+        registered: List of registered UserModel objects
+        lang: Language ("en" or "zh")
+        show_models: Whether to show model list
+        max_show: Maximum models to show
+    """
+    console.print()
+
+    if new_found == 0:
+        if total_found > 0:
+            if lang == "zh":
+                console.print(f"[green]✓[/green] 扫描完成：找到 {total_found} 个模型，所有模型均已在列表中")
+            else:
+                console.print(f"[green]✓[/green] Scan complete: found {total_found} models, all already in the list")
+        else:
+            if lang == "zh":
+                console.print("[yellow]未找到模型[/yellow]")
+            else:
+                console.print("[yellow]No models found[/yellow]")
+        return
+
+    # Show summary
+    if lang == "zh":
+        console.print(f"[green]✓[/green] 扫描完成：找到 {total_found} 个模型，其中 {new_found} 个为新模型")
+    else:
+        console.print(f"[green]✓[/green] Scan complete: found {total_found} models, {new_found} are new")
+
+    # Show registered count
+    if len(registered) > 0:
+        if lang == "zh":
+            console.print(f"[green]✓[/green] 成功添加 {len(registered)} 个新模型到列表")
+        else:
+            console.print(f"[green]✓[/green] Successfully added {len(registered)} new models to list")
+
+    # Show model list
+    if show_models and registered:
+        console.print()
+        if lang == "zh":
+            console.print(f"[dim]新发现的模型（前{max_show}个）:[/dim]")
+        else:
+            console.print(f"[dim]Newly discovered models (first {max_show}):[/dim]")
+
+        for i, model in enumerate(registered[:max_show], 1):
+            # Get size from registry or estimate
+            size_str = "?.? GB"
+            # Try to find the ScannedModel to get size
+            # For now just show name and path
+            console.print(f"  {i}. {model.name} ({model.format})")
+            console.print(f"     [dim]{model.path}[/dim]")
+
+        if len(registered) > max_show:
+            remaining = len(registered) - max_show
+            if lang == "zh":
+                console.print(f"  [dim]... 还有 {remaining} 个新模型[/dim]")
+            else:
+                console.print(f"  [dim]... and {remaining} more new models[/dim]")