kt-cli enhancement (#1834)

* [feat]: redesign kt run interactive configuration with i18n support

- Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port)
- Add configuration save/load system (~/.ktransformers/run_configs.yaml)
- Add i18n support for kt chat (en/zh translations)
- Add universal input validators with auto-retry and Chinese comma support
- Add port availability checker with auto-suggestion
- Add parser configuration (--tool-call-parser, --reasoning-parser)
- Remove tuna command and clean up redundant files
- Fix: variable reference bug in run.py, filter to show only MoE models
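The port availability checker with auto-suggestion mentioned above could look roughly like the following. This is a minimal sketch, not the PR's actual implementation; the function name `find_available_port` and the retry window are assumptions. It treats a port as free if a wildcard TCP bind succeeds, and walks upward from the preferred port to suggest an alternative:

```python
import socket


def find_available_port(preferred: int, max_tries: int = 20) -> int:
    """Return `preferred` if free, otherwise the next free port above it.

    A port counts as free if we can bind a TCP socket to it on all
    interfaces. Hypothetical helper sketching the checker described
    in the commit message, not the PR's real code.
    """
    for candidate in range(preferred, preferred + max_tries):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            try:
                sock.bind(("", candidate))
                return candidate  # bind succeeded: port is available
            except OSError:
                continue  # port in use, try the next one
    raise RuntimeError(f"no free port in [{preferred}, {preferred + max_tries})")
```

In the interactive flow, a taken port would trigger a prompt like "Port 8000 is in use, use 8001 instead?" rather than failing outright.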

* [feat]: unify model selection UI and enable shared experts fusion by default

- Unify kt run model selection table with kt model list display
  * Add Total size, MoE Size, Repo, and SHA256 status columns
  * Use consistent formatting and styling
  * Improve user decision-making with more information

- Enable --disable-shared-experts-fusion by default
  * Change default value from False to True
  * Users can still override with --enable-shared-experts-fusion
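In typer (which this CLI is built on), a default-on flag with an explicit override switch can be expressed with a secondary flag name. A hedged sketch of how the flag pair above might be declared (the command body is illustrative, not the PR's real `run` implementation):

```python
import typer

app = typer.Typer()


@app.command()
def run(
    # Default changed from False to True per this commit; the secondary
    # flag name gives users the documented override switch.
    disable_shared_experts_fusion: bool = typer.Option(
        True,
        "--disable-shared-experts-fusion/--enable-shared-experts-fusion",
        help="Disable fusion of shared experts (default: disabled).",
    ),
):
    typer.echo(f"fusion disabled: {disable_shared_experts_fusion}")
```

Running with no flags now reports `fusion disabled: True`, while `--enable-shared-experts-fusion` restores the old behaviour.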

* [feat]: improve kt chat with performance metrics and better CJK support

- Add performance metrics display after each response
  * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token)
  * Accurate input/output token counts using model tokenizer
  * Fallback to estimation if tokenizer unavailable
  * Metrics shown in dim style (not prominent)

- Fix Chinese character input issues
  * Replace Prompt.ask() with console.input() for better CJK support
  * Fixes backspace deletion showing half-characters

- Suppress NumPy subnormal warnings
  * Filter "The value of the smallest subnormal" warnings
  * Cleaner CLI output on certain hardware environments

* [fix]: correct TTFT measurement in kt chat

- Move start_time initialization before API call
- Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms
- Now correctly measures time from request sent to first token received
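The fix amounts to taking the timestamp before the request is sent instead of when the first chunk arrives. A minimal sketch of the corrected measurement (function and parameter names are hypothetical; the real code streams from the OpenAI-compatible API):

```python
import time


def measure_stream_metrics(send_request, stream):
    """Measure TTFT and TPOT around a streaming response.

    `send_request` issues the API call; `stream` yields output tokens.
    start_time is taken *before* the request, so TTFT covers the full
    round trip to the first token -- the bug fixed in this commit was
    setting start_time on the first chunk, making TTFT ~ 0 ms.
    """
    start_time = time.perf_counter()  # before the API call, not at first chunk
    send_request()
    first_token_time = None
    tokens = 0
    for _ in stream:
        if first_token_time is None:
            first_token_time = time.perf_counter()
        tokens += 1
    end_time = time.perf_counter()
    ttft = (first_token_time or end_time) - start_time
    # TPOT: average inter-token time after the first token
    tpot = (end_time - first_token_time) / (tokens - 1) if tokens > 1 else 0.0
    return {"total_s": end_time - start_time, "ttft_s": ttft,
            "tpot_s": tpot, "tokens": tokens}
```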

* [docs]: add Clawdbot integration guide - an enterprise-grade AI assistant deployment solution for KTransformers

* [docs]: emphasize Kimi K2.5 as the recommended core model, highlighting enterprise-grade inference capability

* [docs]: add link to the Clawdbot Feishu integration tutorial

* [feat]: improve CLI table display, model verification, and chat experience

- Add sequence number (#) column to all model tables by default
- Filter kt edit to show only MoE GPU models (exclude AMX)
- Extend kt model verify to check *.json and *.py files in addition to weights
- Fix re-verification bug where repaired files caused false failures
- Suppress tokenizer debug output in kt chat token counting
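Extending verification beyond weight shards to `*.json` and `*.py` files can be sketched as a manifest-based SHA256 check. This is an illustrative sketch under the assumption that expected digests are kept in a per-model manifest; `verify_model_files` and the manifest shape are hypothetical, not the actual `kt model verify` code:

```python
import hashlib
from pathlib import Path


def verify_model_files(model_dir: str, manifest: dict[str, str]) -> list[str]:
    """Return the files whose SHA256 does not match the manifest.

    `manifest` maps relative file names to expected hex digests. Config
    and tokenizer *.json plus custom *.py files are checked the same way
    as weight shards, mirroring the extended verify behaviour.
    """
    failures = []
    root = Path(model_dir)
    for rel_name, expected in manifest.items():
        path = root / rel_name
        if not path.is_file():
            failures.append(rel_name)  # missing file counts as a failure
            continue
        digest = hashlib.sha256(path.read_bytes()).hexdigest()
        if digest != expected:
            failures.append(rel_name)
    return failures
```

The re-verification bug fix means a file repaired (re-downloaded) after a failed check is re-hashed from disk rather than reported from a stale failure record.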

* [fix]: fix CPU core handling

---------

Co-authored-by: skqliao <skqliao@gmail.com>
Oql committed on 2026-02-04 16:44:54 +08:00 (via GitHub)
parent 4f64665758
commit 56cbd69ac4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 10327 additions and 781 deletions


@@ -5,6 +5,10 @@ KTransformers CLI - A unified command-line interface for KTransformers.
"""
import sys
import warnings
# Suppress numpy subnormal warnings
warnings.filterwarnings("ignore", message="The value of the smallest subnormal")
import typer
@@ -28,6 +32,7 @@ def _get_help(key: str) -> str:
"run": {"en": "Start model inference server", "zh": "启动模型推理服务器"},
"chat": {"en": "Interactive chat with running model", "zh": "与运行中的模型进行交互式聊天"},
"quant": {"en": "Quantize model weights", "zh": "量化模型权重"},
"edit": {"en": "Edit model information", "zh": "编辑模型信息"},
"bench": {"en": "Run full benchmark", "zh": "运行完整基准测试"},
"microbench": {"en": "Run micro-benchmark", "zh": "运行微基准测试"},
"doctor": {"en": "Diagnose environment issues", "zh": "诊断环境问题"},
@@ -43,7 +48,7 @@ def _get_help(key: str) -> str:
app = typer.Typer(
name="kt",
help="KTransformers CLI - A unified command-line interface for KTransformers.",
no_args_is_help=True,
no_args_is_help=False, # Handle no-args case manually to support first-run setup
add_completion=False, # Use static completion scripts instead of dynamic completion
rich_markup_mode="rich",
)
@@ -66,20 +71,7 @@ def _update_help_texts() -> None:
group_info.help = _get_help(group_info.name)
# Register commands
app.command(name="version", help="Show version information")(version.version)
# Run command is handled specially in main() to allow extra args
# (not registered here to avoid typer's argument parsing)
app.command(name="chat", help="Interactive chat with running model")(chat.chat)
app.command(name="quant", help="Quantize model weights")(quant.quant)
app.command(name="bench", help="Run full benchmark")(bench.bench)
app.command(name="microbench", help="Run micro-benchmark")(bench.microbench)
app.command(name="doctor", help="Diagnose environment issues")(doctor.doctor)
# Register sub-apps
app.add_typer(model.app, name="model", help="Manage models and storage paths")
app.add_typer(config.app, name="config", help="Manage configuration")
app.add_typer(sft.app, name="sft", help="Fine-tuning with LlamaFactory")
# Commands are registered later after tui_command is defined
def check_first_run() -> None:
@@ -116,7 +108,7 @@ def _show_first_run_setup(settings) -> None:
from rich.spinner import Spinner
from rich.live import Live
from kt_kernel.cli.utils.environment import scan_storage_locations, format_size_gb, scan_models_in_location
from kt_kernel.cli.utils.environment import scan_storage_locations, format_size_gb
console = Console()
@@ -140,15 +132,8 @@ def _show_first_run_setup(settings) -> None:
console.print(" [cyan][2][/cyan] 中文 (Chinese)")
console.print()
while True:
choice = Prompt.ask("Enter choice / 输入选择", choices=["1", "2"], default="1")
if choice == "1":
lang = "en"
break
elif choice == "2":
lang = "zh"
break
choice = Prompt.ask("Enter choice / 输入选择", choices=["1", "2"], default="1")
lang = "en" if choice == "1" else "zh"
# Save language setting
settings.set("general.language", lang)
@@ -161,6 +146,131 @@ def _show_first_run_setup(settings) -> None:
else:
console.print("[green]✓[/green] Language set to English")
# Model discovery section
console.print()
if lang == "zh":
console.print("[bold]发现模型权重[/bold]")
console.print()
console.print("[dim]扫描系统中已有的模型权重文件,以便快速添加到模型列表。[/dim]")
console.print()
console.print(" [cyan][1][/cyan] 全局扫描 (自动扫描所有非系统路径)")
console.print(" [cyan][2][/cyan] 手动指定路径 (可添加多个)")
console.print(" [cyan][3][/cyan] 跳过 (稍后手动添加)")
console.print()
scan_choice = Prompt.ask("选择扫描方式", choices=["1", "2", "3"], default="1")
else:
console.print("[bold]Discover Model Weights[/bold]")
console.print()
console.print("[dim]Scan existing model weights on your system to quickly add them to the model list.[/dim]")
console.print()
console.print(" [cyan][1][/cyan] Global scan (auto-scan all non-system paths)")
console.print(" [cyan][2][/cyan] Manual paths (add multiple paths)")
console.print(" [cyan][3][/cyan] Skip (add manually later)")
console.print()
scan_choice = Prompt.ask("Select scan method", choices=["1", "2", "3"], default="1")
if scan_choice == "1":
# Global scan
from kt_kernel.cli.utils.model_discovery import discover_and_register_global, format_discovery_summary
console.print()
try:
total_found, new_found, registered = discover_and_register_global(
min_size_gb=2.0, max_depth=6, show_progress=True, lang=lang
)
format_discovery_summary(
total_found=total_found,
new_found=new_found,
registered=registered,
lang=lang,
show_models=True,
max_show=10,
)
except Exception as e:
console.print(f"[yellow]Warning: Scan failed - {e}[/yellow]")
elif scan_choice == "2":
# Manual path specification
from kt_kernel.cli.utils.model_discovery import discover_and_register_path
import os
discovered_paths = set() # Track paths discovered in this session
total_registered = []
while True:
console.print()
if lang == "zh":
path = Prompt.ask("输入要扫描的路径 (例如: /mnt/data/models)")
else:
path = Prompt.ask("Enter path to scan (e.g., /mnt/data/models)")
# Expand and validate path
path = os.path.expanduser(path)
if not os.path.exists(path):
if lang == "zh":
console.print(f"[yellow]警告: 路径不存在: {path}[/yellow]")
else:
console.print(f"[yellow]Warning: Path does not exist: {path}[/yellow]")
continue
if not os.path.isdir(path):
if lang == "zh":
console.print(f"[yellow]警告: 不是一个目录: {path}[/yellow]")
else:
console.print(f"[yellow]Warning: Not a directory: {path}[/yellow]")
continue
# Scan this path
console.print()
try:
total_found, new_found, registered = discover_and_register_path(
path=path, min_size_gb=2.0, existing_paths=discovered_paths, show_progress=True, lang=lang
)
# Update discovered paths
for model in registered:
discovered_paths.add(model.path)
total_registered.extend(registered)
console.print()
if lang == "zh":
console.print(f"[green]✓[/green] 在此路径找到 {total_found} 个模型,其中 {new_found} 个为新模型")
else:
console.print(f"[green]✓[/green] Found {total_found} models in this path, {new_found} are new")
if new_found > 0:
for model in registered[:5]:
console.print(f"{model.name} ({model.format})")
if len(registered) > 5:
if lang == "zh":
console.print(f" [dim]... 还有 {len(registered) - 5} 个新模型[/dim]")
else:
console.print(f" [dim]... and {len(registered) - 5} more new models[/dim]")
except Exception as e:
console.print(f"[red]Error scanning path: {e}[/red]")
# Ask if continue
console.print()
if lang == "zh":
continue_scan = Confirm.ask("是否继续添加其他路径?", default=False)
else:
continue_scan = Confirm.ask("Continue adding more paths?", default=False)
if not continue_scan:
break
if total_registered:
console.print()
if lang == "zh":
console.print(f"[green]✓[/green] 总共发现 {len(total_registered)} 个新模型")
else:
console.print(f"[green]✓[/green] Total {len(total_registered)} new models discovered")
# Model storage path selection
console.print()
console.print(f"[bold]{t('setup_model_path_title')}[/bold]")
@@ -174,16 +284,7 @@ def _show_first_run_setup(settings) -> None:
console.print()
if locations:
# Scan for models in each location
console.print(f"[dim]{t('setup_scanning_models')}[/dim]")
location_models: dict[str, list] = {}
for loc in locations[:5]:
models = scan_models_in_location(loc, max_depth=2)
if models:
location_models[loc.path] = models
console.print()
# Show options
# Show storage location options
for i, loc in enumerate(locations[:5], 1): # Show top 5 options
available = format_size_gb(loc.available_gb)
total = format_size_gb(loc.total_gb)
@@ -194,22 +295,8 @@ def _show_first_run_setup(settings) -> None:
else:
option_str = t("setup_disk_option", path=loc.path, available=available, total=total)
# Add model count if any
if loc.path in location_models:
model_count = len(location_models[loc.path])
option_str += f" [green]✓ {t('setup_location_has_models', count=model_count)}[/green]"
console.print(f" [cyan][{i}][/cyan] {option_str}")
# Show first few models found in this location
if loc.path in location_models:
for model in location_models[loc.path][:3]: # Show up to 3 models
size_str = format_size_gb(model.size_gb)
console.print(f" [dim]• {model.name} ({size_str})[/dim]")
if len(location_models[loc.path]) > 3:
remaining = len(location_models[loc.path]) - 3
console.print(f" [dim] ... +{remaining} more[/dim]")
# Custom path option
custom_idx = min(len(locations), 5) + 1
console.print(f" [cyan][{custom_idx}][/cyan] {t('setup_custom_path')}")
@@ -323,51 +410,28 @@ def _install_shell_completion() -> None:
# Detect current shell
shell = os.environ.get("SHELL", "")
if "zsh" in shell:
shell_name = "zsh"
elif "fish" in shell:
shell_name = "fish"
else:
shell_name = "bash"
shell_name = "zsh" if "zsh" in shell else "fish" if "fish" in shell else "bash"
try:
cli_dir = Path(__file__).parent
completions_dir = cli_dir / "completions"
home = Path.home()
installed = False
def install_completion(src_name: str, dest_dir: Path, dest_name: str) -> None:
"""Install completion file from source to destination."""
src_file = completions_dir / src_name
if src_file.exists():
dest_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(src_file, dest_dir / dest_name)
if shell_name == "bash":
# Use XDG standard location for bash-completion (auto-loaded)
src_file = completions_dir / "kt-completion.bash"
dest_dir = home / ".local" / "share" / "bash-completion" / "completions"
dest_file = dest_dir / "kt"
if src_file.exists():
dest_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(src_file, dest_file)
installed = True
install_completion(
"kt-completion.bash", home / ".local" / "share" / "bash-completion" / "completions", "kt"
)
elif shell_name == "zsh":
src_file = completions_dir / "_kt"
dest_dir = home / ".zfunc"
dest_file = dest_dir / "_kt"
if src_file.exists():
dest_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(src_file, dest_file)
installed = True
install_completion("_kt", home / ".zfunc", "_kt")
elif shell_name == "fish":
# Fish auto-loads from this directory
src_file = completions_dir / "kt.fish"
dest_dir = home / ".config" / "fish" / "completions"
dest_file = dest_dir / "kt.fish"
if src_file.exists():
dest_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(src_file, dest_file)
installed = True
install_completion("kt.fish", home / ".config" / "fish" / "completions", "kt.fish")
# Mark as installed
settings.set("general._completion_installed", True)
@@ -403,6 +467,20 @@ def _apply_saved_language() -> None:
set_lang(lang)
app.command(name="version", help="Show version information")(version.version)
app.command(name="chat", help="Interactive chat with running model")(chat.chat)
app.command(name="quant", help="Quantize model weights")(quant.quant)
app.command(name="edit", help="Edit model information")(model.edit_model)
app.command(name="bench", help="Run full benchmark")(bench.bench)
app.command(name="microbench", help="Run micro-benchmark")(bench.microbench)
app.command(name="doctor", help="Diagnose environment issues")(doctor.doctor)
# Register sub-apps
app.add_typer(model.app, name="model", help="Manage models and storage paths")
app.add_typer(config.app, name="config", help="Manage configuration")
app.add_typer(sft.app, name="sft", help="Fine-tuning with LlamaFactory")
def main():
"""Main entry point."""
# Apply saved language setting first (before anything else for correct help display)
@@ -414,7 +492,7 @@ def main():
# Check for first run (but not for certain commands)
# Skip first-run check for: --help, config commands, version
args = sys.argv[1:] if len(sys.argv) > 1 else []
skip_commands = ["--help", "-h", "config", "version", "--version"]
skip_commands = ["--help", "-h", "config", "version", "--version", "--no-tui"]
should_check_first_run = True
for arg in args:
@@ -422,12 +500,35 @@ def main():
should_check_first_run = False
break
# Handle no arguments case
if not args:
# Check if this is first run
from kt_kernel.cli.config.settings import DEFAULT_CONFIG_FILE, get_settings
is_first_run = False
if not DEFAULT_CONFIG_FILE.exists():
is_first_run = True
else:
settings = get_settings()
if not settings.get("general._initialized"):
is_first_run = True
if is_first_run:
# First run - start initialization
_install_shell_completion()
check_first_run()
return
else:
# Not first run - show help
app(["--help"])
return
# Auto-install shell completion on first run
if should_check_first_run:
_install_shell_completion()
# Check first run before running commands
if should_check_first_run and args:
if should_check_first_run:
check_first_run()
# Handle "run" command specially to pass through unknown options