kt-cli enhancement (#1834)

* [feat]: redesign kt run interactive configuration with i18n support
  - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port)
  - Add configuration save/load system (~/.ktransformers/run_configs.yaml)
  - Add i18n support for kt chat (en/zh translations)
  - Add universal input validators with auto-retry and Chinese comma support
  - Add port availability checker with auto-suggestion
  - Add parser configuration (--tool-call-parser, --reasoning-parser)
  - Remove tuna command and clean up redundant files
  - Fix: variable reference bug in run.py, filter to show only MoE models
* [feat]: unify model selection UI and enable shared experts fusion by default
  - Unify kt run model selection table with kt model list display
    * Add Total size, MoE Size, Repo, and SHA256 status columns
    * Use consistent formatting and styling
    * Improve user decision-making with more information
  - Enable --disable-shared-experts-fusion by default
    * Change default value from False to True
    * Users can still override with --enable-shared-experts-fusion
* [feat]: improve kt chat with performance metrics and better CJK support
  - Add performance metrics display after each response
    * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token)
    * Accurate input/output token counts using model tokenizer
    * Fallback to estimation if tokenizer unavailable
    * Metrics shown in dim style (not prominent)
  - Fix Chinese character input issues
    * Replace Prompt.ask() with console.input() for better CJK support
    * Fixes backspace deletion showing half-characters
  - Suppress NumPy subnormal warnings
    * Filter "The value of the smallest subnormal" warnings
    * Cleaner CLI output on certain hardware environments
* [fix]: correct TTFT measurement in kt chat
  - Move start_time initialization before API call
  - Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms
  - Now correctly measures time from request sent to first token received
* [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案
* [docs]: 强调推荐使用 Kimi K2.5 作为核心模型，突出企业级推理能力
* [docs]: 添加 Clawdbot 飞书接入教程链接
* [feat]: improve CLI table display, model verification, and chat experience
  - Add sequence number (#) column to all model tables by default
  - Filter kt edit to show only MoE GPU models (exclude AMX)
  - Extend kt model verify to check *.json and *.py files in addition to weights
  - Fix re-verification bug where repaired files caused false failures
  - Suppress tokenizer debug output in kt chat token counting
* [fix]: fix cpu cores.

---------

Co-authored-by: skqliao <skqliao@gmail.com>
2026-04-28 20:00:06 +00:00 · 2026-02-04 16:44:54 +08:00 · 2026-02-04 16:44:54 +08:00 · 56cbd69ac4
commit 56cbd69ac4
parent 4f64665758
23 changed files with 10327 additions and 781 deletions
--- a/kt-kernel/python/cli/i18n.py
+++ b/kt-kernel/python/cli/i18n.py
@@ -190,6 +190,70 @@ MESSAGES: dict[str, dict[str, str]] = {
        "quant_progress": "Quantizing...",
        "quant_complete": "Quantization complete!",
        "quant_input_not_found": "Input model not found at {path}",
+        "quant_cpu_threads": "CPU threads: {threads}",
+        "quant_numa_nodes": "NUMA nodes: {nodes}",
+        "quant_time_warning": "Quantization may take 30-60 minutes depending on model size.",
+        "quant_disk_analysis": "Disk Space Analysis:",
+        "quant_source_size": "Source model size:",
+        "quant_estimated_size": "Estimated output size:",
+        "quant_available_space": "Available space:",
+        "quant_insufficient_space": "WARNING: Insufficient disk space!",
+        "quant_required_space": "Required space (with 20% buffer):",
+        "quant_shortage": "Shortage:",
+        "quant_may_fail": "Quantization may fail or produce incomplete files.",
+        "quant_continue_anyway": "Continue anyway?",
+        "quant_settings": "Quantization Settings:",
+        "quant_registered": "Quantized model registered: {name}",
+        "quant_view_with": "View with:",
+        "quant_use_with": "Use with:",
+        "quant_register_failed": "Failed to auto-register model: {error}",
+        "quant_output_exists": "Output path already exists: {path}",
+        "quant_using_unique": "Using unique name: {path}",
+        # Interactive quant
+        "quant_interactive_title": "Interactive Quantization Configuration",
+        "quant_new_model_notice": "⚠ Note: Some newer models cannot be quantized yet (conversion script not adapted). Recommended to use the original precision for inference (no weight conversion needed).",
+        "quant_no_moe_models": "No MoE models found for quantization.",
+        "quant_only_moe": "Only MoE models (e.g., DeepSeek-V3) can be quantized to AMX format.",
+        "quant_add_models": "Add models with: {command}",
+        "quant_moe_available": "MoE Models Available for Quantization:",
+        "quant_select_model": "Select model to quantize",
+        "quant_invalid_choice": "Invalid choice",
+        "quant_step2_method": "Step 2: Quantization Method",
+        "quant_method_label": "Quantization Method:",
+        "quant_int4_desc": "INT4",
+        "quant_int8_desc": "INT8",
+        "quant_select_method": "Select quantization method",
+        "quant_input_type_label": "Input Weight Type:",
+        "quant_fp8_desc": "FP8 (for 8-bit float weights)",
+        "quant_fp16_desc": "FP16 (for 16-bit float weights)",
+        "quant_bf16_desc": "BF16 (for Brain Float 16 weights)",
+        "quant_select_input_type": "Select input type",
+        "quant_step3_cpu": "Step 3: CPU Configuration",
+        "quant_cpu_threads_prompt": "CPU Threads (1 to {max})",
+        "quant_numa_nodes_prompt": "NUMA Nodes (1 to {max})",
+        "quant_use_gpu_label": "Use GPU for conversion?",
+        "quant_gpu_speedup": "GPU can significantly speed up the quantization process",
+        "quant_enable_gpu": "Enable GPU acceleration?",
+        "quant_step4_output": "Step 4: Output Path",
+        "quant_default_path": "Default:",
+        "quant_use_default": "Use default output path?",
+        "quant_custom_path": "Enter custom output path",
+        "quant_output_exists_warn": "⚠ Output path already exists: {path}",
+        "quant_using_unique_name": "→ Using unique name: {path}",
+        "quant_config_summary": "Configuration Summary",
+        "quant_summary_model": "Model:",
+        "quant_summary_method": "Method:",
+        "quant_summary_input_type": "Input Type:",
+        "quant_summary_cpu_threads": "CPU Threads:",
+        "quant_summary_numa": "NUMA Nodes:",
+        "quant_summary_gpu": "Use GPU:",
+        "quant_summary_output": "Output Path:",
+        "quant_start_question": "Start quantization?",
+        "quant_cancelled": "Cancelled",
+        "quant_config_complete": "Configuration complete",
+        "quant_time_elapsed": "Time elapsed:",
+        "yes": "Yes",
+        "no": "No",
        # SFT command
        "sft_mode_train": "Training mode",
        "sft_mode_chat": "Chat mode",
@@ -247,6 +311,113 @@ MESSAGES: dict[str, dict[str, str]] = {
        "chat_proxy_detected": "Proxy detected in environment",
        "chat_proxy_confirm": "Use proxy for connection?",
        "chat_proxy_disabled": "Proxy disabled for this session",
+        "chat_openai_required": "OpenAI Python SDK is required for chat functionality.",
+        "chat_install_hint": "Install it with:",
+        "chat_title": "KTransformers Chat",
+        "chat_server": "Server",
+        "chat_temperature": "Temperature",
+        "chat_max_tokens": "Max tokens",
+        "chat_help_hint": "Type '/help' for commands, '/quit' to exit",
+        "chat_connecting": "Connecting to server...",
+        "chat_no_models": "No models available on server",
+        "chat_model_not_found": "Model '{model}' not found. Available models: {available}",
+        "chat_connected": "Connected to model: {model}",
+        "chat_connect_failed": "Failed to connect to server: {error}",
+        "chat_server_not_running": "Make sure the model server is running:",
+        "chat_user_prompt": "You",
+        "chat_assistant_prompt": "Assistant",
+        "chat_generation_error": "Error generating response: {error}",
+        "chat_interrupted": "Chat interrupted. Goodbye!",
+        "chat_history_saved": "History saved to: {path}",
+        "chat_goodbye": "Goodbye!",
+        "chat_help_title": "Available Commands:",
+        "chat_help_content": "/help, /h         - Show this help message\n/quit, /exit, /q  - Exit chat\n/clear, /c        - Clear conversation history\n/history, /hist   - Show conversation history\n/info, /i         - Show current settings\n/retry, /r        - Regenerate last response",
+        "chat_history_cleared": "Conversation history cleared",
+        "chat_no_history": "No conversation history",
+        "chat_history_title": "History ({count} messages)",
+        "chat_info_title": "Current Settings:",
+        "chat_info_content": "Temperature: {temperature}\nMax tokens: {max_tokens}\nMessages: {messages}",
+        "chat_retrying": "Retrying last response...",
+        "chat_no_retry": "No previous response to retry",
+        "chat_unknown_command": "Unknown command: {command}",
+        "chat_unknown_hint": "Type /help for available commands",
+        # Run Interactive
+        "run_int_no_moe_models": "No MoE GPU models found.",
+        "run_int_add_models": "Add models with: kt model scan",
+        "run_int_list_all": "List all models: kt model list --all",
+        "run_int_step1_title": "Step 1: Select Model (GPU MoE Models)",
+        "run_int_select_model": "Select model",
+        "run_int_step2_title": "Step 2: Select Inference Method",
+        "run_int_method_raw": "RAW Precision (FP8/FP8_PERCHANNEL/BF16/RAWINT4)",
+        "run_int_method_amx": "AMX Quantization (INT4/INT8)",
+        "run_int_method_gguf": "GGUF (Llamafile)",
+        "run_int_method_saved": "Use Saved Configuration",
+        "run_int_select_method": "Select inference method",
+        "run_int_raw_precision": "RAW Precision:",
+        "run_int_select_precision": "Select precision",
+        "run_int_amx_method": "AMX Method:",
+        "run_int_select_amx": "Select AMX method",
+        "run_int_step3_title": "Step 3: NUMA and CPU Configuration",
+        "run_int_numa_nodes": "NUMA Nodes (1-{max})",
+        "run_int_cpu_threads": "CPU Threads per NUMA (1-{max})",
+        "run_int_amx_warning": "⚠ Warning: AMX INT4/INT8 requires compatible CPU. Check with: kt doctor",
+        "run_int_step4_title": "Step 4: GPU Experts Configuration",
+        "run_int_gpu_experts": "GPU Experts per Layer (0-{max})",
+        "run_int_gpu_experts_info": "Total experts: {total}, Activated per token: {active}",
+        "run_int_step5_title": "Step 5: KV Cache Configuration",
+        "run_int_kv_cache_size": "KV Cache Size (tokens)",
+        "run_int_chunk_prefill": "Enable Chunk Prefill?",
+        "run_int_chunk_size": "Chunk Prefill Size (tokens)",
+        "run_int_gpu_prefill_threshold": "GPU Prefill Threshold (tokens)",
+        "run_int_step6_title": "Step 6: GPU Selection and Tensor Parallelism",
+        "run_int_available_gpus": "Available GPUs:",
+        "run_int_gpu_id": "GPU {id}",
+        "run_int_vram_info": "{name} ({total:.1f}GB total, {free:.1f}GB free)",
+        "run_int_select_gpus": "Select GPU IDs (comma-separated)",
+        "run_int_invalid_gpu_range": "All GPU IDs must be between 0 and {max}",
+        "run_int_tp_size": "TP Size (must be power of 2: 1,2,4,8...)",
+        "run_int_tp_mismatch": "TP size must match number of selected GPUs ({count})",
+        "run_int_tp_not_power_of_2": "TP size must be a power of 2",
+        "run_int_mem_fraction": "Static Memory Fraction (0.0-1.0)",
+        "run_int_using_saved_mem": "Using saved memory fraction: {fraction}",
+        "run_int_step7_title": "Step 7: Parser Configuration (Optional)",
+        "run_int_tool_call_parser": "Tool Call Parser (press Enter to skip)",
+        "run_int_reasoning_parser": "Reasoning Parser (press Enter to skip)",
+        "run_int_step8_title": "Step 8: Host and Port Configuration",
+        "run_int_host": "Host",
+        "run_int_port": "Port",
+        "run_int_port_occupied": "⚠ Port {port} is already in use",
+        "run_int_port_suggestion": "Suggested available port: {port}",
+        "run_int_use_suggested": "Use suggested port?",
+        "run_int_saved_configs": "Saved Configurations:",
+        "run_int_config_name": "Configuration {num}",
+        "run_int_kt_method": "KT Method:",
+        "run_int_numa_nodes_label": "NUMA Nodes:",
+        "run_int_cpu_threads_label": "CPU Threads:",
+        "run_int_gpu_experts_label": "GPU Experts:",
+        "run_int_tp_size_label": "TP Size:",
+        "run_int_mem_fraction_label": "Memory Fraction:",
+        "run_int_server_label": "Server:",
+        "run_int_kv_cache_label": "KV Cache:",
+        "run_int_chunk_prefill_label": "Chunk Prefill:",
+        "run_int_gpu_prefill_label": "GPU Prefill Thr:",
+        "run_int_tool_parser_label": "Tool Call Parser:",
+        "run_int_reasoning_parser_label": "Reasoning Parser:",
+        "run_int_command_label": "Command:",
+        "run_int_select_config": "Select configuration",
+        "run_int_gpu_select_required": "Please select {tp} GPUs (TP size from saved config)",
+        "run_int_port_check_title": "Port Configuration",
+        "run_int_port_checking": "Checking port {port} availability...",
+        "run_int_port_available": "Port {port} is available",
+        "run_int_saved_config_title": "Saved Configuration",
+        "run_int_save_config_title": "Save Configuration",
+        "run_int_save_config_prompt": "Save this configuration for future use?",
+        "run_int_config_name_prompt": "Configuration name",
+        "run_int_config_name_default": "Config {timestamp}",
+        "run_int_config_saved": "Configuration saved: {name}",
+        "run_int_config_summary": "Configuration Complete",
+        "run_int_model_label": "Model:",
+        "run_int_selected_gpus_label": "Selected GPUs:",
        # Model command
        "model_supported_title": "KTransformers Supported Models",
        "model_column_model": "Model",
@@ -282,6 +453,180 @@ MESSAGES: dict[str, dict[str, str]] = {
        "model_column_name": "Name",
        "model_column_hf_repo": "HuggingFace Repo",
        "model_column_aliases": "Aliases",
+        # Model management - new user registry system
+        "model_no_registered_models": "No models registered yet.",
+        "model_scan_hint": "Scan for models: kt model scan",
+        "model_add_hint": "Add a model: kt model add /path/to/model",
+        "model_registered_models_title": "Registered Models",
+        "model_column_format": "Format",
+        "model_column_repo": "Repository",
+        "model_column_sha256": "SHA256",
+        "model_non_moe_hidden_hint": "Detected {count} non-MoE models, use kt model list --all to show all",
+        "model_usage_title": "Common Operations:",
+        "model_usage_info": "View details:",
+        "model_usage_edit": "Edit model:",
+        "model_usage_verify": "Verify integrity:",
+        "model_usage_quant": "Quantize model:",
+        "model_usage_run": "Run model:",
+        "model_usage_scan": "Scan for models:",
+        "model_usage_add": "Add model:",
+        "model_usage_verbose": "View with file details:",
+        "model_no_storage_paths": "No storage paths configured.",
+        "model_add_path_hint": "Add a storage path with: kt config set model.storage_paths /path/to/models",
+        "model_scanning_paths": "Scanning configured storage paths...",
+        "model_scanning_progress": "Scanning: {path}",
+        "model_scan_warnings_title": "Warnings",
+        "model_scan_no_models_found": "No models found in configured paths.",
+        "model_scan_check_paths_hint": "Check your storage paths: kt config get model.storage_paths",
+        "model_scan_min_size_hint": "Folders must be ≥{size}GB to be detected as models.",
+        "model_scan_found_title": "Found {count} new model(s)",
+        "model_column_path": "Path",
+        "model_column_size": "Size",
+        "model_scan_auto_adding": "Auto-adding models...",
+        "model_added": "Added: {name}",
+        "model_add_failed": "Failed to add {name}: {error}",
+        "model_scan_complete": "Scan complete! Added {count} model(s).",
+        "model_scan_interactive_prompt": "Commands: edit <id> | del <id> | done",
+        "model_scan_cmd_edit": "Set custom name for model",
+        "model_scan_cmd_delete": "Skip this model",
+        "model_scan_cmd_done": "Finish and add models",
+        "model_scan_marked_skip": "Skipped model #{id}",
+        "model_scan_invalid_id": "Invalid model ID: {id}",
+        "model_scan_invalid_command": "Invalid command. Use: edit <id> | del <id> | done",
+        "model_scan_edit_model": "Edit model {id}",
+        "model_scan_edit_note": "You can change the model name before adding it to registry",
+        "model_scan_adding_models": "Adding {count} model(s)...",
+        "model_scan_next_steps": "Next Steps",
+        "model_scan_view_hint": "View registered models: kt model list",
+        "model_scan_edit_hint": "Edit model details: kt model edit <name>",
+        "model_scan_no_models_added": "No models were added.",
+        "model_add_path_not_exist": "Error: Path does not exist: {path}",
+        "model_add_not_directory": "Error: Path is not a directory: {path}",
+        "model_add_already_registered": "This path is already registered as: {name}",
+        "model_add_view_hint": "View with: kt model info {name}",
+        "model_add_scanning": "Scanning model files...",
+        "model_add_scan_failed": "Failed to scan model: {error}",
+        "model_add_no_model_files": "No model files found in {path}",
+        "model_add_supported_formats": "Supported: *.safetensors, *.gguf (folder ≥10GB)",
+        "model_add_detected": "Detected: {format} format, {size}, {count} file(s)",
+        "model_add_name_conflict": "Name '{name}' already exists.",
+        "model_add_prompt_name": "Enter a name for this model",
+        "model_add_name_exists": "Name already exists. Please choose another name:",
+        "model_add_configure_repo": "Configure repository information for SHA256 verification?",
+        "model_add_repo_type_prompt": "Select repository type:",
+        "model_add_choice": "Choice",
+        "model_add_repo_id_prompt": "Enter repository ID (e.g., deepseek-ai/DeepSeek-V3)",
+        "model_add_success": "Successfully added model: {name}",
+        "model_add_verify_hint": "Verify integrity: kt model verify {name}",
+        "model_add_edit_later_hint": "Edit details later: kt model edit {name}",
+        "model_add_failed_generic": "Failed to add model: {error}",
+        "model_edit_not_found": "Model '{name}' not found.",
+        "model_edit_list_hint": "List models: kt model list",
+        "model_edit_current_config": "Current Configuration",
+        "model_edit_what_to_edit": "What would you like to edit?",
+        "model_edit_option_name": "Edit name",
+        "model_edit_option_repo": "Configure repository info",
+        "model_edit_option_delete": "Delete this model",
+        "model_edit_option_cancel": "Cancel / Exit",
+        "model_edit_choice_prompt": "Select option",
+        "model_edit_new_name": "Enter new name",
+        "model_edit_name_conflict": "Name '{name}' already exists. Please choose another:",
+        "model_edit_name_updated": "Name updated: {old} → {new}",
+        "model_edit_repo_type_prompt": "Repository type (or enter to remove repo info):",
+        "model_edit_repo_remove": "Remove repository info",
+        "model_edit_repo_id_prompt": "Enter repository ID",
+        "model_edit_repo_removed": "Repository info removed",
+        "model_edit_repo_updated": "Repository configured: {repo_type} → {repo_id}",
+        "model_edit_delete_warning": "Delete model '{name}' from registry?",
+        "model_edit_delete_note": "Note: This only removes the registry entry. Model files in {path} will NOT be deleted.",
+        "model_edit_delete_confirm": "Confirm deletion?",
+        "model_edit_deleted": "Model '{name}' deleted from registry",
+        "model_edit_delete_cancelled": "Deletion cancelled",
+        "model_edit_cancelled": "Edit cancelled",
+        # Model edit - Interactive selection
+        "model_edit_select_title": "Select Model to Edit",
+        "model_edit_select_model": "Select model",
+        "model_edit_invalid_choice": "Invalid choice",
+        "model_edit_no_models": "No models found in registry.",
+        "model_edit_add_hint_scan": "Add models with:",
+        "model_edit_add_hint_add": "Or:",
+        # Model edit - Display
+        "model_edit_gpu_links": "GPU Links:",
+        # Model edit - Menu options
+        "model_edit_manage_gpu_links": "Manage GPU Links",
+        "model_edit_save_changes": "Save changes",
+        "model_edit_has_changes": "(has changes)",
+        "model_edit_no_changes": "(no changes)",
+        # Model edit - Pending changes messages
+        "model_edit_name_pending": "Name will be updated when you save changes.",
+        "model_edit_repo_remove_pending": "Repository info will be removed when you save changes.",
+        "model_edit_repo_update_pending": "Repository info will be updated when you save changes.",
+        # Model edit - GPU link management
+        "model_edit_gpu_links_title": "Manage GPU Links for {name}",
+        "model_edit_current_gpu_links": "Current GPU links:",
+        "model_edit_no_gpu_links": "No GPU links configured.",
+        "model_edit_gpu_options": "Options:",
+        "model_edit_gpu_add": "Add GPU link",
+        "model_edit_gpu_remove": "Remove GPU link",
+        "model_edit_gpu_clear": "Clear all GPU links",
+        "model_edit_gpu_back": "Back to main menu",
+        "model_edit_gpu_choose_option": "Choose option",
+        "model_edit_gpu_none_available": "No GPU models available to link.",
+        "model_edit_gpu_available_models": "Available GPU models:",
+        "model_edit_gpu_already_linked": "(already linked)",
+        "model_edit_gpu_enter_number": "Enter GPU model number to add",
+        "model_edit_gpu_link_pending": "GPU link will be added when you save changes: {name}",
+        "model_edit_gpu_already_exists": "This GPU model is already linked.",
+        "model_edit_gpu_invalid_choice": "Invalid choice.",
+        "model_edit_gpu_invalid_input": "Invalid input.",
+        "model_edit_gpu_none_to_remove": "No GPU links to remove.",
+        "model_edit_gpu_choose_to_remove": "Choose GPU link to remove:",
+        "model_edit_gpu_enter_to_remove": "Enter number to remove",
+        "model_edit_gpu_remove_pending": "GPU link will be removed when you save changes: {name}",
+        "model_edit_gpu_none_to_clear": "No GPU links to clear.",
+        "model_edit_gpu_clear_confirm": "Remove all GPU links?",
+        "model_edit_gpu_clear_pending": "All GPU links will be removed when you save changes.",
+        "model_edit_cancelled_short": "Cancelled.",
+        # Model edit - Save operation
+        "model_edit_no_changes_to_save": "No changes to save.",
+        "model_edit_saving": "Saving changes...",
+        "model_edit_saved": "Changes saved successfully!",
+        "model_edit_updated_config": "Updated Configuration:",
+        "model_edit_repo_changed_warning": "⚠ Repository information has changed.",
+        "model_edit_verify_hint": "Run [cyan]kt model verify[/cyan] to verify model integrity with SHA256 checksums.",
+        "model_edit_discard_changes": "Discard unsaved changes?",
+        "model_info_not_found": "Model '{name}' not found.",
+        "model_info_list_hint": "List all models: kt model list",
+        "model_remove_not_found": "Model '{name}' not found.",
+        "model_remove_list_hint": "List models: kt model list",
+        "model_remove_warning": "Remove model '{name}' from registry?",
+        "model_remove_note": "Note: This only removes the registry entry. Model files will NOT be deleted from {path}.",
+        "model_remove_confirm": "Confirm removal?",
+        "model_remove_cancelled": "Removal cancelled",
+        "model_removed": "Model '{name}' removed from registry",
+        "model_remove_failed": "Failed to remove model: {error}",
+        "model_refresh_checking": "Checking model paths...",
+        "model_refresh_all_valid": "All models are valid! ({count} model(s) checked)",
+        "model_refresh_total": "Total models: {total}",
+        "model_refresh_missing_found": "Found {count} missing model(s)",
+        "model_refresh_suggestions": "Suggested Actions",
+        "model_refresh_remove_hint": "Remove from registry: kt model remove <name>",
+        "model_refresh_rescan_hint": "Re-scan for models: kt model scan",
+        "model_verify_not_found": "Model '{name}' not found.",
+        "model_verify_list_hint": "List models: kt model list",
+        "model_verify_no_repo": "Model '{name}' has no repository information configured.",
+        "model_verify_config_hint": "Configure repository: kt model edit {name}",
+        "model_verify_path_missing": "Model path does not exist: {path}",
+        "model_verify_starting": "Verifying model integrity...",
+        "model_verify_progress": "Repository: {repo_type} → {repo_id}",
+        "model_verify_not_implemented": "SHA256 verification not implemented yet",
+        "model_verify_future_note": "This feature will fetch official SHA256 hashes from {repo_type} and compare with local files.",
+        "model_verify_passed": "Verification passed! All files match official hashes.",
+        "model_verify_failed": "Verification failed! {count} file(s) have hash mismatches.",
+        "model_verify_all_no_repos": "No models have repository information configured.",
+        "model_verify_all_config_hint": "Configure repos using: kt model edit <name>",
+        "model_verify_all_found": "Found {count} model(s) with repository info",
+        "model_verify_all_manual_hint": "Verify specific model: kt model verify <name>",
        # Coming soon
        "feature_coming_soon": "This feature is coming soon...",
    },
@@ -465,6 +810,70 @@ MESSAGES: dict[str, dict[str, str]] = {
        "quant_progress": "正在量化...",
        "quant_complete": "量化完成！",
        "quant_input_not_found": "未找到输入模型: {path}",
+        "quant_cpu_threads": "CPU 线程数: {threads}",
+        "quant_numa_nodes": "NUMA 节点数: {nodes}",
+        "quant_time_warning": "量化可能需要 30-60 分钟，具体取决于模型大小。",
+        "quant_disk_analysis": "磁盘空间分析：",
+        "quant_source_size": "源模型大小：",
+        "quant_estimated_size": "预估输出大小：",
+        "quant_available_space": "可用空间：",
+        "quant_insufficient_space": "警告：磁盘空间不足！",
+        "quant_required_space": "所需空间（含20%缓冲）：",
+        "quant_shortage": "不足：",
+        "quant_may_fail": "量化可能失败或生成不完整的文件。",
+        "quant_continue_anyway": "仍然继续？",
+        "quant_settings": "量化设置：",
+        "quant_registered": "量化模型已注册：{name}",
+        "quant_view_with": "查看：",
+        "quant_use_with": "使用：",
+        "quant_register_failed": "自动注册模型失败：{error}",
+        "quant_output_exists": "输出路径已存在：{path}",
+        "quant_using_unique": "使用唯一名称：{path}",
+        # Interactive quant
+        "quant_interactive_title": "交互式量化配置",
+        "quant_new_model_notice": "⚠ 注意：部分新模型暂时无法量化（转换脚本未适配），推荐使用原精度进行推理（无需转换权重）。",
+        "quant_no_moe_models": "未找到可量化的 MoE 模型。",
+        "quant_only_moe": "只有 MoE 模型（如 DeepSeek-V3）可以被量化为 AMX 格式。",
+        "quant_add_models": "添加模型：{command}",
+        "quant_moe_available": "可量化的 MoE 模型：",
+        "quant_select_model": "选择要量化的模型",
+        "quant_invalid_choice": "无效选择",
+        "quant_step2_method": "第 2 步：量化方法",
+        "quant_method_label": "量化方法：",
+        "quant_int4_desc": "INT4",
+        "quant_int8_desc": "INT8",
+        "quant_select_method": "选择量化方法",
+        "quant_input_type_label": "输入权重类型：",
+        "quant_fp8_desc": "FP8（适用于 8 位浮点权重）",
+        "quant_fp16_desc": "FP16（适用于 16 位浮点权重）",
+        "quant_bf16_desc": "BF16（适用于 Brain Float 16 权重）",
+        "quant_select_input_type": "选择输入类型",
+        "quant_step3_cpu": "第 3 步：CPU 配置",
+        "quant_cpu_threads_prompt": "CPU 线程数（1 到 {max}）",
+        "quant_numa_nodes_prompt": "NUMA 节点数（1 到 {max}）",
+        "quant_use_gpu_label": "是否使用 GPU 进行转换？",
+        "quant_gpu_speedup": "GPU 可以显著加快量化速度",
+        "quant_enable_gpu": "启用 GPU 加速？",
+        "quant_step4_output": "第 4 步：输出路径",
+        "quant_default_path": "默认：",
+        "quant_use_default": "使用默认输出路径？",
+        "quant_custom_path": "输入自定义输出路径",
+        "quant_output_exists_warn": "⚠ 输出路径已存在：{path}",
+        "quant_using_unique_name": "→ 使用唯一名称：{path}",
+        "quant_config_summary": "配置摘要",
+        "quant_summary_model": "模型：",
+        "quant_summary_method": "方法：",
+        "quant_summary_input_type": "输入类型：",
+        "quant_summary_cpu_threads": "CPU 线程数：",
+        "quant_summary_numa": "NUMA 节点数：",
+        "quant_summary_gpu": "使用 GPU：",
+        "quant_summary_output": "输出路径：",
+        "quant_start_question": "开始量化？",
+        "quant_cancelled": "已取消",
+        "quant_config_complete": "配置完成",
+        "quant_time_elapsed": "耗时：",
+        "yes": "是",
+        "no": "否",
        # SFT command
        "sft_mode_train": "训练模式",
        "sft_mode_chat": "聊天模式",
@ -522,6 +931,113 @@ MESSAGES: dict[str, dict[str, str]] = {
        "chat_proxy_detected": "检测到环境中存在代理设置",
        "chat_proxy_confirm": "是否使用代理连接？",
        "chat_proxy_disabled": "已在本次会话中禁用代理",
+        "chat_openai_required": "聊天功能需要 OpenAI Python SDK。",
+        "chat_install_hint": "安装命令：",
+        "chat_title": "KTransformers 对话",
+        "chat_server": "服务器",
+        "chat_temperature": "温度",
+        "chat_max_tokens": "最大 tokens",
+        "chat_help_hint": "输入 '/help' 查看命令，'/quit' 退出",
+        "chat_connecting": "正在连接服务器...",
+        "chat_no_models": "服务器上没有可用模型",
+        "chat_model_not_found": "未找到模型 '{model}'。可用模型：{available}",
+        "chat_connected": "已连接到模型：{model}",
+        "chat_connect_failed": "连接服务器失败：{error}",
+        "chat_server_not_running": "请确保模型服务器正在运行：",
+        "chat_user_prompt": "用户",
+        "chat_assistant_prompt": "助手",
+        "chat_generation_error": "生成回复时出错：{error}",
+        "chat_interrupted": "对话已中断。再见！",
+        "chat_history_saved": "历史记录已保存到：{path}",
+        "chat_goodbye": "再见！",
+        "chat_help_title": "可用命令：",
+        "chat_help_content": "/help, /h         - 显示此帮助信息\n/quit, /exit, /q  - 退出聊天\n/clear, /c        - 清除对话历史\n/history, /hist   - 显示对话历史\n/info, /i         - 显示当前设置\n/retry, /r        - 重新生成上一个回复",
+        "chat_history_cleared": "对话历史已清除",
+        "chat_no_history": "暂无对话历史",
+        "chat_history_title": "历史记录（{count} 条消息）",
+        "chat_info_title": "当前设置：",
+        "chat_info_content": "温度：{temperature}\n最大 tokens：{max_tokens}\n消息数：{messages}",
+        "chat_retrying": "正在重试上一个回复...",
+        "chat_no_retry": "没有可重试的回复",
+        "chat_unknown_command": "未知命令：{command}",
+        "chat_unknown_hint": "输入 /help 查看可用命令",
+        # Run Interactive
+        "run_int_no_moe_models": "未找到 MoE GPU 模型。",
+        "run_int_add_models": "添加模型：kt model scan",
+        "run_int_list_all": "列出所有模型：kt model list --all",
+        "run_int_step1_title": "第 1 步：选择模型（GPU MoE 模型）",
+        "run_int_select_model": "选择模型",
+        "run_int_step2_title": "第 2 步：选择推理方法",
+        "run_int_method_raw": "RAW 精度（FP8/FP8_PERCHANNEL/BF16/RAWINT4）",
+        "run_int_method_amx": "AMX 量化（INT4/INT8）",
+        "run_int_method_gguf": "GGUF（Llamafile）",
+        "run_int_method_saved": "使用已保存的配置",
+        "run_int_select_method": "选择推理方法",
+        "run_int_raw_precision": "RAW 精度：",
+        "run_int_select_precision": "选择精度",
+        "run_int_amx_method": "AMX 方法：",
+        "run_int_select_amx": "选择 AMX 方法",
+        "run_int_step3_title": "第 3 步：NUMA 和 CPU 配置",
+        "run_int_numa_nodes": "NUMA 节点数（1-{max}）",
+        "run_int_cpu_threads": "每个 NUMA 的 CPU 线程数（1-{max}）",
+        "run_int_amx_warning": "⚠ 警告：AMX INT4/INT8 需要兼容的 CPU。检查命令：kt doctor",
+        "run_int_step4_title": "第 4 步：GPU 专家配置",
+        "run_int_gpu_experts": "每层 GPU 专家数（0-{max}）",
+        "run_int_gpu_experts_info": "总专家数：{total}，每 token 激活：{active}",
+        "run_int_step5_title": "第 5 步：KV Cache 配置",
+        "run_int_kv_cache_size": "KV Cache 大小（tokens）",
+        "run_int_chunk_prefill": "启用分块预填充？",
+        "run_int_chunk_size": "分块预填充大小（tokens）",
+        "run_int_gpu_prefill_threshold": "GPU 预填充阈值（tokens）",
+        "run_int_step6_title": "第 6 步：GPU 选择和张量并行",
+        "run_int_available_gpus": "可用 GPU：",
+        "run_int_gpu_id": "GPU {id}",
+        "run_int_vram_info": "{name}（总计 {total:.1f}GB，空闲 {free:.1f}GB）",
+        "run_int_select_gpus": "选择 GPU ID（逗号分隔）",
+        "run_int_invalid_gpu_range": "所有 GPU ID 必须在 0 到 {max} 之间",
+        "run_int_tp_size": "TP 大小（必须是 2 的幂：1,2,4,8...）",
+        "run_int_tp_mismatch": "TP 大小必须与选择的 GPU 数量匹配（{count}）",
+        "run_int_tp_not_power_of_2": "TP 大小必须是 2 的幂",
+        "run_int_mem_fraction": "静态内存占用比例（0.0-1.0）",
+        "run_int_using_saved_mem": "使用已保存的内存占用比例：{fraction}",
+        "run_int_step7_title": "第 7 步：解析器配置（可选）",
+        "run_int_tool_call_parser": "工具调用解析器（按回车跳过）",
+        "run_int_reasoning_parser": "推理解析器（按回车跳过）",
+        "run_int_step8_title": "第 8 步：主机和端口配置",
+        "run_int_host": "主机",
+        "run_int_port": "端口",
+        "run_int_port_occupied": "⚠ 端口 {port} 已被占用",
+        "run_int_port_suggestion": "建议使用可用端口：{port}",
+        "run_int_use_suggested": "使用建议的端口？",
+        "run_int_saved_configs": "已保存的配置：",
+        "run_int_config_name": "配置 {num}",
+        "run_int_kt_method": "KT 方法：",
+        "run_int_numa_nodes_label": "NUMA 节点：",
+        "run_int_cpu_threads_label": "CPU 线程：",
+        "run_int_gpu_experts_label": "GPU 专家：",
+        "run_int_tp_size_label": "TP 大小：",
+        "run_int_mem_fraction_label": "内存占用比例：",
+        "run_int_server_label": "服务器：",
+        "run_int_kv_cache_label": "KV Cache：",
+        "run_int_chunk_prefill_label": "分块预填充：",
+        "run_int_gpu_prefill_label": "GPU 预填充阈值：",
+        "run_int_tool_parser_label": "工具调用解析器：",
+        "run_int_reasoning_parser_label": "推理解析器：",
+        "run_int_command_label": "命令：",
+        "run_int_select_config": "选择配置",
+        "run_int_gpu_select_required": "请选择 {tp} 个 GPU（来自已保存配置的 TP 大小）",
+        "run_int_port_check_title": "端口配置",
+        "run_int_port_checking": "正在检查端口 {port} 可用性...",
+        "run_int_port_available": "端口 {port} 可用",
+        "run_int_saved_config_title": "已保存的配置",
+        "run_int_save_config_title": "保存配置",
+        "run_int_save_config_prompt": "保存此配置以供将来使用？",
+        "run_int_config_name_prompt": "配置名称",
+        "run_int_config_name_default": "配置 {timestamp}",
+        "run_int_config_saved": "配置已保存：{name}",
+        "run_int_config_summary": "配置完成",
+        "run_int_model_label": "模型：",
+        "run_int_selected_gpus_label": "已选择的 GPU：",
        # Model command
        "model_supported_title": "KTransformers 支持的模型",
        "model_column_model": "模型",
@ -557,6 +1073,180 @@ MESSAGES: dict[str, dict[str, str]] = {
        "model_column_name": "名称",
        "model_column_hf_repo": "HuggingFace 仓库",
        "model_column_aliases": "别名",
+        # Model management - new user registry system
+        "model_no_registered_models": "尚未注册任何模型。",
+        "model_scan_hint": "扫描模型: kt model scan",
+        "model_add_hint": "添加模型: kt model add /path/to/model",
+        "model_registered_models_title": "已注册的模型",
+        "model_column_format": "格式",
+        "model_column_repo": "仓库",
+        "model_column_sha256": "SHA256",
+        "model_non_moe_hidden_hint": "检测到 {count} 个非MoE模型，使用 kt model list --all 展示全部",
+        "model_usage_title": "常用操作:",
+        "model_usage_info": "查看详情:",
+        "model_usage_edit": "编辑模型:",
+        "model_usage_verify": "校验权重:",
+        "model_usage_quant": "量化模型:",
+        "model_usage_run": "运行模型:",
+        "model_usage_scan": "扫描模型:",
+        "model_usage_add": "添加模型:",
+        "model_usage_verbose": "查看包含文件详情:",
+        "model_no_storage_paths": "未配置存储路径。",
+        "model_add_path_hint": "添加存储路径: kt config set model.storage_paths /path/to/models",
+        "model_scanning_paths": "正在扫描配置的存储路径...",
+        "model_scanning_progress": "扫描中: {path}",
+        "model_scan_warnings_title": "警告",
+        "model_scan_no_models_found": "在配置的路径中未找到模型。",
+        "model_scan_check_paths_hint": "检查存储路径: kt config get model.storage_paths",
+        "model_scan_min_size_hint": "文件夹必须 ≥{size}GB 才能被识别为模型。",
+        "model_scan_found_title": "发现 {count} 个新模型",
+        "model_column_path": "路径",
+        "model_column_size": "大小",
+        "model_scan_auto_adding": "正在自动添加模型...",
+        "model_added": "已添加: {name}",
+        "model_add_failed": "添加 {name} 失败: {error}",
+        "model_scan_complete": "扫描完成！已添加 {count} 个模型。",
+        "model_scan_interactive_prompt": "命令: edit <id> | del <id> | done",
+        "model_scan_cmd_edit": "设置模型自定义名称和仓库",
+        "model_scan_cmd_delete": "跳过此模型",
+        "model_scan_cmd_done": "完成并添加模型",
+        "model_scan_marked_skip": "已跳过模型 #{id}",
+        "model_scan_invalid_id": "无效的模型 ID: {id}",
+        "model_scan_invalid_command": "无效命令。使用: edit <id> | del <id> | done",
+        "model_scan_edit_model": "编辑模型 {id}",
+        "model_scan_edit_note": "您可以在添加到注册表前更改模型名称和配置仓库信息",
+        "model_scan_adding_models": "正在添加 {count} 个模型...",
+        "model_scan_next_steps": "后续步骤",
+        "model_scan_view_hint": "查看已注册模型: kt model list",
+        "model_scan_edit_hint": "编辑模型详情: kt model edit <name>",
+        "model_scan_no_models_added": "未添加任何模型。",
+        "model_add_path_not_exist": "错误: 路径不存在: {path}",
+        "model_add_not_directory": "错误: 路径不是目录: {path}",
+        "model_add_already_registered": "此路径已注册为: {name}",
+        "model_add_view_hint": "查看: kt model info {name}",
+        "model_add_scanning": "正在扫描模型文件...",
+        "model_add_scan_failed": "扫描模型失败: {error}",
+        "model_add_no_model_files": "在 {path} 中未找到模型文件",
+        "model_add_supported_formats": "支持: *.safetensors, *.gguf (文件夹 ≥10GB)",
+        "model_add_detected": "检测到: {format} 格式, {size}, {count} 个文件",
+        "model_add_name_conflict": "名称 '{name}' 已存在。",
+        "model_add_prompt_name": "为此模型输入名称",
+        "model_add_name_exists": "名称已存在。请选择其他名称:",
+        "model_add_configure_repo": "配置仓库信息以进行 SHA256 验证?",
+        "model_add_repo_type_prompt": "选择仓库类型:",
+        "model_add_choice": "选择",
+        "model_add_repo_id_prompt": "输入仓库 ID (例如: deepseek-ai/DeepSeek-V3)",
+        "model_add_success": "成功添加模型: {name}",
+        "model_add_verify_hint": "验证完整性: kt model verify {name}",
+        "model_add_edit_later_hint": "稍后编辑详情: kt model edit {name}",
+        "model_add_failed_generic": "添加模型失败: {error}",
+        "model_edit_not_found": "未找到模型 '{name}'。",
+        "model_edit_list_hint": "列出模型: kt model list",
+        "model_edit_current_config": "当前配置",
+        "model_edit_what_to_edit": "您想编辑什么?",
+        "model_edit_option_name": "编辑名称",
+        "model_edit_option_repo": "配置仓库信息",
+        "model_edit_option_delete": "删除此模型",
+        "model_edit_option_cancel": "取消 / 退出",
+        "model_edit_choice_prompt": "选择选项",
+        "model_edit_new_name": "输入新名称",
+        "model_edit_name_conflict": "名称 '{name}' 已存在。请选择其他名称:",
+        "model_edit_name_updated": "名称已更新: {old} → {new}",
+        "model_edit_repo_type_prompt": "仓库类型 (或按回车删除仓库信息):",
+        "model_edit_repo_remove": "删除仓库信息",
+        "model_edit_repo_id_prompt": "输入仓库 ID",
+        "model_edit_repo_removed": "仓库信息已删除",
+        "model_edit_repo_updated": "仓库已配置: {repo_type} → {repo_id}",
+        "model_edit_delete_warning": "从注册表中删除模型 '{name}'?",
+        "model_edit_delete_note": "注意: 这只会删除注册表条目。{path} 中的模型文件不会被删除。",
+        "model_edit_delete_confirm": "确认删除?",
+        "model_edit_deleted": "模型 '{name}' 已从注册表中删除",
+        "model_edit_delete_cancelled": "删除已取消",
+        "model_edit_cancelled": "编辑已取消",
+        # Model edit - Interactive selection
+        "model_edit_select_title": "选择要编辑的模型",
+        "model_edit_select_model": "选择模型",
+        "model_edit_invalid_choice": "无效选择",
+        "model_edit_no_models": "注册表中未找到模型。",
+        "model_edit_add_hint_scan": "添加模型:",
+        "model_edit_add_hint_add": "或:",
+        # Model edit - Display
+        "model_edit_gpu_links": "GPU 链接:",
+        # Model edit - Menu options
+        "model_edit_manage_gpu_links": "管理 GPU 链接",
+        "model_edit_save_changes": "保存更改",
+        "model_edit_has_changes": "(有更改)",
+        "model_edit_no_changes": "(无更改)",
+        # Model edit - Pending changes messages
+        "model_edit_name_pending": "名称将在保存更改时更新。",
+        "model_edit_repo_remove_pending": "仓库信息将在保存更改时删除。",
+        "model_edit_repo_update_pending": "仓库信息将在保存更改时更新。",
+        # Model edit - GPU link management
+        "model_edit_gpu_links_title": "管理 {name} 的 GPU 链接",
+        "model_edit_current_gpu_links": "当前 GPU 链接:",
+        "model_edit_no_gpu_links": "未配置 GPU 链接。",
+        "model_edit_gpu_options": "选项:",
+        "model_edit_gpu_add": "添加 GPU 链接",
+        "model_edit_gpu_remove": "删除 GPU 链接",
+        "model_edit_gpu_clear": "清除所有 GPU 链接",
+        "model_edit_gpu_back": "返回主菜单",
+        "model_edit_gpu_choose_option": "选择选项",
+        "model_edit_gpu_none_available": "没有可链接的 GPU 模型。",
+        "model_edit_gpu_available_models": "可用的 GPU 模型:",
+        "model_edit_gpu_already_linked": "(已链接)",
+        "model_edit_gpu_enter_number": "输入要添加的 GPU 模型编号",
+        "model_edit_gpu_link_pending": "GPU 链接将在保存更改时添加: {name}",
+        "model_edit_gpu_already_exists": "此 GPU 模型已链接。",
+        "model_edit_gpu_invalid_choice": "无效选择。",
+        "model_edit_gpu_invalid_input": "无效输入。",
+        "model_edit_gpu_none_to_remove": "没有可删除的 GPU 链接。",
+        "model_edit_gpu_choose_to_remove": "选择要删除的 GPU 链接:",
+        "model_edit_gpu_enter_to_remove": "输入要删除的编号",
+        "model_edit_gpu_remove_pending": "GPU 链接将在保存更改时删除: {name}",
+        "model_edit_gpu_none_to_clear": "没有可清除的 GPU 链接。",
+        "model_edit_gpu_clear_confirm": "删除所有 GPU 链接?",
+        "model_edit_gpu_clear_pending": "所有 GPU 链接将在保存更改时删除。",
+        "model_edit_cancelled_short": "已取消。",
+        # Model edit - Save operation
+        "model_edit_no_changes_to_save": "没有更改可保存。",
+        "model_edit_saving": "正在保存更改...",
+        "model_edit_saved": "更改保存成功!",
+        "model_edit_updated_config": "更新后的配置:",
+        "model_edit_repo_changed_warning": "⚠ 仓库信息已更改。",
+        "model_edit_verify_hint": "运行 [cyan]kt model verify[/cyan] 以使用 SHA256 校验和验证模型完整性。",
+        "model_edit_discard_changes": "放弃未保存的更改?",
+        "model_info_not_found": "未找到模型 '{name}'。",
+        "model_info_list_hint": "列出所有模型: kt model list",
+        "model_remove_not_found": "未找到模型 '{name}'。",
+        "model_remove_list_hint": "列出模型: kt model list",
+        "model_remove_warning": "从注册表中删除模型 '{name}'?",
+        "model_remove_note": "注意: 这只会删除注册表条目。模型文件不会从 {path} 中删除。",
+        "model_remove_confirm": "确认删除?",
+        "model_remove_cancelled": "删除已取消",
+        "model_removed": "模型 '{name}' 已从注册表中删除",
+        "model_remove_failed": "删除模型失败: {error}",
+        "model_refresh_checking": "正在检查模型路径...",
+        "model_refresh_all_valid": "所有模型都有效! (已检查 {count} 个模型)",
+        "model_refresh_total": "总模型数: {total}",
+        "model_refresh_missing_found": "发现 {count} 个缺失的模型",
+        "model_refresh_suggestions": "建议操作",
+        "model_refresh_remove_hint": "从注册表中删除: kt model remove <name>",
+        "model_refresh_rescan_hint": "重新扫描模型: kt model scan",
+        "model_verify_not_found": "未找到模型 '{name}'。",
+        "model_verify_list_hint": "列出模型: kt model list",
+        "model_verify_no_repo": "模型 '{name}' 未配置仓库信息。",
+        "model_verify_config_hint": "配置仓库: kt model edit {name}",
+        "model_verify_path_missing": "模型路径不存在: {path}",
+        "model_verify_starting": "正在验证模型完整性...",
+        "model_verify_progress": "仓库: {repo_type} → {repo_id}",
+        "model_verify_not_implemented": "SHA256 验证尚未实现",
+        "model_verify_future_note": "此功能将从 {repo_type} 获取官方 SHA256 哈希值并与本地文件进行比较。",
+        "model_verify_passed": "验证通过！所有文件都与官方哈希匹配。",
+        "model_verify_failed": "验证失败！{count} 个文件的哈希不匹配。",
+        "model_verify_all_no_repos": "没有模型配置了仓库信息。",
+        "model_verify_all_config_hint": "配置仓库使用: kt model edit <name>",
+        "model_verify_all_found": "发现 {count} 个配置了仓库信息的模型",
+        "model_verify_all_manual_hint": "验证特定模型: kt model verify <name>",
        # Coming soon
        "feature_coming_soon": "此功能即将推出...",
    },