mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-28 20:00:06 +00:00
kt-cli enhancement (#1834)
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
* [feat]: redesign kt run interactive configuration with i18n support
  - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port)
  - Add configuration save/load system (~/.ktransformers/run_configs.yaml)
  - Add i18n support for kt chat (en/zh translations)
  - Add universal input validators with auto-retry and Chinese comma support
  - Add port availability checker with auto-suggestion
  - Add parser configuration (--tool-call-parser, --reasoning-parser)
  - Remove tuna command and clean up redundant files
  - Fix: variable reference bug in run.py, filter to show only MoE models
* [feat]: unify model selection UI and enable shared experts fusion by default
  - Unify kt run model selection table with kt model list display
    * Add Total size, MoE Size, Repo, and SHA256 status columns
    * Use consistent formatting and styling
    * Improve user decision-making with more information
  - Enable --disable-shared-experts-fusion by default
    * Change default value from False to True
    * Users can still override with --enable-shared-experts-fusion
* [feat]: improve kt chat with performance metrics and better CJK support
  - Add performance metrics display after each response
    * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token)
    * Accurate input/output token counts using model tokenizer
    * Fallback to estimation if tokenizer unavailable
    * Metrics shown in dim style (not prominent)
  - Fix Chinese character input issues
    * Replace Prompt.ask() with console.input() for better CJK support
    * Fixes backspace deletion showing half-characters
  - Suppress NumPy subnormal warnings
    * Filter "The value of the smallest subnormal" warnings
    * Cleaner CLI output on certain hardware environments
* [fix]: correct TTFT measurement in kt chat
  - Move start_time initialization before API call
  - Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms
  - Now correctly measures time from request sent to first token received
* [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案
* [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力
* [docs]: 添加 Clawdbot 飞书接入教程链接
* [feat]: improve CLI table display, model verification, and chat experience
  - Add sequence number (#) column to all model tables by default
  - Filter kt edit to show only MoE GPU models (exclude AMX)
  - Extend kt model verify to check *.json and *.py files in addition to weights
  - Fix re-verification bug where repaired files caused false failures
  - Suppress tokenizer debug output in kt chat token counting
* [fix]: fix cpu cores.
---------
Co-authored-by: skqliao <skqliao@gmail.com>
This commit is contained in:
parent
4f64665758
commit
56cbd69ac4
23 changed files with 10327 additions and 781 deletions
|
|
@ -5,6 +5,10 @@ KTransformers CLI - A unified command-line interface for KTransformers.
|
|||
"""
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
# Suppress numpy subnormal warnings
|
||||
warnings.filterwarnings("ignore", message="The value of the smallest subnormal")
|
||||
|
||||
import typer
|
||||
|
||||
|
|
@ -28,6 +32,7 @@ def _get_help(key: str) -> str:
|
|||
"run": {"en": "Start model inference server", "zh": "启动模型推理服务器"},
|
||||
"chat": {"en": "Interactive chat with running model", "zh": "与运行中的模型进行交互式聊天"},
|
||||
"quant": {"en": "Quantize model weights", "zh": "量化模型权重"},
|
||||
"edit": {"en": "Edit model information", "zh": "编辑模型信息"},
|
||||
"bench": {"en": "Run full benchmark", "zh": "运行完整基准测试"},
|
||||
"microbench": {"en": "Run micro-benchmark", "zh": "运行微基准测试"},
|
||||
"doctor": {"en": "Diagnose environment issues", "zh": "诊断环境问题"},
|
||||
|
|
@ -43,7 +48,7 @@ def _get_help(key: str) -> str:
|
|||
app = typer.Typer(
|
||||
name="kt",
|
||||
help="KTransformers CLI - A unified command-line interface for KTransformers.",
|
||||
no_args_is_help=True,
|
||||
no_args_is_help=False, # Handle no-args case manually to support first-run setup
|
||||
add_completion=False, # Use static completion scripts instead of dynamic completion
|
||||
rich_markup_mode="rich",
|
||||
)
|
||||
|
|
@ -66,20 +71,7 @@ def _update_help_texts() -> None:
|
|||
group_info.help = _get_help(group_info.name)
|
||||
|
||||
|
||||
# Register commands
|
||||
app.command(name="version", help="Show version information")(version.version)
|
||||
# Run command is handled specially in main() to allow extra args
|
||||
# (not registered here to avoid typer's argument parsing)
|
||||
app.command(name="chat", help="Interactive chat with running model")(chat.chat)
|
||||
app.command(name="quant", help="Quantize model weights")(quant.quant)
|
||||
app.command(name="bench", help="Run full benchmark")(bench.bench)
|
||||
app.command(name="microbench", help="Run micro-benchmark")(bench.microbench)
|
||||
app.command(name="doctor", help="Diagnose environment issues")(doctor.doctor)
|
||||
|
||||
# Register sub-apps
|
||||
app.add_typer(model.app, name="model", help="Manage models and storage paths")
|
||||
app.add_typer(config.app, name="config", help="Manage configuration")
|
||||
app.add_typer(sft.app, name="sft", help="Fine-tuning with LlamaFactory")
|
||||
# Commands are registered later after tui_command is defined
|
||||
|
||||
|
||||
def check_first_run() -> None:
|
||||
|
|
@ -116,7 +108,7 @@ def _show_first_run_setup(settings) -> None:
|
|||
from rich.spinner import Spinner
|
||||
from rich.live import Live
|
||||
|
||||
from kt_kernel.cli.utils.environment import scan_storage_locations, format_size_gb, scan_models_in_location
|
||||
from kt_kernel.cli.utils.environment import scan_storage_locations, format_size_gb
|
||||
|
||||
console = Console()
|
||||
|
||||
|
|
@ -140,15 +132,8 @@ def _show_first_run_setup(settings) -> None:
|
|||
console.print(" [cyan][2][/cyan] 中文 (Chinese)")
|
||||
console.print()
|
||||
|
||||
while True:
|
||||
choice = Prompt.ask("Enter choice / 输入选择", choices=["1", "2"], default="1")
|
||||
|
||||
if choice == "1":
|
||||
lang = "en"
|
||||
break
|
||||
elif choice == "2":
|
||||
lang = "zh"
|
||||
break
|
||||
choice = Prompt.ask("Enter choice / 输入选择", choices=["1", "2"], default="1")
|
||||
lang = "en" if choice == "1" else "zh"
|
||||
|
||||
# Save language setting
|
||||
settings.set("general.language", lang)
|
||||
|
|
@ -161,6 +146,131 @@ def _show_first_run_setup(settings) -> None:
|
|||
else:
|
||||
console.print("[green]✓[/green] Language set to English")
|
||||
|
||||
# Model discovery section
|
||||
console.print()
|
||||
if lang == "zh":
|
||||
console.print("[bold]发现模型权重[/bold]")
|
||||
console.print()
|
||||
console.print("[dim]扫描系统中已有的模型权重文件,以便快速添加到模型列表。[/dim]")
|
||||
console.print()
|
||||
console.print(" [cyan][1][/cyan] 全局扫描 (自动扫描所有非系统路径)")
|
||||
console.print(" [cyan][2][/cyan] 手动指定路径 (可添加多个)")
|
||||
console.print(" [cyan][3][/cyan] 跳过 (稍后手动添加)")
|
||||
console.print()
|
||||
scan_choice = Prompt.ask("选择扫描方式", choices=["1", "2", "3"], default="1")
|
||||
else:
|
||||
console.print("[bold]Discover Model Weights[/bold]")
|
||||
console.print()
|
||||
console.print("[dim]Scan existing model weights on your system to quickly add them to the model list.[/dim]")
|
||||
console.print()
|
||||
console.print(" [cyan][1][/cyan] Global scan (auto-scan all non-system paths)")
|
||||
console.print(" [cyan][2][/cyan] Manual paths (add multiple paths)")
|
||||
console.print(" [cyan][3][/cyan] Skip (add manually later)")
|
||||
console.print()
|
||||
scan_choice = Prompt.ask("Select scan method", choices=["1", "2", "3"], default="1")
|
||||
|
||||
if scan_choice == "1":
|
||||
# Global scan
|
||||
from kt_kernel.cli.utils.model_discovery import discover_and_register_global, format_discovery_summary
|
||||
|
||||
console.print()
|
||||
try:
|
||||
total_found, new_found, registered = discover_and_register_global(
|
||||
min_size_gb=2.0, max_depth=6, show_progress=True, lang=lang
|
||||
)
|
||||
|
||||
format_discovery_summary(
|
||||
total_found=total_found,
|
||||
new_found=new_found,
|
||||
registered=registered,
|
||||
lang=lang,
|
||||
show_models=True,
|
||||
max_show=10,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
console.print(f"[yellow]Warning: Scan failed - {e}[/yellow]")
|
||||
|
||||
elif scan_choice == "2":
|
||||
# Manual path specification
|
||||
from kt_kernel.cli.utils.model_discovery import discover_and_register_path
|
||||
import os
|
||||
|
||||
discovered_paths = set() # Track paths discovered in this session
|
||||
total_registered = []
|
||||
|
||||
while True:
|
||||
console.print()
|
||||
if lang == "zh":
|
||||
path = Prompt.ask("输入要扫描的路径 (例如: /mnt/data/models)")
|
||||
else:
|
||||
path = Prompt.ask("Enter path to scan (e.g., /mnt/data/models)")
|
||||
|
||||
# Expand and validate path
|
||||
path = os.path.expanduser(path)
|
||||
|
||||
if not os.path.exists(path):
|
||||
if lang == "zh":
|
||||
console.print(f"[yellow]警告: 路径不存在: {path}[/yellow]")
|
||||
else:
|
||||
console.print(f"[yellow]Warning: Path does not exist: {path}[/yellow]")
|
||||
continue
|
||||
|
||||
if not os.path.isdir(path):
|
||||
if lang == "zh":
|
||||
console.print(f"[yellow]警告: 不是一个目录: {path}[/yellow]")
|
||||
else:
|
||||
console.print(f"[yellow]Warning: Not a directory: {path}[/yellow]")
|
||||
continue
|
||||
|
||||
# Scan this path
|
||||
console.print()
|
||||
try:
|
||||
total_found, new_found, registered = discover_and_register_path(
|
||||
path=path, min_size_gb=2.0, existing_paths=discovered_paths, show_progress=True, lang=lang
|
||||
)
|
||||
|
||||
# Update discovered paths
|
||||
for model in registered:
|
||||
discovered_paths.add(model.path)
|
||||
total_registered.extend(registered)
|
||||
|
||||
console.print()
|
||||
if lang == "zh":
|
||||
console.print(f"[green]✓[/green] 在此路径找到 {total_found} 个模型,其中 {new_found} 个为新模型")
|
||||
else:
|
||||
console.print(f"[green]✓[/green] Found {total_found} models in this path, {new_found} are new")
|
||||
|
||||
if new_found > 0:
|
||||
for model in registered[:5]:
|
||||
console.print(f" • {model.name} ({model.format})")
|
||||
|
||||
if len(registered) > 5:
|
||||
if lang == "zh":
|
||||
console.print(f" [dim]... 还有 {len(registered) - 5} 个新模型[/dim]")
|
||||
else:
|
||||
console.print(f" [dim]... and {len(registered) - 5} more new models[/dim]")
|
||||
|
||||
except Exception as e:
|
||||
console.print(f"[red]Error scanning path: {e}[/red]")
|
||||
|
||||
# Ask if continue
|
||||
console.print()
|
||||
if lang == "zh":
|
||||
continue_scan = Confirm.ask("是否继续添加其他路径?", default=False)
|
||||
else:
|
||||
continue_scan = Confirm.ask("Continue adding more paths?", default=False)
|
||||
|
||||
if not continue_scan:
|
||||
break
|
||||
|
||||
if total_registered:
|
||||
console.print()
|
||||
if lang == "zh":
|
||||
console.print(f"[green]✓[/green] 总共发现 {len(total_registered)} 个新模型")
|
||||
else:
|
||||
console.print(f"[green]✓[/green] Total {len(total_registered)} new models discovered")
|
||||
|
||||
# Model storage path selection
|
||||
console.print()
|
||||
console.print(f"[bold]{t('setup_model_path_title')}[/bold]")
|
||||
|
|
@ -174,16 +284,7 @@ def _show_first_run_setup(settings) -> None:
|
|||
console.print()
|
||||
|
||||
if locations:
|
||||
# Scan for models in each location
|
||||
console.print(f"[dim]{t('setup_scanning_models')}[/dim]")
|
||||
location_models: dict[str, list] = {}
|
||||
for loc in locations[:5]:
|
||||
models = scan_models_in_location(loc, max_depth=2)
|
||||
if models:
|
||||
location_models[loc.path] = models
|
||||
console.print()
|
||||
|
||||
# Show options
|
||||
# Show storage location options
|
||||
for i, loc in enumerate(locations[:5], 1): # Show top 5 options
|
||||
available = format_size_gb(loc.available_gb)
|
||||
total = format_size_gb(loc.total_gb)
|
||||
|
|
@ -194,22 +295,8 @@ def _show_first_run_setup(settings) -> None:
|
|||
else:
|
||||
option_str = t("setup_disk_option", path=loc.path, available=available, total=total)
|
||||
|
||||
# Add model count if any
|
||||
if loc.path in location_models:
|
||||
model_count = len(location_models[loc.path])
|
||||
option_str += f" [green]✓ {t('setup_location_has_models', count=model_count)}[/green]"
|
||||
|
||||
console.print(f" [cyan][{i}][/cyan] {option_str}")
|
||||
|
||||
# Show first few models found in this location
|
||||
if loc.path in location_models:
|
||||
for model in location_models[loc.path][:3]: # Show up to 3 models
|
||||
size_str = format_size_gb(model.size_gb)
|
||||
console.print(f" [dim]• {model.name} ({size_str})[/dim]")
|
||||
if len(location_models[loc.path]) > 3:
|
||||
remaining = len(location_models[loc.path]) - 3
|
||||
console.print(f" [dim] ... +{remaining} more[/dim]")
|
||||
|
||||
# Custom path option
|
||||
custom_idx = min(len(locations), 5) + 1
|
||||
console.print(f" [cyan][{custom_idx}][/cyan] {t('setup_custom_path')}")
|
||||
|
|
@ -323,51 +410,28 @@ def _install_shell_completion() -> None:
|
|||
|
||||
# Detect current shell
|
||||
shell = os.environ.get("SHELL", "")
|
||||
if "zsh" in shell:
|
||||
shell_name = "zsh"
|
||||
elif "fish" in shell:
|
||||
shell_name = "fish"
|
||||
else:
|
||||
shell_name = "bash"
|
||||
shell_name = "zsh" if "zsh" in shell else "fish" if "fish" in shell else "bash"
|
||||
|
||||
try:
|
||||
cli_dir = Path(__file__).parent
|
||||
completions_dir = cli_dir / "completions"
|
||||
home = Path.home()
|
||||
|
||||
installed = False
|
||||
def install_completion(src_name: str, dest_dir: Path, dest_name: str) -> None:
|
||||
"""Install completion file from source to destination."""
|
||||
src_file = completions_dir / src_name
|
||||
if src_file.exists():
|
||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(src_file, dest_dir / dest_name)
|
||||
|
||||
if shell_name == "bash":
|
||||
# Use XDG standard location for bash-completion (auto-loaded)
|
||||
src_file = completions_dir / "kt-completion.bash"
|
||||
dest_dir = home / ".local" / "share" / "bash-completion" / "completions"
|
||||
dest_file = dest_dir / "kt"
|
||||
|
||||
if src_file.exists():
|
||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(src_file, dest_file)
|
||||
installed = True
|
||||
|
||||
install_completion(
|
||||
"kt-completion.bash", home / ".local" / "share" / "bash-completion" / "completions", "kt"
|
||||
)
|
||||
elif shell_name == "zsh":
|
||||
src_file = completions_dir / "_kt"
|
||||
dest_dir = home / ".zfunc"
|
||||
dest_file = dest_dir / "_kt"
|
||||
|
||||
if src_file.exists():
|
||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(src_file, dest_file)
|
||||
installed = True
|
||||
|
||||
install_completion("_kt", home / ".zfunc", "_kt")
|
||||
elif shell_name == "fish":
|
||||
# Fish auto-loads from this directory
|
||||
src_file = completions_dir / "kt.fish"
|
||||
dest_dir = home / ".config" / "fish" / "completions"
|
||||
dest_file = dest_dir / "kt.fish"
|
||||
|
||||
if src_file.exists():
|
||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(src_file, dest_file)
|
||||
installed = True
|
||||
install_completion("kt.fish", home / ".config" / "fish" / "completions", "kt.fish")
|
||||
|
||||
# Mark as installed
|
||||
settings.set("general._completion_installed", True)
|
||||
|
|
@ -403,6 +467,20 @@ def _apply_saved_language() -> None:
|
|||
set_lang(lang)
|
||||
|
||||
|
||||
app.command(name="version", help="Show version information")(version.version)
|
||||
app.command(name="chat", help="Interactive chat with running model")(chat.chat)
|
||||
app.command(name="quant", help="Quantize model weights")(quant.quant)
|
||||
app.command(name="edit", help="Edit model information")(model.edit_model)
|
||||
app.command(name="bench", help="Run full benchmark")(bench.bench)
|
||||
app.command(name="microbench", help="Run micro-benchmark")(bench.microbench)
|
||||
app.command(name="doctor", help="Diagnose environment issues")(doctor.doctor)
|
||||
|
||||
# Register sub-apps
|
||||
app.add_typer(model.app, name="model", help="Manage models and storage paths")
|
||||
app.add_typer(config.app, name="config", help="Manage configuration")
|
||||
app.add_typer(sft.app, name="sft", help="Fine-tuning with LlamaFactory")
|
||||
|
||||
|
||||
def main():
|
||||
"""Main entry point."""
|
||||
# Apply saved language setting first (before anything else for correct help display)
|
||||
|
|
@ -414,7 +492,7 @@ def main():
|
|||
# Check for first run (but not for certain commands)
|
||||
# Skip first-run check for: --help, config commands, version
|
||||
args = sys.argv[1:] if len(sys.argv) > 1 else []
|
||||
skip_commands = ["--help", "-h", "config", "version", "--version"]
|
||||
skip_commands = ["--help", "-h", "config", "version", "--version", "--no-tui"]
|
||||
|
||||
should_check_first_run = True
|
||||
for arg in args:
|
||||
|
|
@ -422,12 +500,35 @@ def main():
|
|||
should_check_first_run = False
|
||||
break
|
||||
|
||||
# Handle no arguments case
|
||||
if not args:
|
||||
# Check if this is first run
|
||||
from kt_kernel.cli.config.settings import DEFAULT_CONFIG_FILE, get_settings
|
||||
|
||||
is_first_run = False
|
||||
if not DEFAULT_CONFIG_FILE.exists():
|
||||
is_first_run = True
|
||||
else:
|
||||
settings = get_settings()
|
||||
if not settings.get("general._initialized"):
|
||||
is_first_run = True
|
||||
|
||||
if is_first_run:
|
||||
# First run - start initialization
|
||||
_install_shell_completion()
|
||||
check_first_run()
|
||||
return
|
||||
else:
|
||||
# Not first run - show help
|
||||
app(["--help"])
|
||||
return
|
||||
|
||||
# Auto-install shell completion on first run
|
||||
if should_check_first_run:
|
||||
_install_shell_completion()
|
||||
|
||||
# Check first run before running commands
|
||||
if should_check_first_run and args:
|
||||
if should_check_first_run:
|
||||
check_first_run()
|
||||
|
||||
# Handle "run" command specially to pass through unknown options
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue