mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-28 20:00:06 +00:00
kt-cli enhancement (#1834)
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
* [feat]: redesign kt run interactive configuration with i18n support - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port) - Add configuration save/load system (~/.ktransformers/run_configs.yaml) - Add i18n support for kt chat (en/zh translations) - Add universal input validators with auto-retry and Chinese comma support - Add port availability checker with auto-suggestion - Add parser configuration (--tool-call-parser, --reasoning-parser) - Remove tuna command and clean up redundant files - Fix: variable reference bug in run.py, filter to show only MoE models * [feat]: unify model selection UI and enable shared experts fusion by default - Unify kt run model selection table with kt model list display * Add Total size, MoE Size, Repo, and SHA256 status columns * Use consistent formatting and styling * Improve user decision-making with more information - Enable --disable-shared-experts-fusion by default * Change default value from False to True * Users can still override with --enable-shared-experts-fusion * [feat]: improve kt chat with performance metrics and better CJK support - Add performance metrics display after each response * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token) * Accurate input/output token counts using model tokenizer * Fallback to estimation if tokenizer unavailable * Metrics shown in dim style (not prominent) - Fix Chinese character input issues * Replace Prompt.ask() with console.input() for better CJK support * Fixes backspace deletion showing half-characters - Suppress NumPy subnormal warnings * Filter "The value of the smallest subnormal" warnings * Cleaner CLI output on certain hardware environments * [fix]: correct TTFT measurement in kt chat - Move start_time initialization before API call - Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms - Now correctly measures time from request sent to 
first token received * [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案 * [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力 * [docs]: 添加 Clawdbot 飞书接入教程链接 * [feat]: improve CLI table display, model verification, and chat experience - Add sequence number (#) column to all model tables by default - Filter kt edit to show only MoE GPU models (exclude AMX) - Extend kt model verify to check *.json and *.py files in addition to weights - Fix re-verification bug where repaired files caused false failures - Suppress tokenizer debug output in kt chat token counting * [fix]: fix cpu cores. --------- Co-authored-by: skqliao <skqliao@gmail.com>
This commit is contained in:
parent
4f64665758
commit
56cbd69ac4
23 changed files with 10327 additions and 781 deletions
250
kt-kernel/python/cli/utils/model_discovery.py
Normal file
250
kt-kernel/python/cli/utils/model_discovery.py
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
"""
|
||||
Model Discovery Utilities
|
||||
|
||||
Shared functions for discovering and registering new models across different commands.
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
from pathlib import Path
|
||||
from rich.console import Console
|
||||
|
||||
from kt_kernel.cli.utils.model_scanner import (
|
||||
discover_models,
|
||||
scan_directory_for_models,
|
||||
ScannedModel,
|
||||
)
|
||||
from kt_kernel.cli.utils.user_model_registry import UserModelRegistry, UserModel
|
||||
|
||||
|
||||
# Module-level Rich console; the functions in this file print their messages through it.
console = Console()
|
||||
|
||||
|
||||
def discover_and_register_global(
    min_size_gb: float = 2.0, max_depth: int = 6, show_progress: bool = True, lang: str = "en"
) -> Tuple[int, int, List[UserModel]]:
    """
    Run a global model scan and add every not-yet-registered model to the registry.

    Args:
        min_size_gb: Minimum model size in GB, forwarded to discover_models
        max_depth: Maximum search depth, forwarded to discover_models
        show_progress: When True, print a "scanning" notice before the scan starts
        lang: Language for the notice ("zh" for Chinese; anything else prints English)

    Returns:
        Tuple of (total_found, new_found, registered_models), where
        registered_models holds only the models whose registration succeeded
    """
    registry = UserModelRegistry()

    if show_progress:
        notice = (
            "[dim]正在扫描系统中的模型权重,这可能需要30-60秒...[/dim]"
            if lang == "zh"
            else "[dim]Scanning system for model weights, this may take 30-60 seconds...[/dim]"
        )
        console.print(notice)

    # mount_points=None requests the global scan
    discovered = discover_models(mount_points=None, min_size_gb=min_size_gb, max_depth=max_depth)

    # Keep only the models whose path is not in the registry yet
    unregistered = [candidate for candidate in discovered if not registry.find_by_path(candidate.path)]

    # Registration may fail per model; failures are dropped from the result list
    registered = []
    for candidate in unregistered:
        entry = _create_and_register_model(registry, candidate)
        if entry:
            registered.append(entry)

    return len(discovered), len(unregistered), registered
|
||||
|
||||
|
||||
def discover_and_register_path(
    path: str,
    min_size_gb: float = 2.0,
    existing_paths: Optional[set] = None,
    show_progress: bool = True,
    lang: str = "en",
) -> Tuple[int, int, List[UserModel]]:
    """
    Scan one directory for models and register those that are not known yet.

    A model directory is skipped when the registry already has its path or
    when the path is contained in ``existing_paths``.

    Args:
        path: Directory path to scan
        min_size_gb: Minimum model file size in GB, forwarded to the scanner
        existing_paths: Set of paths already discovered in this session (optional)
        show_progress: When True, print a "scanning" notice before the scan starts
        lang: Language for the notice ("zh" for Chinese; anything else prints English)

    Returns:
        Tuple of (total_found, new_found, registered_models); (0, 0, []) when
        the scan yields nothing
    """
    registry = UserModelRegistry()

    if show_progress:
        notice = f"[dim]正在扫描 {path}...[/dim]" if lang == "zh" else f"[dim]Scanning {path}...[/dim]"
        console.print(notice)

    scan_results = scan_directory_for_models(path, min_file_size_gb=min_size_gb)
    if not scan_results:
        return 0, 0, []

    def _already_known(candidate_path) -> bool:
        # Registry lookup first, then the per-session set (if one was given)
        if registry.find_by_path(candidate_path):
            return True
        return bool(existing_paths) and candidate_path in existing_paths

    # Wrap each unknown directory's scan tuple in a ScannedModel
    fresh = [
        ScannedModel(path=dir_path, format=fmt, size_bytes=total_bytes, file_count=n_files, files=file_list)
        for dir_path, (fmt, total_bytes, n_files, file_list) in scan_results.items()
        if not _already_known(dir_path)
    ]

    # Registration may fail per model; failures are dropped from the result list
    attempts = (_create_and_register_model(registry, scanned) for scanned in fresh)
    registered = [entry for entry in attempts if entry]

    return len(scan_results), len(fresh), registered
|
||||
|
||||
|
||||
def _create_and_register_model(registry: UserModelRegistry, scanned_model: ScannedModel) -> Optional[UserModel]:
    """
    Build a UserModel for a scanned model and add it to the registry.

    The model name comes from ``registry.suggest_name`` applied to the scanned
    folder name, so name conflicts are resolved there. Repo detection and MoE
    analysis are best-effort: any exception they raise is swallowed and the
    corresponding fields are simply left unset.

    Args:
        registry: UserModelRegistry instance
        scanned_model: ScannedModel to register

    Returns:
        The registered UserModel, or None if ``registry.add_model`` raised
    """
    name = registry.suggest_name(scanned_model.folder_name)
    entry = UserModel(name=name, path=scanned_model.path, format=scanned_model.format)

    # Best-effort repo detection; imported lazily so a failing import is also tolerated
    try:
        from kt_kernel.cli.utils.repo_detector import detect_repo_for_model

        detected = detect_repo_for_model(scanned_model.path)
        if detected:
            detected_id, detected_type = detected
            entry.repo_id = detected_id
            entry.repo_type = detected_type
    except Exception:
        pass

    # Best-effort MoE analysis, attempted only for safetensors models
    if scanned_model.format == "safetensors":
        try:
            from kt_kernel.cli.utils.analyze_moe_model import analyze_moe_model

            analysis = analyze_moe_model(scanned_model.path, use_cache=True)
            if not (analysis and analysis.get("is_moe")):
                entry.is_moe = False
            else:
                entry.is_moe = True
                entry.moe_num_experts = analysis.get("num_experts")
                entry.moe_num_experts_per_tok = analysis.get("num_experts_per_tok")
        except Exception:
            # On failure is_moe is left untouched
            pass

    try:
        registry.add_model(entry)
    except Exception:
        # Not expected after suggest_name, but report failure instead of raising
        return None
    return entry
|
||||
|
||||
|
||||
def format_discovery_summary(
    total_found: int,
    new_found: int,
    registered: List[UserModel],
    lang: str = "en",
    show_models: bool = True,
    max_show: int = 10,
) -> None:
    """
    Print formatted discovery summary.

    When no new models were found, a single status line is printed (either
    "all already in the list" or "no models found") and the function returns.
    Otherwise it prints the found/new counts, the number of successfully
    registered models, and optionally a listing of up to ``max_show`` of them.

    Args:
        total_found: Total models found
        new_found: New models found
        registered: List of registered UserModel objects
        lang: Language ("zh" for Chinese; anything else prints English)
        show_models: Whether to show model list
        max_show: Maximum models to show
    """
    console.print()

    if new_found == 0:
        if total_found > 0:
            if lang == "zh":
                console.print(f"[green]✓[/green] 扫描完成:找到 {total_found} 个模型,所有模型均已在列表中")
            else:
                console.print(f"[green]✓[/green] Scan complete: found {total_found} models, all already in the list")
        else:
            if lang == "zh":
                console.print("[yellow]未找到模型[/yellow]")
            else:
                console.print("[yellow]No models found[/yellow]")
        return

    # Show summary
    if lang == "zh":
        console.print(f"[green]✓[/green] 扫描完成:找到 {total_found} 个模型,其中 {new_found} 个为新模型")
    else:
        console.print(f"[green]✓[/green] Scan complete: found {total_found} models, {new_found} are new")

    # Show registered count (may be lower than new_found if some registrations failed)
    if len(registered) > 0:
        if lang == "zh":
            console.print(f"[green]✓[/green] 成功添加 {len(registered)} 个新模型到列表")
        else:
            console.print(f"[green]✓[/green] Successfully added {len(registered)} new models to list")

    # Show model list
    if show_models and registered:
        console.print()
        if lang == "zh":
            console.print(f"[dim]新发现的模型(前{max_show}个):[/dim]")
        else:
            console.print(f"[dim]Newly discovered models (first {max_show}):[/dim]")

        # Only name, format and path are shown; size is not available on UserModel here
        for i, model in enumerate(registered[:max_show], 1):
            console.print(f" {i}. {model.name} ({model.format})")
            console.print(f" [dim]{model.path}[/dim]")

        if len(registered) > max_show:
            remaining = len(registered) - max_show
            if lang == "zh":
                console.print(f" [dim]... 还有 {remaining} 个新模型[/dim]")
            else:
                console.print(f" [dim]... and {remaining} more new models[/dim]")
|
||||
Loading…
Add table
Add a link
Reference in a new issue