mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-29 12:19:50 +00:00
kt-cli enhancement (#1834)
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
* [feat]: redesign kt run interactive configuration with i18n support - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port) - Add configuration save/load system (~/.ktransformers/run_configs.yaml) - Add i18n support for kt chat (en/zh translations) - Add universal input validators with auto-retry and Chinese comma support - Add port availability checker with auto-suggestion - Add parser configuration (--tool-call-parser, --reasoning-parser) - Remove tuna command and clean up redundant files - Fix: variable reference bug in run.py, filter to show only MoE models * [feat]: unify model selection UI and enable shared experts fusion by default - Unify kt run model selection table with kt model list display * Add Total size, MoE Size, Repo, and SHA256 status columns * Use consistent formatting and styling * Improve user decision-making with more information - Enable --disable-shared-experts-fusion by default * Change default value from False to True * Users can still override with --enable-shared-experts-fusion * [feat]: improve kt chat with performance metrics and better CJK support - Add performance metrics display after each response * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token) * Accurate input/output token counts using model tokenizer * Fallback to estimation if tokenizer unavailable * Metrics shown in dim style (not prominent) - Fix Chinese character input issues * Replace Prompt.ask() with console.input() for better CJK support * Fixes backspace deletion showing half-characters - Suppress NumPy subnormal warnings * Filter "The value of the smallest subnormal" warnings * Cleaner CLI output on certain hardware environments * [fix]: correct TTFT measurement in kt chat - Move start_time initialization before API call - Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms - Now correctly measures time from request sent to 
first token received * [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案 * [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力 * [docs]: 添加 Clawdbot 飞书接入教程链接 * [feat]: improve CLI table display, model verification, and chat experience - Add sequence number (#) column to all model tables by default - Filter kt edit to show only MoE GPU models (exclude AMX) - Extend kt model verify to check *.json and *.py files in addition to weights - Fix re-verification bug where repaired files caused false failures - Suppress tokenizer debug output in kt chat token counting * [fix]: fix cpu cores. --------- Co-authored-by: skqliao <skqliao@gmail.com>
This commit is contained in:
parent
4f64665758
commit
56cbd69ac4
23 changed files with 10327 additions and 781 deletions
146
kt-kernel/python/cli/utils/download_helper.py
Normal file
146
kt-kernel/python/cli/utils/download_helper.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""Helper functions for interactive model download."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
import fnmatch
|
||||
|
||||
|
||||
def list_remote_files_hf(repo_id: str, use_mirror: bool = False) -> List[Dict[str, Any]]:
    """List files in a HuggingFace repository.

    Args:
        repo_id: Repository identifier, e.g. "org/model-name".
        use_mirror: If True and ``HF_ENDPOINT`` is not already set,
            temporarily point the HuggingFace client at https://hf-mirror.com.

    Returns:
        List of dicts with keys: 'path', 'size' (in bytes).
    """
    import os

    from huggingface_hub import HfApi

    # Temporarily switch to the mirror endpoint, but never clobber an
    # endpoint the user configured themselves.
    original_endpoint = os.environ.get("HF_ENDPOINT")
    if use_mirror and not original_endpoint:
        os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

    try:
        api = HfApi()
        files_info = api.list_repo_tree(repo_id=repo_id, recursive=True)

        result = []
        for item in files_info:
            # Skip directories; only files carry a meaningful size.
            if hasattr(item, "type") and item.type == "directory":
                continue

            # Tree entries are objects with .path/.size; fall back defensively.
            file_path = item.path if hasattr(item, "path") else str(item)
            file_size = item.size if hasattr(item, "size") else 0
            result.append({"path": file_path, "size": file_size})

        return result
    finally:
        # Restore the environment exactly as we found it.
        if use_mirror and not original_endpoint:
            os.environ.pop("HF_ENDPOINT", None)
        elif original_endpoint:
            os.environ["HF_ENDPOINT"] = original_endpoint
|
||||
|
||||
|
||||
def list_remote_files_ms(repo_id: str) -> List[Dict[str, Any]]:
    """List files in a ModelScope repository.

    Args:
        repo_id: ModelScope model id, e.g. "org/model-name".

    Returns:
        List of dicts with keys: 'path', 'size' (in bytes).
    """
    from modelscope.hub.api import HubApi

    api = HubApi()
    files_info = api.get_model_files(model_id=repo_id, recursive=True)

    result = []
    for file_info in files_info:
        # Prefer "Name", fall back to "Path" — the key used appears to vary
        # by API response (NOTE(review): confirm against ModelScope docs).
        file_path = file_info.get("Name", file_info.get("Path", ""))
        file_size = file_info.get("Size", 0)
        result.append({"path": file_path, "size": file_size})

    return result
|
||||
|
||||
|
||||
def filter_files_by_pattern(files: List[Dict[str, Any]], pattern: str) -> List[Dict[str, Any]]:
    """Filter files by glob pattern.

    A file is kept when either its basename or its full repo-relative path
    matches *pattern* (``fnmatch`` semantics). The pattern ``"*"`` is a
    fast path that keeps everything.

    Args:
        files: Dicts with a 'path' key (repo-relative path).
        pattern: Glob pattern, e.g. ``"*.safetensors"``.

    Returns:
        New list containing only the matching file dicts (original order).
    """
    if pattern == "*":
        return files

    return [
        f
        for f in files
        if fnmatch.fnmatch(Path(f["path"]).name, pattern)
        or fnmatch.fnmatch(f["path"], pattern)
    ]
|
||||
|
||||
|
||||
def calculate_total_size(files: List[Dict[str, Any]]) -> int:
    """Calculate the total size of *files* in bytes.

    Args:
        files: Dicts each carrying a numeric 'size' entry.

    Returns:
        Sum of all sizes; 0 for an empty list.
    """
    return sum(f["size"] for f in files)
|
||||
|
||||
|
||||
def format_file_list_table(files: List[Dict[str, Any]], max_display: int = 10):
    """Format a file list as a rich Table for display.

    Shows at most *max_display* individual file rows; when the list is
    longer, a trailing summary row reports how many files were omitted.

    Args:
        files: Dicts with 'path' and 'size' keys.
        max_display: Maximum number of per-file rows to render.

    Returns:
        A ``rich.table.Table`` ready to print with a rich console.
    """
    from rich.table import Table

    from kt_kernel.cli.utils.model_scanner import format_size

    table = Table(show_header=True, header_style="bold")
    table.add_column("File", style="cyan", overflow="fold")
    table.add_column("Size", justify="right")

    # Only the first max_display files get their own row.
    for file in files[:max_display]:
        table.add_row(file["path"], format_size(file["size"]))

    if len(files) > max_display:
        table.add_row(f"... and {len(files) - max_display} more files", "[dim]...[/dim]")

    return table
|
||||
|
||||
|
||||
def verify_repo_exists(repo_id: str, repo_type: str, use_mirror: bool = False) -> Tuple[bool, str]:
    """Verify that a remote repository exists.

    Args:
        repo_id: Repository identifier, e.g. "org/model-name".
        repo_type: "huggingface" for the HF Hub; anything else is treated
            as ModelScope.
        use_mirror: HuggingFace only — temporarily use https://hf-mirror.com
            when ``HF_ENDPOINT`` is not already set.

    Returns:
        ``(exists, message)`` — *exists* is False on any lookup failure and
        *message* then contains the underlying error text.
    """
    try:
        if repo_type == "huggingface":
            import os

            # Import BEFORE mutating the environment: previously the import
            # sat between setting HF_ENDPOINT and the try/finally that
            # restores it, so a failed import leaked the mirror endpoint
            # into the process environment.
            from huggingface_hub import HfApi

            original_endpoint = os.environ.get("HF_ENDPOINT")
            if use_mirror and not original_endpoint:
                os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

            try:
                api = HfApi()
                api.repo_info(repo_id=repo_id, repo_type="model")
                return True, "Repository found"
            finally:
                # Restore the environment exactly as we found it.
                if use_mirror and not original_endpoint:
                    os.environ.pop("HF_ENDPOINT", None)
                elif original_endpoint:
                    os.environ["HF_ENDPOINT"] = original_endpoint

        else:  # modelscope
            from modelscope.hub.api import HubApi

            api = HubApi()
            api.get_model(model_id=repo_id)
            return True, "Repository found"

    except Exception as e:
        # Any failure (missing package, network error, 404) maps to
        # "not found"; the caller gets the reason in the message.
        return False, f"Repository not found: {str(e)}"
|
||||
Loading…
Add table
Add a link
Reference in a new issue