mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-29 12:19:50 +00:00
kt-cli enhancement (#1834)
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
* [feat]: redesign kt run interactive configuration with i18n support - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port) - Add configuration save/load system (~/.ktransformers/run_configs.yaml) - Add i18n support for kt chat (en/zh translations) - Add universal input validators with auto-retry and Chinese comma support - Add port availability checker with auto-suggestion - Add parser configuration (--tool-call-parser, --reasoning-parser) - Remove tuna command and clean up redundant files - Fix: variable reference bug in run.py, filter to show only MoE models * [feat]: unify model selection UI and enable shared experts fusion by default - Unify kt run model selection table with kt model list display * Add Total size, MoE Size, Repo, and SHA256 status columns * Use consistent formatting and styling * Improve user decision-making with more information - Enable --disable-shared-experts-fusion by default * Change default value from False to True * Users can still override with --enable-shared-experts-fusion * [feat]: improve kt chat with performance metrics and better CJK support - Add performance metrics display after each response * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token) * Accurate input/output token counts using model tokenizer * Fallback to estimation if tokenizer unavailable * Metrics shown in dim style (not prominent) - Fix Chinese character input issues * Replace Prompt.ask() with console.input() for better CJK support * Fixes backspace deletion showing half-characters - Suppress NumPy subnormal warnings * Filter "The value of the smallest subnormal" warnings * Cleaner CLI output on certain hardware environments * [fix]: correct TTFT measurement in kt chat - Move start_time initialization before API call - Previously start_time was set when receiving first chunk, causing TTFT ≈ 0ms - Now correctly measures time from request sent to 
first token received * [docs]: 添加 Clawdbot 集成指南 - KTransformers 企业级 AI 助手部署方案 * [docs]: 强调推荐使用 Kimi K2.5 作为核心模型,突出企业级推理能力 * [docs]: 添加 Clawdbot 飞书接入教程链接 * [feat]: improve CLI table display, model verification, and chat experience - Add sequence number (#) column to all model tables by default - Filter kt edit to show only MoE GPU models (exclude AMX) - Extend kt model verify to check *.json and *.py files in addition to weights - Fix re-verification bug where repaired files caused false failures - Suppress tokenizer debug output in kt chat token counting * [fix]: fix cpu cores. --------- Co-authored-by: skqliao <skqliao@gmail.com>
This commit is contained in:
parent
4f64665758
commit
56cbd69ac4
23 changed files with 10327 additions and 781 deletions
111
kt-kernel/python/cli/utils/run_configs.py
Normal file
111
kt-kernel/python/cli/utils/run_configs.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
"""
|
||||
Configuration save/load for kt run command.
|
||||
|
||||
Manages saved run configurations bound to specific models.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
from datetime import datetime
|
||||
import yaml
|
||||
|
||||
|
||||
CONFIG_FILE = Path.home() / ".ktransformers" / "run_configs.yaml"
|
||||
|
||||
|
||||
class RunConfigManager:
    """Manager for saved run configurations.

    Persists per-model lists of run configurations as YAML in
    ``CONFIG_FILE``. The on-disk structure is
    ``{"version": "1.0", "configs": {model_id: [config, ...]}}``.
    """

    def __init__(self):
        self.config_file = CONFIG_FILE
        self._ensure_config_file()

    def _ensure_config_file(self):
        """Ensure the config file (and its parent directory) exists."""
        if not self.config_file.exists():
            self.config_file.parent.mkdir(parents=True, exist_ok=True)
            self._save_data({"version": "1.0", "configs": {}})

    def _load_data(self) -> Dict:
        """Load raw config data.

        Any read/parse failure (missing file, corrupt YAML) falls back to
        an empty default structure instead of propagating — saved configs
        are a convenience, not critical state.
        """
        try:
            with open(self.config_file, "r", encoding="utf-8") as f:
                # safe_load returns None for an empty file; normalize it.
                return yaml.safe_load(f) or {"version": "1.0", "configs": {}}
        except Exception:
            return {"version": "1.0", "configs": {}}

    def _save_data(self, data: Dict):
        """Save raw config data to disk as YAML."""
        # Re-create the parent directory in case it was removed after
        # _ensure_config_file ran (robustness fix; original assumed it
        # still existed and would raise FileNotFoundError).
        self.config_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.config_file, "w", encoding="utf-8") as f:
            yaml.dump(data, f, allow_unicode=True, default_flow_style=False)

    def list_configs(self, model_id: str) -> List[Dict[str, Any]]:
        """List all saved configs for a model.

        Args:
            model_id: Model ID whose configs to list.

        Returns:
            List of config dicts with 'config_name' and other fields;
            empty list if none are saved or the stored value is malformed.
        """
        data = self._load_data()
        configs = data.get("configs", {}).get(model_id, [])
        # Guard against hand-edited files where the value is not a list.
        return configs if isinstance(configs, list) else []

    def save_config(self, model_id: str, config: Dict[str, Any]):
        """Save a configuration for a model.

        Args:
            model_id: Model ID to bind config to.
            config: Configuration dict with all run parameters. The
                caller's dict is not modified; a timestamped shallow
                copy is stored.
        """
        data = self._load_data()
        data.setdefault("configs", {}).setdefault(model_id, [])

        # Stamp a copy rather than the caller's dict (bug fix: the
        # original mutated `config` in place, leaking `created_at`
        # back into the caller's object).
        entry = dict(config)
        entry["created_at"] = datetime.now().isoformat()

        data["configs"][model_id].append(entry)
        self._save_data(data)

    def delete_config(self, model_id: str, config_index: int) -> bool:
        """Delete a saved configuration.

        Args:
            model_id: Model ID.
            config_index: Index of config to delete (0-based).

        Returns:
            True if deleted, False if not found.
        """
        data = self._load_data()

        if model_id not in data.get("configs", {}):
            return False

        configs = data["configs"][model_id]
        if config_index < 0 or config_index >= len(configs):
            return False

        configs.pop(config_index)
        self._save_data(data)
        return True

    def get_config(self, model_id: str, config_index: int) -> Optional[Dict[str, Any]]:
        """Get a specific saved configuration.

        Args:
            model_id: Model ID.
            config_index: Index of config to get (0-based).

        Returns:
            Config dict or None if not found.
        """
        configs = self.list_configs(model_id)
        if config_index < 0 or config_index >= len(configs):
            return None
        return configs[config_index]
Loading…
Add table
Add a link
Reference in a new issue