mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-28 20:00:06 +00:00
kt-cli enhancement (#1834)
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
* [feat]: redesign kt run interactive configuration with i18n support
  - Redesign kt run with 8-step interactive flow (model selection, inference method, NUMA/CPU, GPU experts, KV cache, GPU/TP selection, parsers, host/port)
  - Add configuration save/load system (~/.ktransformers/run_configs.yaml)
  - Add i18n support for kt chat (en/zh translations)
  - Add universal input validators with auto-retry and Chinese comma support
  - Add port availability checker with auto-suggestion
  - Add parser configuration (--tool-call-parser, --reasoning-parser)
  - Remove tuna command and clean up redundant files
  - Fix variable reference bug in run.py; filter to show only MoE models

* [feat]: unify model selection UI and enable shared experts fusion by default
  - Unify kt run model selection table with kt model list display
    * Add Total size, MoE Size, Repo, and SHA256 status columns
    * Use consistent formatting and styling
    * Improve user decision-making with more information
  - Enable --disable-shared-experts-fusion by default
    * Change default value from False to True
    * Users can still override with --enable-shared-experts-fusion

* [feat]: improve kt chat with performance metrics and better CJK support
  - Add performance metrics display after each response
    * Total time, TTFT (Time To First Token), TPOT (Time Per Output Token)
    * Accurate input/output token counts using the model tokenizer
    * Fallback to estimation if the tokenizer is unavailable
    * Metrics shown in dim style (not prominent)
  - Fix Chinese character input issues
    * Replace Prompt.ask() with console.input() for better CJK support
    * Fixes backspace deletion showing half-characters
  - Suppress NumPy subnormal warnings
    * Filter "The value of the smallest subnormal" warnings
    * Cleaner CLI output on certain hardware environments

* [fix]: correct TTFT measurement in kt chat (see the sketch below)
  - Move start_time initialization before the API call
  - Previously start_time was set when receiving the first chunk, causing TTFT ≈ 0ms
  - Now correctly measures time from request sent to first token received

* [docs]: add Clawdbot integration guide - an enterprise-grade AI assistant deployment solution for KTransformers
* [docs]: emphasize Kimi K2.5 as the recommended core model, highlighting enterprise-grade inference capability
* [docs]: add link to the Clawdbot Feishu integration tutorial

* [feat]: improve CLI table display, model verification, and chat experience
  - Add sequence number (#) column to all model tables by default
  - Filter kt edit to show only MoE GPU models (exclude AMX)
  - Extend kt model verify to check *.json and *.py files in addition to weights
  - Fix re-verification bug where repaired files caused false failures
  - Suppress tokenizer debug output in kt chat token counting

* [fix]: fix CPU cores.

---------

Co-authored-by: skqliao <skqliao@gmail.com>
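As a rough illustration of the corrected TTFT/TPOT bookkeeping described above (a minimal sketch, not kt chat's actual client code; stream_chat is a hypothetical streaming API):

    import time

    def timed_stream(stream_chat, prompt):
        start = time.perf_counter()           # before the request, not at the first chunk
        first_token_at = None
        tokens = 0
        for chunk in stream_chat(prompt):     # hypothetical: yields response chunks
            if first_token_at is None:
                first_token_at = time.perf_counter()
            tokens += 1
        end = time.perf_counter()
        ttft = first_token_at - start                       # Time To First Token
        tpot = (end - first_token_at) / max(tokens - 1, 1)  # Time Per Output Token
        return ttft, tpot

Initializing start before the call is what keeps TTFT from collapsing to ~0ms.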
This commit is contained in:
parent
4f64665758
commit
56cbd69ac4
23 changed files with 10327 additions and 781 deletions
790 kt-kernel/python/cli/utils/model_scanner.py Normal file
@@ -0,0 +1,790 @@
"""
|
||||
Model Scanner
|
||||
|
||||
Scans directories for model files (safetensors, gguf) and identifies models
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Set, Tuple, Dict
|
||||
from collections import defaultdict
|
||||
import os
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
|
||||
@dataclass
|
||||
class ScannedModel:
|
||||
"""Temporary structure for scanned model information"""
|
||||
|
||||
path: str # Absolute path to model directory
|
||||
format: str # "safetensors" | "gguf" | "mixed"
|
||||
size_bytes: int # Total size in bytes
|
||||
file_count: int # Number of model files
|
||||
files: List[str] # List of model file names
|
||||
|
||||
@property
|
||||
def size_gb(self) -> float:
|
||||
"""Get size in GB"""
|
||||
return self.size_bytes / (1024**3)
|
||||
|
||||
@property
|
||||
def folder_name(self) -> str:
|
||||
"""Get the folder name (default model name)"""
|
||||
return Path(self.path).name
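
# Example (illustrative values, not from a real scan):
#   m = ScannedModel(path="/data/models/DeepSeek-V3", format="safetensors",
#                    size_bytes=642 * 1024**3, file_count=163, files=[...])
#   m.size_gb == 642.0; m.folder_name == "DeepSeek-V3"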


class ModelScanner:
    """Scanner for discovering models in directory trees"""

    def __init__(self, min_size_gb: float = 10.0):
        """
        Initialize scanner

        Args:
            min_size_gb: Minimum folder size in GB to be considered a model
        """
        self.min_size_bytes = int(min_size_gb * 1024**3)

    def scan_directory(
        self, base_path: Path, exclude_paths: Optional[Set[str]] = None
    ) -> Tuple[List[ScannedModel], List[str]]:
        """
        Scan directory tree for models

        Args:
            base_path: Root directory to scan
            exclude_paths: Set of absolute paths to exclude from results

        Returns:
            Tuple of (valid_models, warnings)
            - valid_models: List of ScannedModel instances
            - warnings: List of warning messages
        """
        if not base_path.exists():
            raise ValueError(f"Path does not exist: {base_path}")

        if not base_path.is_dir():
            raise ValueError(f"Path is not a directory: {base_path}")

        exclude_paths = exclude_paths or set()
        results: List[ScannedModel] = []
        warnings: List[str] = []

        # Walk the directory tree
        for root, dirs, files in os.walk(base_path):
            root_path = Path(root).resolve()

            # Skip if already registered
            if str(root_path) in exclude_paths:
                dirs[:] = []  # Don't descend into this directory
                continue

            # Check for model files
            safetensors_files = [f for f in files if f.endswith(".safetensors")]
            gguf_files = [f for f in files if f.endswith(".gguf")]

            if not safetensors_files and not gguf_files:
                continue  # No model files in this directory

            # Calculate total size
            model_files = safetensors_files + gguf_files
            total_size = self._calculate_total_size(root_path, model_files)

            # Check if size meets minimum threshold
            if total_size < self.min_size_bytes:
                continue  # Too small, but keep scanning subdirectories

            # Detect format
            if safetensors_files and gguf_files:
                # Mixed format - issue warning
                warnings.append(
                    f"Mixed format detected in {root_path}: "
                    f"{len(safetensors_files)} safetensors + {len(gguf_files)} gguf files. "
                    "Please separate into different folders and re-scan."
                )
                dirs[:] = []  # Don't descend into mixed format directories
                continue

            # Determine format
            format_type = "safetensors" if safetensors_files else "gguf"

            # Create scanned model
            scanned = ScannedModel(
                path=str(root_path),
                format=format_type,
                size_bytes=total_size,
                file_count=len(model_files),
                files=model_files,
            )

            results.append(scanned)

            # Continue scanning subdirectories - they might also contain models
            # Each subdirectory will be independently checked for size >= 10GB

        return results, warnings

    def scan_single_path(self, path: Path) -> Optional[ScannedModel]:
        """
        Scan a single path for model files

        Args:
            path: Path to scan

        Returns:
            ScannedModel instance or None if not a valid model
        """
        if not path.exists() or not path.is_dir():
            return None

        # Find model files
        safetensors_files = list(path.glob("*.safetensors"))
        gguf_files = list(path.glob("*.gguf"))

        if not safetensors_files and not gguf_files:
            return None

        # Check for mixed format
        if safetensors_files and gguf_files:
            raise ValueError(
                f"Mixed format detected: {len(safetensors_files)} safetensors + "
                f"{len(gguf_files)} gguf files. Please use a single format."
            )

        # Calculate size
        model_files = [f.name for f in safetensors_files + gguf_files]
        total_size = self._calculate_total_size(path, model_files)

        # Determine format
        format_type = "safetensors" if safetensors_files else "gguf"

        return ScannedModel(
            path=str(path.resolve()),
            format=format_type,
            size_bytes=total_size,
            file_count=len(model_files),
            files=model_files,
        )

    def _calculate_total_size(self, directory: Path, filenames: List[str]) -> int:
        """
        Calculate total size of specified files in directory

        Args:
            directory: Directory containing the files
            filenames: List of filenames to sum

        Returns:
            Total size in bytes
        """
        total = 0
        for filename in filenames:
            file_path = directory / filename
            if file_path.exists():
                try:
                    total += file_path.stat().st_size
                except OSError:
                    # File might be inaccessible, skip it
                    pass
        return total


# Convenience functions


def scan_directory(
    base_path: Path, min_size_gb: float = 10.0, exclude_paths: Optional[Set[str]] = None
) -> Tuple[List[ScannedModel], List[str]]:
    """
    Convenience function to scan a directory

    Args:
        base_path: Root directory to scan
        min_size_gb: Minimum folder size in GB
        exclude_paths: Set of paths to exclude

    Returns:
        Tuple of (models, warnings)
    """
    scanner = ModelScanner(min_size_gb=min_size_gb)
    return scanner.scan_directory(base_path, exclude_paths)


def scan_single_path(path: Path) -> Optional[ScannedModel]:
    """
    Convenience function to scan a single path

    Args:
        path: Path to scan

    Returns:
        ScannedModel or None
    """
    scanner = ModelScanner()
    return scanner.scan_single_path(path)


def format_size(size_bytes: int) -> str:
    """
    Format size in bytes to human-readable string

    Args:
        size_bytes: Size in bytes

    Returns:
        Formatted string (e.g., "42.3 GB")
    """
    for unit in ["B", "KB", "MB", "GB", "TB"]:
        if size_bytes < 1024.0:
            return f"{size_bytes:.1f} {unit}"
        size_bytes /= 1024.0
    return f"{size_bytes:.1f} PB"
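
# e.g. format_size(2_048) -> "2.0 KB"; format_size(45_000_000_000) -> "41.9 GB"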


# ===== Fast Scanning with Find Command and Tree-based Root Detection =====


def find_files_fast(mount_point: str, pattern: str, max_depth: int = 6, timeout: int = 30) -> List[str]:
    """
    Use find command to quickly locate files

    Args:
        mount_point: Starting directory
        pattern: File pattern (e.g., "config.json", "*.gguf")
        max_depth: Maximum directory depth (default: 6)
        timeout: Command timeout in seconds

    Returns:
        List of absolute file paths
    """
    try:
        # Use shell=True to redirect stderr to /dev/null, ignoring permission errors
        result = subprocess.run(
            f'find "{mount_point}" -maxdepth {max_depth} -name "{pattern}" -type f 2>/dev/null',
            capture_output=True,
            text=True,
            timeout=timeout,
            shell=True,
        )

        # Return results even if returncode is non-zero (due to permission
        # errors), as long as we got some output
        if result.stdout:
            return [line.strip() for line in result.stdout.strip().split("\n") if line.strip()]
        return []
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return []
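
# e.g. find_files_fast("/mnt/data", "*.gguf") might return paths such as
# "/mnt/data/models/qwen2-72b-q4.gguf" (illustrative; results depend on the host)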


def is_valid_model_directory(directory: Path, min_size_gb: float = 10.0) -> Tuple[bool, Optional[str]]:
    """
    Check if a directory is a valid model directory

    Args:
        directory: Path to check
        min_size_gb: Minimum size in GB

    Returns:
        (is_valid, model_type) where model_type is "safetensors", "gguf", or None
    """
    if not directory.exists() or not directory.is_dir():
        return False, None

    safetensors_files = list(directory.glob("*.safetensors"))
    gguf_files = list(directory.glob("*.gguf"))

    # Determine model type
    # (safetensors presence alone decides; callers such as scan_all_models_fast
    # already locate safetensors directories via their config.json)
    model_type = None
    if safetensors_files:
        model_type = "safetensors"
    elif gguf_files:
        model_type = "gguf"
    else:
        return False, None

    # Check size - only count model files (fast!)
    files_to_count = safetensors_files if model_type == "safetensors" else gguf_files
    total_size = 0
    for f in files_to_count:
        try:
            total_size += f.stat().st_size
        except OSError:
            pass

    size_gb = total_size / (1024**3)
    if size_gb < min_size_gb:
        return False, None

    return True, model_type


def scan_all_models_fast(mount_points: List[str], min_size_gb: float = 10.0, max_depth: int = 6) -> List[str]:
    """
    Fast scan for all model paths using find command

    Args:
        mount_points: List of mount points to scan
        min_size_gb: Minimum model size in GB
        max_depth: Maximum search depth (default: 6)

    Returns:
        List of valid model directory paths
    """
    model_paths = set()

    for mount in mount_points:
        if not os.path.exists(mount):
            continue

        # Find all config.json files
        config_files = find_files_fast(mount, "config.json", max_depth=max_depth)
        for config_path in config_files:
            model_dir = Path(config_path).parent
            is_valid, model_type = is_valid_model_directory(model_dir, min_size_gb)
            if is_valid:
                model_paths.add(str(model_dir.resolve()))

        # Find all *.gguf files
        gguf_files = find_files_fast(mount, "*.gguf", max_depth=max_depth)
        for gguf_path in gguf_files:
            model_dir = Path(gguf_path).parent
            is_valid, model_type = is_valid_model_directory(model_dir, min_size_gb)
            if is_valid:
                model_paths.add(str(model_dir.resolve()))

    return sorted(model_paths)


def get_root_subdirs() -> List[str]:
    """
    Get subdirectories of / that are worth scanning

    Filters out system paths only

    Returns:
        List of directories to scan
    """
    # System paths to exclude
    excluded = {
        "dev",
        "proc",
        "sys",
        "run",
        "boot",
        "tmp",
        "usr",
        "lib",
        "lib64",
        "bin",
        "sbin",
        "etc",
        "opt",
        "var",
        "snap",
    }

    scan_dirs = []

    try:
        for entry in os.scandir("/"):
            if not entry.is_dir():
                continue

            # Skip excluded paths
            if entry.name in excluded:
                continue

            scan_dirs.append(entry.path)

    except PermissionError:
        pass

    return sorted(scan_dirs)


def scan_directory_for_models(directory: str, min_file_size_gb: float = 2.0) -> Dict[str, tuple]:
    """
    Scan a directory for models using find command with size filter

    Uses find -size +2G to only locate large model files (>=2GB)

    Args:
        directory: Directory to scan
        min_file_size_gb: Minimum individual file size in GB (default: 2.0)

    Returns:
        Dict mapping model_path -> (model_type, size_bytes, file_count, files)
    """
    model_info = {}

    # Convert GB to find's format (e.g., 2GB = +2G)
    if min_file_size_gb >= 1.0:
        size_filter = f"+{int(min_file_size_gb)}G"
    else:
        size_mb = int(min_file_size_gb * 1024)
        size_filter = f"+{size_mb}M"

    # 1. Find *.gguf files >= 2GB
    gguf_cmd = f'find "{directory}" -name "*.gguf" -type f -size {size_filter} -printf "%p\\t%s\\n" 2>/dev/null'
    result = subprocess.run(gguf_cmd, shell=True, capture_output=True, text=True, timeout=120)

    # Group by directory
    gguf_dirs = defaultdict(list)
    for line in result.stdout.strip().split("\n"):
        if not line:
            continue
        parts = line.split("\t")
        if len(parts) != 2:
            continue
        file_path, size_str = parts
        file_path_obj = Path(file_path)
        dir_path = str(file_path_obj.parent)
        gguf_dirs[dir_path].append((file_path_obj.name, int(size_str)))

    # Add all gguf directories
    for dir_path, files in gguf_dirs.items():
        total_size = sum(size for _, size in files)
        model_info[dir_path] = ("gguf", total_size, len(files), [name for name, _ in files])

    # 2. Find *.safetensors files >= 2GB
    safetensors_cmd = (
        f'find "{directory}" -name "*.safetensors" -type f -size {size_filter} -printf "%p\\t%s\\n" 2>/dev/null'
    )
    result = subprocess.run(safetensors_cmd, shell=True, capture_output=True, text=True, timeout=120)

    # Group by directory
    safetensors_dirs = defaultdict(list)
    for line in result.stdout.strip().split("\n"):
        if not line:
            continue
        parts = line.split("\t")
        if len(parts) != 2:
            continue
        file_path, size_str = parts
        file_path_obj = Path(file_path)
        dir_path = str(file_path_obj.parent)
        safetensors_dirs[dir_path].append((file_path_obj.name, int(size_str)))

    # 3. Check each safetensors directory for config.json
    for dir_path, files in safetensors_dirs.items():
        if os.path.exists(os.path.join(dir_path, "config.json")):
            total_size = sum(size for _, size in files)
            model_info[dir_path] = ("safetensors", total_size, len(files), [name for name, _ in files])

    return model_info
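
# Example return shape (illustrative):
#   {"/data/models/qwen2-72b": ("gguf", 48_000_000_000, 2,
#                               ["qwen2-72b-q4-00001.gguf", "qwen2-72b-q4-00002.gguf"])}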


def scan_all_models_with_info(
    mount_points: Optional[List[str]] = None, min_size_gb: float = 10.0, max_depth: int = 6
) -> Dict[str, tuple]:
    """
    Fast scan with parallel directory scanning

    Strategy:
    1. Use provided directories or auto-detect root subdirectories
    2. Scan each directory in parallel (one thread per directory)
    3. Use find -size +2G to find large model files (>=2GB)

    Args:
        mount_points: Specific directories to scan, or None to auto-detect from / subdirs
        min_size_gb: No longer used (kept for API compatibility)
        max_depth: No longer used (kept for API compatibility)

    Returns:
        Dict mapping model_path -> (model_type, size_bytes, file_count, files)
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    # Get directories to scan
    if mount_points is None:
        # Get root subdirectories (exclude system paths)
        scan_dirs = get_root_subdirs()
    else:
        scan_dirs = mount_points

    if not scan_dirs:
        return {}

    model_info = {}

    # Scan each directory in parallel (max 8 concurrent)
    # Use 2GB threshold to find model files
    with ThreadPoolExecutor(max_workers=min(len(scan_dirs), 8)) as executor:
        futures = {executor.submit(scan_directory_for_models, d, 2.0): d for d in scan_dirs}

        for future in as_completed(futures):
            try:
                dir_results = future.result()
                model_info.update(dir_results)
            except Exception:
                # Skip directories with errors
                pass

    return model_info


def find_model_roots_from_paths(model_paths: List[str]) -> Tuple[List[str], Dict[str, int]]:
    """
    Find optimal root paths from model paths using tree-based algorithm

    Algorithm:
    1. Build path tree with all intermediate paths
    2. DFS to calculate f(x) = subtree sum (number of models in subtree)
    3. Find roots where f(parent) = f(x) >= max(f(children))

    Args:
        model_paths: List of model directory paths

    Returns:
        (root_paths, subtree_sizes) where:
        - root_paths: List of inferred root directories
        - subtree_sizes: Dict mapping each root to number of models
    """
    if not model_paths:
        return [], {}

    # 1. Build path set (including all intermediate paths)
    all_paths = set()
    model_set = set(model_paths)

    for model_path in model_paths:
        path = Path(model_path)
        for i in range(1, len(path.parts) + 1):
            all_paths.add(str(Path(*path.parts[:i])))

    # 2. Build parent-child relationships
    children_map = defaultdict(list)
    for path in all_paths:
        path_obj = Path(path)
        if len(path_obj.parts) > 1:
            parent = str(path_obj.parent)
            if parent in all_paths:
                children_map[parent].append(path)

    # 3. DFS to calculate f(x) and max_child_f(x)
    f = {}  # path -> subtree sum
    max_child_f = {}  # path -> max(f(children))
    visited = set()

    def dfs(path: str) -> int:
        if path in visited:
            return f[path]
        visited.add(path)

        # Current node weight (1 if it's a model path, 0 otherwise)
        weight = 1 if path in model_set else 0

        # Recursively calculate children
        children = children_map.get(path, [])
        if not children:
            # Leaf node
            f[path] = weight
            max_child_f[path] = 0
            return weight

        # Calculate f values for all children
        children_f_values = [dfs(child) for child in children]

        # Calculate f(x) and max_child_f(x)
        f[path] = weight + sum(children_f_values)
        max_child_f[path] = max(children_f_values) if children_f_values else 0

        return f[path]

    # Find top-level nodes (no parent in all_paths)
    top_nodes = []
    for path in all_paths:
        parent = str(Path(path).parent)
        if parent not in all_paths or parent == path:
            top_nodes.append(path)

    # Execute DFS from all top nodes
    for top in top_nodes:
        dfs(top)

    # 4. Find root nodes: f(parent) = f(x) >= max(f(children))
    # Note: Use >= instead of > to handle the case where a directory contains only one model
    candidate_roots = []
    for path in all_paths:
        # Skip model paths themselves (leaf nodes in model tree)
        if path in model_set:
            continue

        parent = str(Path(path).parent)

        # Check condition: f(parent) = f(x) and f(x) >= max(f(children))
        if parent in f and f.get(parent, 0) == f.get(path, 0):
            if f.get(path, 0) >= max_child_f.get(path, 0) and f.get(path, 0) > 0:
                candidate_roots.append(path)

    # 5. Remove redundant roots (prefer deeper paths)
    # If a root is an ancestor of another root with the same f value, remove it
    roots = []
    candidate_roots_sorted = sorted(candidate_roots, key=lambda p: -len(Path(p).parts))

    for root in candidate_roots_sorted:
        # Check if this root is a parent of any already selected root
        is_redundant = False
        for selected in roots:
            if selected.startswith(root + "/"):
                # selected is a child of root
                # Only keep root if it has more models (shouldn't happen by algorithm)
                if f.get(root, 0) == f.get(selected, 0):
                    is_redundant = True
                    break

        if not is_redundant:
            # Also filter out very shallow paths (< 3 levels)
            if len(Path(root).parts) >= 3:
                roots.append(root)

    # Build subtree sizes for roots
    subtree_sizes = {root: f.get(root, 0) for root in roots}

    return sorted(roots), subtree_sizes
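
# Worked example (illustrative): for model paths
#   /data/models/llama-70b, /data/models/qwen-72b, /backup/models/ds-v3
# f("/data/models") = 2 equals f("/data") while each child has f = 1, so
# "/data/models" is selected; "/backup/models" (f = 1) is likewise selected,
# and "/" is discarded by the shallow-path filter. Result:
#   (["/backup/models", "/data/models"], {"/backup/models": 1, "/data/models": 2})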


@dataclass
class ModelRootInfo:
    """Information about a detected model root path"""

    path: str
    model_count: int
    models: List[ScannedModel]


def discover_models(
    mount_points: Optional[List[str]] = None, min_size_gb: float = 10.0, max_depth: int = 6
) -> List[ScannedModel]:
    """
    Discover all model directories on the system

    Fast scan using find command to locate all models that meet the criteria

    Args:
        mount_points: List of mount points to scan (None = auto-detect)
        min_size_gb: Minimum model size in GB (default: 10.0)
        max_depth: Maximum search depth (default: 6)

    Returns:
        List of ScannedModel sorted by path
    """
    # Auto-detect mount points if not provided
    if mount_points is None:
        mount_points = _get_mount_points()

    # Fast scan with cached info (only scan once!)
    model_info = scan_all_models_with_info(mount_points, min_size_gb, max_depth)

    if not model_info:
        return []

    # Convert to ScannedModel objects
    results = []
    for model_path, (model_type, total_size, file_count, files) in model_info.items():
        results.append(
            ScannedModel(path=model_path, format=model_type, size_bytes=total_size, file_count=file_count, files=files)
        )

    # Sort by path
    results.sort(key=lambda m: m.path)
    return results


def _get_mount_points() -> List[str]:
    """
    Get all valid mount points from /proc/mounts, filtering out system paths

    Returns:
        List of mount point paths suitable for model storage
        (excludes root "/" to avoid scanning entire filesystem)
    """
    mount_points = set()

    # System paths to exclude (unlikely to contain model files)
    excluded_paths = [
        "/snap/",
        "/proc/",
        "/sys/",
        "/run/",
        "/boot",
        "/dev/",
        "/usr",
        "/lib",
        "/lib64",
        "/bin",
        "/sbin",
        "/etc",
        "/opt",
        "/var",
        "/tmp",
    ]

    try:
        with open("/proc/mounts", "r") as f:
            for line in f:
                parts = line.split()
                if len(parts) < 3:
                    continue

                device, mount_point, fs_type = parts[0], parts[1], parts[2]

                # Filter out pseudo filesystems
                pseudo_fs = {
                    "proc",
                    "sysfs",
                    "devpts",
                    "tmpfs",
                    "devtmpfs",
                    "cgroup",
                    "cgroup2",
                    "pstore",
                    "bpf",
                    "tracefs",
                    "debugfs",
                    "hugetlbfs",
                    "mqueue",
                    "configfs",
                    "securityfs",
                    "fuse.gvfsd-fuse",
                    "fusectl",
                    "squashfs",
                    "overlay",  # snap packages
                }

                if fs_type in pseudo_fs:
                    continue

                # Skip root directory (too large to scan)
                if mount_point == "/":
                    continue

                # Filter out system paths
                if any(mount_point.startswith(x) for x in excluded_paths):
                    continue

                # Only include if it exists and is readable
                if os.path.exists(mount_point) and os.access(mount_point, os.R_OK):
                    mount_points.add(mount_point)

        # If no mount points found, add common data directories
        if not mount_points:
            # Add /home if it exists and is not already a separate mount point
            common_paths = ["/home", "/data", "/mnt"]
            for path in common_paths:
                if os.path.exists(path) and os.access(path, os.R_OK):
                    mount_points.add(path)

    except (FileNotFoundError, PermissionError):
        # Fallback to common paths
        mount_points = {"/home", "/mnt", "/data"}

    return sorted(mount_points)
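
A minimal usage sketch for this module (illustrative; assumes kt-kernel/python is on sys.path so the file imports as cli.utils.model_scanner):

    from cli.utils.model_scanner import discover_models, format_size

    for m in discover_models():
        print(m.folder_name, m.format, format_size(m.size_bytes))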