mirror of
https://github.com/eigent-ai/eigent.git
synced 2026-04-28 11:40:25 +00:00
Co-authored-by: Cursor <cursoragent@cursor.com> Co-authored-by: bytecii <bytecii@users.noreply.github.com>
319 lines
11 KiB
Python
319 lines
11 KiB
Python
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
|
|
|
|
"""File system utilities with robust path handling and edge-case safety."""
|
|
|
|
import logging
|
|
import os
|
|
import platform
|
|
import shutil
|
|
from pathlib import Path
|
|
|
|
from app.component.environment import env
|
|
from app.exception.exception import PathEscapesBaseError
|
|
from app.model.chat import Chat
|
|
|
|
# Module-level logger shared by every helper in this file.
logger = logging.getLogger("file_utils")

# Upper bounds applied to resolved paths during validation.
# Windows has a 260-character path limit unless long path support is enabled.
MAX_PATH_LENGTH_WIN = 260
MAX_PATH_LENGTH_UNIX = 4096

# Directory names that list_files does not descend into by default.
DEFAULT_SKIP_DIRS = frozenset(
    (".git", "node_modules", "__pycache__", "venv", ".venv")
)

# File-name suffixes that list_files leaves out by default.
DEFAULT_SKIP_EXTENSIONS: tuple[str, ...] = (".pyc", ".tmp", ".temp")
|
|
|
|
|
|
def _max_path_length() -> int:
    """Return the path-length ceiling used for validation on this platform.

    Returns:
        int: MAX_PATH_LENGTH_WIN when platform.system() reports "Windows",
            MAX_PATH_LENGTH_UNIX otherwise.
    """
    if platform.system() == "Windows":
        return MAX_PATH_LENGTH_WIN
    return MAX_PATH_LENGTH_UNIX
|
|
|
|
|
|
def _is_under_base(path_real: str, base_real: str) -> bool:
    """Return True if path_real is at or under base_real (both already realpath'd).

    The check is purely textual, so both arguments must already be absolute,
    symlink-resolved paths.

    Args:
        path_real (str): Resolved path to test.
        base_real (str): Resolved base directory.

    Returns:
        bool: True when path_real equals base_real or lies below it. False
            when either argument is empty.
    """
    # An empty base would strip to "" and make every absolute path look
    # contained, so refuse it explicitly.
    if not path_real or not base_real:
        return False
    # normcase is a no-op on POSIX; on Windows it lowercases and unifies
    # separators so the comparison matches the case-insensitive filesystem.
    candidate = os.path.normcase(path_real)
    base = os.path.normcase(base_real).rstrip(os.sep)
    # Appending the separator prevents "/base2/x" from matching base "/base".
    return candidate == base or candidate.startswith(base + os.sep)
|
|
|
|
|
|
def _should_skip(
    name: str,
    skip_prefix: str,
    skip_extensions: tuple[str, ...] = (),
) -> bool:
    """Return True if a file or directory name should be excluded from listing.

    Args:
        name (str): Bare entry name (no directory component).
        skip_prefix (str): Names starting with this prefix are skipped. An
            empty prefix disables the prefix filter.
        skip_extensions (tuple[str, ...]): Suffixes that cause a skip. A single
            string is treated as one suffix; empty suffixes are ignored.

    Returns:
        bool: True when the name matches the prefix or any suffix.
    """
    # str.startswith("") is always True; without this guard an empty prefix
    # would exclude every entry instead of disabling the filter.
    if skip_prefix and name.startswith(skip_prefix):
        return True
    # A bare string would otherwise be iterated character by character.
    if isinstance(skip_extensions, str):
        skip_extensions = (skip_extensions,)
    # Drop empty suffixes (they match everything); endswith needs a tuple.
    suffixes = tuple(ext for ext in skip_extensions if ext)
    return name.endswith(suffixes)
|
|
|
|
|
|
def join_under_base(base: str, *parts: str) -> str | None:
|
|
"""Join path parts onto base, ensuring the result stays under base.
|
|
|
|
Args:
|
|
base (str): Base directory; must exist as a directory.
|
|
*parts (str): Path components to join onto base.
|
|
|
|
Returns:
|
|
str | None: Resolved absolute path if valid and under base, None otherwise.
|
|
"""
|
|
if not base or not base.strip():
|
|
return None
|
|
try:
|
|
base_resolved = Path(base).resolve()
|
|
if not base_resolved.is_dir():
|
|
return None
|
|
combined = base_resolved
|
|
for p in parts:
|
|
if p is None or (isinstance(p, str) and ".." in p.split(os.sep)):
|
|
return None
|
|
combined = combined / p
|
|
resolved = combined.resolve()
|
|
try:
|
|
resolved.relative_to(base_resolved)
|
|
except ValueError:
|
|
return None
|
|
if len(str(resolved)) > _max_path_length():
|
|
return None
|
|
return str(resolved)
|
|
except (OSError, RuntimeError) as e:
|
|
logger.debug("join_under_base failed: %s", e)
|
|
return None
|
|
|
|
|
|
def is_safe_path(path: str, base: str) -> bool:
    """Return True if path is under base (realpath) and within path length limits.

    This is a predicate: malformed input yields False rather than an exception.

    Args:
        path (str): Path to validate (file or directory).
        base (str): Base directory that path must be under.

    Returns:
        bool: True if path resolves under base and within path length limits.
    """
    if not path or not base:
        return False
    try:
        # A NUL byte can never name a real file; reject it up front instead
        # of depending on realpath raising for it.
        if "\x00" in os.fspath(path) or "\x00" in os.fspath(base):
            return False
        base_real = os.path.realpath(base)
        path_real = os.path.realpath(path)
    except (OSError, RuntimeError, ValueError, TypeError):
        return False
    if not _is_under_base(path_real, base_real):
        return False
    return len(path_real) <= _max_path_length()
|
|
|
|
|
|
def resolve_under_base(path: str, base: str) -> str:
    """Resolve path against base and confirm the result does not leave base.

    The path is joined onto the absolute base, normalized lexically, and then
    passed through realpath before the containment and length checks.

    Args:
        path (str): Path to resolve (relative or absolute).
        base (str): Base directory that path must be confined to.

    Returns:
        str: Resolved real path.

    Raises:
        ValueError: If path is empty or whitespace.
        PathEscapesBaseError: If base is not a directory, or path resolves
            outside base or exceeds path length.
        OSError: If path resolution fails due to filesystem error.
    """
    if not (path and path.strip()):
        raise ValueError(f"Path must be non-empty, got: {path!r}")

    base_dir = os.path.abspath(base)
    if not os.path.isdir(base_dir):
        raise PathEscapesBaseError(f"Base is not a directory: {base!r}")

    # normpath collapses "." and ".." textually; realpath then follows symlinks.
    joined = os.path.normpath(os.path.join(base_dir, path))
    target_real = os.path.realpath(joined)
    root_real = os.path.realpath(base_dir)

    if not _is_under_base(target_real, root_real):
        raise PathEscapesBaseError(
            f"Path escapes base: path={path!r} base={base!r}"
        )

    if len(target_real) > _max_path_length():
        raise PathEscapesBaseError(
            f"Path exceeds max length ({len(target_real)}): {target_real!r}"
        )

    return target_real
|
|
|
|
|
|
def normalize_working_path(path: str | Path | None) -> str:
|
|
"""
|
|
Normalize and validate a working directory path using pathlib.
|
|
Requires a non-empty path; raises ValueError if path is None or empty.
|
|
For invalid or nonexistent paths, falls back to parent or user home.
|
|
|
|
Args:
|
|
path: Working directory path (str or Path). Must be specified.
|
|
|
|
Returns:
|
|
Absolute, resolved directory path as a string.
|
|
|
|
Raises:
|
|
ValueError: If path is None or empty/whitespace.
|
|
"""
|
|
if path is None or not str(path).strip():
|
|
raise ValueError("Working directory path must be specified.")
|
|
p = Path(path).expanduser().resolve()
|
|
try:
|
|
if len(str(p)) > _max_path_length():
|
|
logger.warning("Working path too long, using parent: %s", p)
|
|
p = p.parent
|
|
if not p.exists():
|
|
if p.parent.exists() and p.parent.is_dir():
|
|
return str(p.parent)
|
|
return str(Path.home())
|
|
if p.is_dir():
|
|
return str(p)
|
|
return str(p.parent)
|
|
except (OSError, RuntimeError) as e:
|
|
logger.warning("Invalid working path %r: %s", path, e)
|
|
return str(Path.home())
|
|
|
|
|
|
def list_files(
    dir_path: str,
    base: str | None = None,
    *,
    max_entries: int = 10_000,
    skip_dirs: set[str] | None = None,
    skip_extensions: tuple[str, ...] = DEFAULT_SKIP_EXTENSIONS,
    skip_prefix: str = ".",
) -> list[str]:
    """List files under dir_path with optional base confinement and filters.
    If base is set, only returns paths that resolve under base (no traversal).

    Args:
        dir_path (str): Directory to list; must resolve under base when base is set.
        base (str | None): Confinement base (default: cwd). Paths outside this are excluded.
        max_entries (int): Maximum number of file paths to return. Zero or a
            negative value yields an empty list.
        skip_dirs (set[str] | None): Directory names to skip (default: DEFAULT_SKIP_DIRS).
        skip_extensions (tuple[str, ...]): File extensions to skip (default: DEFAULT_SKIP_EXTENSIONS).
        skip_prefix (str): Skip dirs/files whose name starts with this prefix.

    Returns:
        List of real absolute file paths under dir_path (subject to filters and
        max_entries). Each real path appears once even if several symlinks
        point to it. Any failure yields an empty or partial list, not an exception.
    """
    if not dir_path or not dir_path.strip():
        logger.warning("list_files: empty dir_path")
        return []
    # The cap is otherwise only checked after an append, which would let a
    # cap of 0 (or less) return one entry.
    if max_entries <= 0:
        return []

    resolve_base = base if base else os.getcwd()
    try:
        resolved_dir = resolve_under_base(dir_path, resolve_base)
    except PathEscapesBaseError as e:
        logger.warning("list_files: %s", e)
        return []
    except (ValueError, OSError) as e:
        logger.warning("list_files: invalid dir_path %r: %s", dir_path, e)
        return []

    try:
        if not os.path.isdir(resolved_dir):
            return []
    except OSError:
        return []

    base_real = os.path.realpath(resolve_base)
    skip_dirs = set(DEFAULT_SKIP_DIRS) if skip_dirs is None else skip_dirs
    result: list[str] = []
    seen: set[str] = set()  # real paths already emitted (symlink de-duplication)
    try:
        for root, dirs, files in os.walk(resolved_dir, followlinks=False):
            # Prune in place so os.walk does not descend into skipped dirs.
            dirs[:] = [
                d
                for d in dirs
                if d not in skip_dirs and not _should_skip(d, skip_prefix)
            ]
            for name in files:
                if _should_skip(name, skip_prefix, skip_extensions):
                    continue
                try:
                    file_path = os.path.join(root, name)
                    real_path = os.path.realpath(file_path)
                except OSError:
                    continue
                # A symlinked file may resolve outside the confinement base.
                if not _is_under_base(real_path, base_real):
                    logger.debug(
                        "list_files: skipping %r (escapes base)", file_path
                    )
                    continue
                if real_path in seen:
                    continue
                seen.add(real_path)
                result.append(real_path)
                if len(result) >= max_entries:
                    logger.debug(
                        "list_files hit max_entries=%d", max_entries
                    )
                    return result
    except OSError as e:
        logger.warning("list_files failed for %r: %s", dir_path, e)
    return result
|
|
|
|
|
|
def get_working_directory(options: Chat, task_lock=None) -> str:
    """
    Determine the working directory to use for file operations.

    A truthy ``new_folder_path`` on the task lock takes precedence. Otherwise
    the ``file_save_path`` environment setting is used, defaulting to
    ``options.file_save_path()``. The chosen value is passed through
    normalize_working_path before being returned.

    Args:
        options: Chat options; supplies ``project_id`` and ``file_save_path()``.
        task_lock: Optional task lock. When falsy it is looked up via
            get_task_lock_if_exists(options.project_id).

    Returns:
        Normalized working directory path as a string.
    """
    if not task_lock:
        # Imported here rather than at module level, as in the original;
        # presumably to avoid a circular import -- TODO confirm.
        from app.service.task import get_task_lock_if_exists

        task_lock = get_task_lock_if_exists(options.project_id)

    override = getattr(task_lock, "new_folder_path", None) if task_lock else None
    if override:
        return normalize_working_path(Path(override))

    configured = env("file_save_path", options.file_save_path())
    return normalize_working_path(Path(configured))
|
|
|
|
|
|
def sync_eigent_skills_to_project(working_directory: str) -> None:
    """
    Copy skills from ~/.eigent/skills into the project's .eigent/skills
    so the agent can load and execute them from the project working directory.

    Each skill directory in the source replaces any same-named entry in the
    destination. Nothing is done when the source directory is missing, or when
    the destination resolves to the source itself or to a location inside it.
    Filesystem errors are logged as warnings and not raised.

    Args:
        working_directory (str): Project working directory that receives the
            ``.eigent/skills`` copy.
    """
    src = Path.home() / ".eigent" / "skills"
    dst = Path(working_directory) / ".eigent" / "skills"
    if not src.is_dir():
        return
    try:
        src_real = src.resolve()
        dst_real = dst.resolve()
        # If the project directory is the home directory, source and
        # destination are the same tree: removing the "stale" destination
        # would delete the user's own skills. Copying into a directory
        # inside the source would recurse into itself. Skip both.
        if dst_real == src_real or src_real in dst_real.parents:
            return
        dst.mkdir(parents=True, exist_ok=True)
        for skill_dir in src.iterdir():
            if not skill_dir.is_dir():
                continue
            dest_skill = dst / skill_dir.name
            # rmtree refuses symlinks and plain files, and exists() is False
            # for a dangling symlink, so remove those with unlink instead.
            if dest_skill.is_symlink() or dest_skill.is_file():
                dest_skill.unlink()
            elif dest_skill.exists():
                shutil.rmtree(dest_skill)
            shutil.copytree(skill_dir, dest_skill)
        logger.debug(
            "Synced eigent skills to project",
            extra={
                "working_directory": working_directory,
                "destination": str(dst),
            },
        )
    except OSError as e:
        logger.warning(
            "Failed to sync ~/.eigent/skills to project %s: %s",
            working_directory,
            e,
            exc_info=True,
        )
|