"""
|
|
title: Fileshed
|
|
description: Persistent file storage with group collaboration. FIRST: Run shed_help() for quick reference or shed_help(howto="...") for guides: download, csv_to_sqlite, upload, share, edit, commands, network, paths, full. Config: shed_parameters().
|
|
author: Fade78 (with Claude Opus 4.5)
|
|
version: 1.0.2
|
|
license: MIT
|
|
required_open_webui_version: 0.4.0
|
|
|
|
SETUP INSTRUCTIONS:
|
|
==================
|
|
For this tool to work properly, you must enable Native Function Calling:
|
|
|
|
Option 1 - Per Model (recommended):
|
|
Admin Panel > Settings > Models > [Select Model] > Advanced Parameters > Function Calling > "Native"
|
|
|
|
Option 2 - Per Chat:
|
|
Chat Controls (gear icon) > Advanced Params > Function Calling > "Native"
|
|
"""

# =============================================================================
# ⚠️ LLM WARNING - READ THIS FIRST ⚠️
# =============================================================================
#
# YOU CAN ONLY CALL FUNCTIONS STARTING WITH "shed_" !
#
# ✅ CORRECT (public API):
#   shed_exec(zone="storage", cmd="ls", args=["-la"])
#   shed_exec(zone="storage", cmd="mkdir", args=["-p", "projects"])
#   shed_exec(zone="documents", cmd="git", args=["log"])
#   shed_patch_text(zone="storage", path="notes.txt", content="Hello")
#   shed_import(import_all=True)
#   shed_sqlite(zone="storage", path="db.sqlite", query="SELECT * FROM t")
#
# ❌ WRONG (internal methods - will NOT work):
#   _exec_command(...)   <- INTERNAL, don't call!
#   _git_run(...)        <- INTERNAL, don't call!
#   _validate_path(...)  <- INTERNAL, don't call!
#
# ⚠️ SHELL COMMANDS FIRST:
#   Use shed_exec() for ALL shell-doable operations!
#   ✓ mkdir: shed_exec(zone="storage", cmd="mkdir", args=["-p", "dir"])
#   ✗ WRONG: shed_patch_text(path="dir/.keep", content="")
#
# Methods starting with "_" are INTERNAL IMPLEMENTATION DETAILS.
# They are NOT callable by the LLM. Only "shed_*" functions are available.
#
# Run shed_help() for the list of available functions!
#
# =============================================================================

# =============================================================================
# 🔧 DEV NOTES
# =============================================================================
#
# 1. ARCHITECTURE: All public tools are in `class Tools`. Internal methods
#    MUST be in `class _FileshedCore` to hide them from Open WebUI/LLM.
#
# 2. PATHS: Internally we use {Zone}/data/ but externally it's just {Zone}/.
#    NEVER expose "data/" in help, messages, or errors. The LLM sees:
#    Storage/myfile.txt (not Storage/data/myfile.txt)
#
# =============================================================================

import json
import mimetypes
import os
import re
import resource
import shutil
import sqlite3
import subprocess
import unicodedata
import uuid
import zipfile
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Optional
from pydantic import BaseModel, Field

# Try to import Open WebUI Groups API
GROUPS_AVAILABLE = False
try:
    from open_webui.models.groups import Groups
    GROUPS_AVAILABLE = True
except ImportError:
    pass


# =============================================================================
# ZONE CONTEXT
# =============================================================================

@dataclass
class ZoneContext:
    """Result of zone resolution - contains all zone-specific info."""
    zone_root: Path          # Data directory path
    zone_name: str           # Canonical name (Storage, Documents, Uploads, group:xxx)
    zone_lower: str          # Lowercase (storage, documents, uploads, group)
    editzone_base: Path      # Base for editzones (None for uploads)
    conv_id: str             # Conversation ID
    group_id: Optional[str]  # Group ID if zone=group, else None
    git_commit: bool         # Auto-commit after modifications
    readonly: bool           # True for uploads
    whitelist: set           # Allowed commands for this zone
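
# Illustrative only: a resolved context for the personal Storage zone might
# look roughly like this. The paths and IDs below are hypothetical values,
# not produced by any resolution code in this file:
#
#   ctx = ZoneContext(
#       zone_root=Path("/app/backend/data/fileshed/u123/Storage/data"),
#       zone_name="Storage",
#       zone_lower="storage",
#       editzone_base=Path("/app/backend/data/fileshed/u123/Storage/editzones"),
#       conv_id="conv-42",
#       group_id=None,
#       git_commit=False,
#       readonly=False,
#       whitelist=WHITELIST_READWRITE,
#   )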


# =============================================================================
# CONFIGURATION
# =============================================================================
# Valves are defined inside Tools class (required by Open WebUI)
# See: Tools.Valves (inner class)

# =============================================================================
# WHITELISTS
# =============================================================================

# Read-only commands (Uploads)
WHITELIST_READONLY = {
    # Reading
    "cat", "head", "tail", "less", "more", "nl", "wc", "stat", "file", "du", "tac",
    # Navigation
    "ls", "tree", "find",
    # Text search
    "grep", "egrep", "fgrep", "rg", "awk", "sed",
    # Text transformation
    "sort", "uniq", "cut", "paste", "tr", "fold", "fmt", "column", "rev", "shuf",
    "expand", "unexpand", "pr",
    # Join
    "join",
    # Comparison
    "diff", "diff3", "cmp", "comm",
    # Archives (list)
    "tar", "unzip", "zipinfo", "7z",
    # Compression (stdout)
    "zcat", "bzcat", "xzcat",
    # Checksums
    "md5sum", "sha1sum", "sha256sum", "sha512sum", "b2sum", "cksum",
    # Encoding
    "base32", "base64", "basenc",
    # Binary/Hex
    "strings", "od", "hexdump", "xxd",
    # JSON/XML/YAML
    "jq", "xmllint", "yq",
    # Encoding conversion (stdout)
    "iconv",
    # Calculation
    "bc", "dc", "expr", "factor", "numfmt",
    # Paths
    "basename", "dirname", "realpath",
    # Misc
    "echo", "printf",
    # Media (info reading)
    "ffprobe", "identify", "exiftool",
    # Database
    "sqlite3",
    # REMOVED for security:
    # - xargs   : can execute arbitrary commands
    # - timeout : can execute arbitrary commands (we have internal timeout)
    # - env     : can execute commands and expose secrets
}

# Read/write commands (Storage, Documents)
WHITELIST_READWRITE = WHITELIST_READONLY | {
    # Additional reading
    "df", "locate", "which", "whereis",
    # Split
    "split", "csplit",
    # Additional comparison
    "sdiff", "patch", "colordiff",
    # Archives (extraction/creation)
    "zip", "7za",
    # Compression
    "gzip", "gunzip", "bzip2", "bunzip2", "xz", "unxz", "lz4", "zstd",
    # Additional checksums
    "sum",
    # Additional encoding
    "uuencode", "uudecode",
    # File modification (ln removed - security risk with hard/soft links)
    "touch", "mkdir", "rm", "rmdir", "mv", "cp", "truncate", "mktemp",
    "install", "shred", "rename",
    # Permissions
    "chmod",
    # Document conversion
    "pandoc",
    # Encoding conversion
    "dos2unix", "unix2dos", "recode",
    # Additional calculation
    "seq",
    # Date/Time
    "date", "cal",
    # Additional paths
    "readlink", "pathchk", "pwd",
    # System (info only - env removed, can execute commands)
    "uname", "nproc", "printenv",
    # Control (timeout removed - can execute commands, we have internal timeout)
    "sleep",
    # Misc (xargs removed - can execute arbitrary commands)
    "yes", "tee", "envsubst", "gettext", "tsort", "true", "false",
    # Media
    "ffmpeg", "magick", "convert",
    # Versioning
    "git",
}
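
# Sanity check (sketch of expected membership, derived from the sets above):
#   "cat" in WHITELIST_READONLY   -> True   (reading is allowed everywhere)
#   "rm" in WHITELIST_READONLY    -> False  (no writes in Uploads)
#   "rm" in WHITELIST_READWRITE   -> True   (Storage/Documents can modify files)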

# Allowed Git subcommands
GIT_WHITELIST_READ = {
    "status", "log", "show", "diff", "branch", "tag", "blame", "ls-files",
    "ls-tree", "shortlog", "reflog", "describe", "rev-parse", "rev-list", "cat-file",
}

GIT_WHITELIST_WRITE = {
    "add", "commit", "reset", "restore", "checkout", "rm", "mv", "revert",
    "cherry-pick", "stash", "clean",
}

GIT_BLACKLIST = {
    "gc", "prune", "filter-branch",  # Always forbidden (dangerous local ops)
}

# Git network commands (controlled by curl valves)
GIT_NETWORK_GET = {
    "clone", "fetch", "pull", "submodule", "remote",  # Download operations
}
GIT_NETWORK_PUSH = {
    "push",  # Upload/exfiltration
}
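
# Gating sketch (assumed enforcement flow, for orientation only): a git
# subcommand passes if it is in GIT_WHITELIST_READ, or in GIT_WHITELIST_WRITE
# for a writable zone; network subcommands additionally depend on network_mode.
#   git log   -> read-only, allowed wherever git is whitelisted
#   git push  -> GIT_NETWORK_PUSH, needs network_mode="all"
#   git gc    -> GIT_BLACKLIST, always rejected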

# Curl/wget commands (controlled by valves)
CURL_COMMANDS = {"curl", "wget"}

# Curl/wget options forbidden in GET mode (allow data exfiltration)
CURL_FORBIDDEN_GET_OPTS = {
    # HTTP methods
    "-X", "--request",
    # Data upload
    "-d", "--data", "--data-raw", "--data-binary", "--data-urlencode", "--data-ascii",
    "-F", "--form", "--form-string",
    "-T", "--upload-file",
    # POST behavior
    "--post301", "--post302", "--post303",
    # wget upload
    "--post-data", "--post-file", "--body-data", "--body-file", "--method",
}
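
# Sketch of the intended behavior in network_mode="safe" (exact error
# payloads are produced elsewhere):
#   curl -L -o out.bin https://example.org/f    -> allowed (plain download)
#   curl -T secrets.txt https://example.org/up  -> blocked (-T/--upload-file)
#   wget --post-file=db.sqlite https://...      -> blocked (--post-file)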

# ffmpeg output protocols that allow data exfiltration
# These protocols can SEND data to remote servers
FFMPEG_OUTPUT_PROTOCOLS = {
    "rtmp://", "rtmps://", "rtmpe://", "rtmpt://",  # Streaming upload
    "rtsp://", "rtsps://",  # Real-time streaming
    "srt://",  # Secure Reliable Transport
    "udp://", "tcp://",  # Raw sockets
    "rtp://", "srtp://",  # Real-time protocol
    "ftp://", "sftp://",  # File transfer
    "http://", "https://",  # Can POST/PUT with -method
    "icecast://",  # Streaming server
    "tee:",  # Can duplicate to multiple outputs including network
}

# Commands that can SEND data over network (exfiltration risk)
# These are blocked unless network_mode is "all"
NETWORK_OUTPUT_COMMANDS = {"ffmpeg"}

# Commands that can RECEIVE data from network (read-only risk)
# These require network_mode "safe" or "all", with URL blocking if "disabled"
NETWORK_INPUT_COMMANDS = {"ffprobe", "pandoc", "magick", "convert", "identify"}

# All network-capable commands (union of above)
NETWORK_CAPABLE_COMMANDS = NETWORK_OUTPUT_COMMANDS | NETWORK_INPUT_COMMANDS
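
# For reference, the union above evaluates to:
#   {"ffmpeg", "ffprobe", "pandoc", "magick", "convert", "identify"}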

# Forbidden commands
BLACKLIST_COMMANDS = {
    # Interpreters/Shells
    "bash", "sh", "zsh", "fish", "dash", "csh", "tcsh", "ksh",
    "python", "python3", "perl", "ruby", "node", "php", "lua",
    "exec", "eval", "source",
    # Background / Fork
    "nohup", "disown", "setsid", "screen", "tmux", "at", "batch", "crontab",
    # System privileges
    "sudo", "su", "doas", "chown", "chgrp",
    # Network (curl/wget controlled separately via valves)
    "fetch", "ssh", "scp", "sftp", "rsync",
    "nc", "netcat", "ncat", "telnet", "ftp", "ping", "traceroute",
    # System / Dangerous
    "dd", "mount", "umount", "kill", "killall", "pkill",
    "reboot", "shutdown", "halt", "poweroff",
    "systemctl", "service", "mkfs", "fdisk", "parted",
    "iptables", "firewall-cmd",
}

# Pattern to detect dangerous arguments (shell metacharacters)
# Blocks: ; & | ` $ \n \r && || >> << > < $( ${
DANGEROUS_ARGS_PATTERN = re.compile(r'[;&|`$\n\r]|&&|\|\||>>|<<|>|<|\$\(|\$\{')

# Same pattern but allows | (for commands that use | in their internal syntax)
# Used for: jq (pipe operator), awk (print | "cmd" - but we block system() separately)
DANGEROUS_ARGS_PATTERN_ALLOW_PIPE = re.compile(r'[;&`$\n\r]|&&|>>|<<|>|<|\$\(|\$\{')

# Commands that use | in their internal syntax (not shell pipes)
COMMANDS_ALLOWING_PIPE = {"jq", "awk", "gawk", "mawk", "nawk"}
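
# Expected behavior (sketch, verifiable in a REPL):
#   DANGEROUS_ARGS_PATTERN.search("file.txt")            -> None (safe arg)
#   DANGEROUS_ARGS_PATTERN.search("x; rm -rf /")         -> match (blocked)
#   DANGEROUS_ARGS_PATTERN.search(".items[] | .name")    -> match (| blocked)
#   DANGEROUS_ARGS_PATTERN_ALLOW_PIPE.search(".a | .b")  -> None (jq filter OK)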

# Pattern to detect URLs (network access via ffmpeg, pandoc, imagemagick, etc.)
# Blocks: http://, https://, ftp://, rtmp://, rtsp://, smb://, file://, etc.
URL_PATTERN = re.compile(r'^[a-zA-Z][a-zA-Z0-9+.-]*://', re.IGNORECASE)
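
# Expected behavior (sketch):
#   URL_PATTERN.match("https://example.org/a.mp4")  -> match (URL argument)
#   URL_PATTERN.match("media/local.mp4")            -> None (plain relative path)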

# find options that can execute commands (security risk)
FIND_EXEC_OPTIONS = {"-exec", "-execdir", "-ok", "-okdir"}
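
# Sketch: find stays read-only without exec options
#   find . -name "*.txt"                -> allowed
#   find . -name "*.txt" -exec rm {} \; -> blocked (-exec)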

# awk patterns that can execute commands (security risk)
# system() executes shell commands, getline can pipe from commands
AWK_DANGEROUS_PATTERNS = re.compile(r'\bsystem\s*\(|\|\s*getline|\bgetline\s*<')
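
# Expected behavior (sketch):
#   AWK_DANGEROUS_PATTERNS.search('{sum+=$2} END {print sum}')  -> None (pure awk)
#   AWK_DANGEROUS_PATTERNS.search('{system("ls")}')             -> match (blocked)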

# ffmpeg options that can be used for data exfiltration or other dangerous operations
# in "safe" network mode. These are blocked unless network_mode="all"
FFMPEG_DANGEROUS_OPTIONS = {
    # Metadata can be used to embed arbitrary data for exfiltration
    "-metadata", "-metadata:s", "-metadata:g",
    # filter_complex can contain network destinations
    "-filter_complex",
    # Can write to multiple outputs including network
    "-f", "tee",
    # HTTP method override (can enable POST/PUT)
    "-method",
    # Can be used to send data via HTTP headers
    "-headers",
    # Content type manipulation
    "-content_type",
}
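
# Sketch: in network_mode="safe" these options are rejected even for local
# files (error shape is produced elsewhere):
#   ffmpeg -i in.mp4 out.webm                         -> allowed
#   ffmpeg -i in.mp4 -metadata title=payload out.mp4  -> blocked (-metadata)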

# =============================================================================
# ERRORS
# =============================================================================

class StorageError(Exception):
    """Base storage error with contextual help."""
    def __init__(self, code: str, message: str, details: dict = None, hint: str = None, func: str = None):
        self.code = code
        self.message = message
        self.details = details or {}
        self.hint = hint
        self.func = func  # Function name for contextual help
        super().__init__(message)

    def to_dict(self, function_help: str = None) -> dict:
        result = {
            "success": False,
            "error": self.code,
            "message": self.message,
            "details": self.details,
            "hint": self.hint,
        }
        if function_help:
            result["help"] = function_help
        return result
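
# Intended usage (sketch; the error code and hint below are illustrative, not
# taken from the dispatch code):
#
#   try:
#       raise StorageError(
#           "FILE_NOT_FOUND", "No such file: notes.txt",
#           details={"path": "notes.txt"},
#           hint="Run shed_exec(zone='storage', cmd='ls') to list files.",
#       )
#   except StorageError as e:
#       payload = e.to_dict()
#       # -> {"success": False, "error": "FILE_NOT_FOUND", "message": "...",
#       #     "details": {"path": "notes.txt"}, "hint": "..."}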


# =============================================================================
# MAIN CLASS
# =============================================================================


# =============================================================================
# OPEN WEBUI BRIDGE (isolates internal API calls)
# =============================================================================

class _OpenWebUIBridge:
    """
    Bridge to Open WebUI internal Python API.

    This class isolates all direct interactions with Open WebUI's internal modules.
    If Open WebUI's internal API changes between versions, only this class needs updating.

    Supported Open WebUI versions: 0.6.x (tested with 0.6.40+)
    """

    _instance = None
    _initialized = False
    _files_module = None
    _files_class = None
    _file_form_class = None

    def __new__(cls):
        """Singleton pattern to avoid repeated imports."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def _ensure_initialized(self):
        """Lazy initialization of Open WebUI imports."""
        if self._initialized:
            return True

        try:
            # Open WebUI 0.6.x API
            from open_webui.models.files import Files, FileForm
            self._files_class = Files
            self._file_form_class = FileForm
            self._initialized = True
            return True
        except ImportError as e:
            # Try alternative import paths for different versions
            try:
                # Hypothetical future API path
                from open_webui.core.models.files import Files, FileForm
                self._files_class = Files
                self._file_form_class = FileForm
                self._initialized = True
                return True
            except ImportError:
                pass
            raise StorageError(
                "OPENWEBUI_API_UNAVAILABLE",
                f"Cannot import Open WebUI internal API: {e}",
                {"import_error": str(e)},
                "Open WebUI internal modules not available. This feature requires running inside Open WebUI."
            )

    def insert_file(
        self,
        user_id: str,
        file_id: str,
        filename: str,
        file_path: str,
        content_type: str,
        file_size: int,
        metadata: dict = None
    ):
        """
        Insert a new file into Open WebUI's file system.

        Args:
            user_id: Open WebUI user ID
            file_id: Unique file ID (UUID)
            filename: Display name of the file
            file_path: Absolute path to the file on disk
            content_type: MIME type
            file_size: Size in bytes
            metadata: Optional additional metadata

        Returns:
            File model object or None on failure
        """
        self._ensure_initialized()

        try:
            file_item = self._files_class.insert_new_file(
                user_id,
                self._file_form_class(
                    **{
                        "id": file_id,
                        "filename": filename,
                        "path": file_path,
                        "data": {},  # No RAG processing needed for download-only files
                        "meta": {
                            "name": filename,
                            "content_type": content_type,
                            "size": file_size,
                            "data": metadata or {},
                        },
                    }
                ),
            )
            return file_item
        except Exception as e:
            raise StorageError(
                "OPENWEBUI_INSERT_ERROR",
                f"Failed to insert file into Open WebUI: {e}",
                {"file_id": file_id, "error": str(e)}
            )
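
    # Illustrative call (all values below are hypothetical):
    #   bridge = _OpenWebUIBridge()
    #   bridge.insert_file(
    #       user_id="u123",
    #       file_id=str(uuid.uuid4()),
    #       filename="report.pdf",
    #       file_path="/app/backend/data/uploads/report.pdf",
    #       content_type="application/pdf",
    #       file_size=1024,
    #   )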

    def get_file_by_id(self, file_id: str):
        """Get file metadata by ID."""
        self._ensure_initialized()
        try:
            return self._files_class.get_file_by_id(file_id)
        except Exception as e:
            raise StorageError(
                "OPENWEBUI_GET_ERROR",
                f"Failed to get file from Open WebUI: {e}",
                {"file_id": file_id, "error": str(e)}
            )

    def delete_file_by_id(self, file_id: str):
        """Delete a file by ID."""
        self._ensure_initialized()
        try:
            return self._files_class.delete_file_by_id(file_id)
        except Exception as e:
            raise StorageError(
                "OPENWEBUI_DELETE_ERROR",
                f"Failed to delete file from Open WebUI: {e}",
                {"file_id": file_id, "error": str(e)}
            )

    @classmethod
    def is_available(cls) -> bool:
        """Check if Open WebUI internal API is available."""
        try:
            instance = cls()
            instance._ensure_initialized()
            return True
        except Exception:
            return False

    @classmethod
    def get_api_version(cls) -> str:
        """Return the detected Open WebUI API version."""
        try:
            from open_webui import __version__
            return __version__
        except ImportError:
            return "unknown"


# =============================================================================
# INTERNAL CORE (not exposed to LLM)
# =============================================================================

class _FileshedCore:
    """
    Internal implementation class containing all private methods.
    This class is NOT exposed to the LLM - only Tools.shed_* methods are visible.
    """

    def __init__(self, tools):
        self._tools = tools  # Reference to parent Tools instance
        self._commands_cache = None
        self._db_initialized = False

    @property
    def valves(self):
        """Access valves from parent Tools (ensures sync with Open WebUI updates)."""
        return self._tools.valves


    # =========================================================================
    # FUNCTION HELP INDEX (for contextual error messages)
    # =========================================================================

    FUNCTION_HELP = {
        # === DIRECT WRITE FUNCTIONS ===
        "shed_patch_text": {
            "usage": "shed_patch_text(zone, path, content, position='end', overwrite=False, ...)",
            "desc": "THE standard function to write/create text files. Use this for all file writing!",
            "workflows": ["Direct Write"],
            "howtos": ["edit"],
            "not_for": ["Locked Edit workflow (shed_lockedit_*)"],
            "tips": [
                "Create new file: shed_patch_text(zone, path, content, overwrite=True)",
                "Append to file: shed_patch_text(zone, path, content)  # position='end' by default",
                "To READ files: use shed_exec(cmd='cat', args=['file']) or head/tail/sed",
                "⚠️ CSV: quote fields with comma/newline/quotes. Escape quotes by doubling: \"\"",
                "position: 'start', 'end', 'before', 'after', 'replace' (NOT 'at' - that's for bytes!)",
                "For 'before'/'after'/'replace': use line=N (first line is 1) or pattern='...'",
            ],
        },
        "shed_patch_bytes": {
            "usage": "shed_patch_bytes(zone, path, content, position='end', offset=None)",
            "desc": "Write binary data (hex string) to a file",
            "workflows": ["Direct Write"],
            "howtos": ["edit", "large_files"],
            "not_for": ["Locked Edit workflow (shed_lockedit_*)"],
            "tips": ["content must be a hex string like '48454C4C4F'"],
        },

        # === SAFE EDIT WORKFLOW ===
        "shed_lockedit_open": {
            "usage": "shed_lockedit_open(zone, path)",
            "desc": "Step 1/3: Lock file and create working copy",
            "workflows": ["Locked Edit"],
            "howtos": ["edit"],
            "not_for": ["Direct Write workflow (shed_patch_*)"],
            "tips": [
                "WORKFLOW: shed_lockedit_open → shed_lockedit_overwrite → shed_lockedit_save",
                "After this, use shed_lockedit_overwrite (NOT shed_patch_text!)",
            ],
        },
        "shed_lockedit_overwrite": {
            "usage": "shed_lockedit_overwrite(zone, path, content, append=False)",
            "desc": "Step 2/3: Write to locked file (working copy)",
            "workflows": ["Locked Edit"],
            "howtos": ["edit"],
            "not_for": ["Direct Write workflow (shed_patch_*)"],
            "tips": [
                "REQUIRES shed_lockedit_open() first!",
                "NO position/pattern/line params - those are for shed_patch_text!",
                "Use append=True to append instead of overwrite",
            ],
        },
        "shed_lockedit_save": {
            "usage": "shed_lockedit_save(zone, path, message=None)",
            "desc": "Step 3/3: Save changes and unlock file",
            "workflows": ["Locked Edit"],
            "howtos": ["edit"],
            "not_for": ["Direct Write workflow (shed_patch_*)"],
            "tips": [
                "⚠️ THIS CLOSES EDIT MODE! File is unlocked after save.",
                "To edit again, call shed_lockedit_open() first!",
            ],
        },
        "shed_lockedit_cancel": {
            "usage": "shed_lockedit_cancel(zone, path)",
            "desc": "Discard changes and unlock file",
            "workflows": ["Locked Edit"],
            "howtos": ["edit"],
            "not_for": ["Direct Write workflow (shed_patch_*)"],
            "tips": ["Use instead of shed_lockedit_save to discard changes"],
        },
        "shed_lockedit_exec": {
            "usage": "shed_lockedit_exec(zone, path, cmd, args=[])",
            "desc": "Run command on locked file (in working copy)",
            "workflows": ["Locked Edit", "Shell Commands"],
            "howtos": ["edit", "commands"],
            "not_for": ["Direct Write workflow (shed_patch_*)"],
            "tips": ["REQUIRES shed_lockedit_open() first!"],
        },

        # === SHELL COMMANDS ===
        "shed_exec": {
            "usage": "shed_exec(zone, cmd, args=[], timeout=None, stdout_file=None, stderr_file=None, group=None)",
            "desc": "Execute shell command in zone. Use for reading files and shell operations!",
            "workflows": ["Shell Commands", "Download"],
            "howtos": ["commands", "large_files", "download"],
            "not_for": ["Locked Edit workflow (use shed_lockedit_exec instead if file is locked)"],
            "tips": [
                "READ files: cmd='cat', args=['file.txt'] (or head/tail/sed for partial reads)",
                "DOWNLOAD files or call APIs: use curl (saves full content to disk for processing)",
                "Download: cmd='curl', args=['-L', '-o', 'data.csv', 'URL']",
                "REDIRECT output to file: stdout_file='output.txt' (like shell > redirection)",
                "Check available commands: shed_allowed_commands()",
            ],
        },

        # === FILE OPERATIONS ===
        "shed_delete": {
            "usage": "shed_delete(zone, path, group=None, message=None)",
            "desc": "Delete a file or empty directory",
            "workflows": ["File Operations"],
            "howtos": ["commands"],
            "not_for": ["Locked Edit workflow", "Direct Write workflow"],
            "tips": [
                "Cannot delete non-empty directories",
                "message: Git commit message (for documents/group zones)",
            ],
        },
        "shed_rename": {
            "usage": "shed_rename(zone, old_path, new_path)",
            "desc": "Rename or move a file within same zone",
            "workflows": ["File Operations"],
            "howtos": ["commands"],
            "not_for": ["Locked Edit workflow", "Direct Write workflow"],
            "tips": ["For cross-zone moves, use shed_copy_* or shed_move_* bridges"],
        },
        "shed_tree": {
            "usage": "shed_tree(zone, path='.', depth=3)",
            "desc": "Show directory tree (builtin, always works)",
            "workflows": ["File Operations", "Shell Commands"],
            "howtos": ["commands"],
            "not_for": [],
            "tips": ["Use when 'tree' command is not available"],
        },

        # === SQLITE ===
        "shed_sqlite": {
            "usage": "shed_sqlite(zone, path, query=None, import_csv=None, table=None, ...)",
            "desc": "Execute SQL or import CSV into SQLite",
            "workflows": ["CSV Import", "Data Processing"],
            "howtos": ["csv_to_sqlite"],
            "not_for": ["Locked Edit workflow", "Direct Write workflow"],
            "tips": [
                "For CSV import: shed_sqlite(..., import_csv='data.csv', table='mytable')",
                "Auto-detects delimiter and encoding!",
                "⚠️ Creating CSV: quote fields with comma/newline/quotes. Escape \" as \"\" (RFC 4180)",
            ],
        },

        # === UPLOADS/BRIDGES ===
        "shed_import": {
            "usage": "shed_import(filename=None, import_all=False, dest_subdir='')",
            "desc": "Import uploaded files to Uploads zone",
            "workflows": ["Upload Handling"],
            "howtos": ["upload"],
            "not_for": ["Locked Edit workflow", "Direct Write workflow"],
            "tips": [
                "filename: specific file to import, or None to see available files",
                "import_all=True: import all files at once",
                "After import, use shed_move_uploads_to_storage() to move files",
            ],
        },
        "shed_move_uploads_to_storage": {
            "usage": "shed_move_uploads_to_storage(src, dest)",
            "desc": "Move file from Uploads to Storage",
            "workflows": ["Upload Handling", "File Operations"],
            "howtos": ["upload"],
            "not_for": ["Locked Edit workflow"],
            "tips": ["Uploads zone is read-only, move files to Storage for editing"],
        },
        "shed_move_uploads_to_documents": {
            "usage": "shed_move_uploads_to_documents(src, dest, message=None)",
            "desc": "Move file from Uploads to Documents (versioned)",
            "workflows": ["Upload Handling", "File Operations"],
            "howtos": ["upload"],
            "not_for": ["Locked Edit workflow"],
            "tips": ["Documents zone has Git versioning"],
        },
        "shed_copy_storage_to_documents": {
            "usage": "shed_copy_storage_to_documents(src, dest, message=None)",
            "desc": "Copy file from Storage to Documents (versioned)",
            "workflows": ["File Operations"],
            "howtos": ["upload"],
            "not_for": ["Locked Edit workflow"],
            "tips": [],
        },
        "shed_move_documents_to_storage": {
            "usage": "shed_move_documents_to_storage(src, dest, message=None)",
            "desc": "Move file from Documents to Storage (removes versioning)",
            "workflows": ["File Operations"],
            "howtos": ["upload"],
            "not_for": ["Locked Edit workflow"],
            "tips": ["message: Git commit message for the removal from Documents"],
        },

        # === LINKS ===
        "shed_link_create": {
            "usage": "shed_link_create(zone, path, group=None)",
            "desc": "Create download link for a file",
            "workflows": ["Share Files"],
            "howtos": ["share"],
            "not_for": ["Locked Edit workflow", "Direct Write workflow"],
            "tips": [
                "Returns clickable_link in Markdown format - show it directly to user!",
                "Links require authentication - not public!",
                "Use group= for files in group zones",
            ],
        },
        "shed_link_list": {
            "usage": "shed_link_list()",
            "desc": "List all your download links",
            "workflows": ["Share Files"],
            "howtos": ["share"],
            "not_for": [],
            "tips": [],
        },
        "shed_link_delete": {
            "usage": "shed_link_delete(file_id)",
            "desc": "Delete a download link",
            "workflows": ["Share Files"],
            "howtos": ["share"],
            "not_for": [],
            "tips": ["Get file_id from shed_link_list()"],
        },

        # === GROUPS ===
        "shed_group_list": {
            "usage": "shed_group_list()",
            "desc": "List groups you belong to",
            "workflows": ["Collaboration"],
            "howtos": [],
            "not_for": [],
            "tips": ["Group names are case-sensitive!"],
        },
        "shed_group_info": {
            "usage": "shed_group_info(group)",
            "desc": "Get group details and members",
            "workflows": ["Collaboration"],
            "howtos": [],
            "not_for": [],
            "tips": [],
        },
        "shed_copy_to_group": {
            "usage": "shed_copy_to_group(src_zone, src_path, group, dest_path, message=None)",
            "desc": "Copy file to a group",
            "workflows": ["Collaboration", "File Operations"],
            "howtos": [],
            "not_for": ["Locked Edit workflow", "Direct Write workflow"],
            "tips": [],
        },

        # === ZIP ===
        "shed_zip": {
            "usage": "shed_zip(zone, src, dest='', include_empty_dirs=False)",
            "desc": "Create ZIP archive from file or folder",
            "workflows": ["File Operations"],
            "howtos": ["commands"],
            "not_for": ["Locked Edit workflow", "Direct Write workflow"],
            "tips": [
                "src: file or folder to compress",
                "dest: output ZIP path (default: src + '.zip')",
                "include_empty_dirs=True: preserve empty directories in archive",
            ],
        },
        "shed_unzip": {
            "usage": "shed_unzip(zone, src, dest='')",
            "desc": "Extract ZIP archive",
            "workflows": ["File Operations", "Download"],
            "howtos": ["commands", "download"],
            "not_for": ["Locked Edit workflow"],
            "tips": [
                "src: path to ZIP file",
                "dest: extraction folder (default: same folder as ZIP)",
                "After curl download, use shed_unzip to extract",
            ],
        },
        "shed_zipinfo": {
            "usage": "shed_zipinfo(zone, path)",
            "desc": "List ZIP contents without extracting",
            "workflows": ["File Operations"],
            "howtos": ["commands"],
            "not_for": [],
            "tips": [],
        },

        # === INFO ===
        "shed_stats": {
            "usage": "shed_stats()",
            "desc": "Show storage usage statistics",
            "workflows": ["Info"],
            "howtos": [],
            "not_for": [],
            "tips": [],
        },
        "shed_parameters": {
            "usage": "shed_parameters()",
            "desc": "Show configuration limits (quota, file size, etc.)",
            "workflows": ["Info"],
            "howtos": ["network"],
            "not_for": [],
            "tips": [],
        },
        "shed_allowed_commands": {
            "usage": "shed_allowed_commands()",
            "desc": "List available shell commands",
            "workflows": ["Info", "Shell Commands"],
            "howtos": ["commands"],
            "not_for": [],
            "tips": [],
        },
        "shed_file_type": {
            "usage": "shed_file_type(zone, path)",
            "desc": "Detect file MIME type and extension",
            "workflows": ["Download", "File Operations"],
            "howtos": ["download"],
            "not_for": [],
            "tips": ["Useful after downloading files with unknown type"],
        },
        "shed_convert_eol": {
            "usage": "shed_convert_eol(zone, path, to='unix')",
            "desc": "Convert line endings (CRLF ↔ LF)",
            "workflows": ["File Operations"],
            "howtos": ["commands"],
            "not_for": [],
            "tips": [
                "to='unix': Convert to LF (\\n)",
                "to='windows': Convert to CRLF (\\r\\n)",
            ],
        },
        "shed_hexdump": {
            "usage": "shed_hexdump(zone, path, offset=0, length=256)",
            "desc": "Show hex dump of file (for binary inspection)",
            "workflows": ["File Operations"],
            "howtos": ["large_files"],
            "not_for": [],
            "tips": ["Useful for inspecting binary files without loading them"],
        },
        "shed_force_unlock": {
            "usage": "shed_force_unlock(zone, path, group=None)",
            "desc": "Force unlock a stuck file (crash recovery)",
            "workflows": ["Locked Edit"],
            "howtos": ["edit"],
            "not_for": [],
            "tips": [
                "Use if a file is stuck in edit mode after a crash",
                "For group files, use group= instead of zone=",
            ],
        },
        "shed_group_set_mode": {
            "usage": "shed_group_set_mode(group, path, mode)",
            "desc": "Change file permission mode in group",
            "workflows": ["Collaboration"],
            "howtos": [],
            "not_for": [],
            "tips": [
                "mode: 'owner' (only owner), 'group' (all members), 'owner_ro' (read-only for others)",
            ],
        },
        "shed_group_chown": {
            "usage": "shed_group_chown(group, path, new_owner)",
            "desc": "Transfer file ownership in group",
            "workflows": ["Collaboration"],
            "howtos": [],
            "not_for": [],
            "tips": ["new_owner: user ID of the new owner"],
        },
        "shed_help": {
            "usage": "shed_help(howto=None)",
            "desc": "Get help (general or specific topic)",
            "workflows": ["Info"],
            "howtos": [],
            "not_for": [],
            "tips": [
                "shed_help() for quick reference",
                "shed_help(howto='edit') for editing guide",
                "shed_help(howto='download') for download guide",
            ],
        },
        "shed_maintenance": {
            "usage": "shed_maintenance()",
            "desc": "Cleanup expired locks and orphan editzones",
            "workflows": ["Info"],
            "howtos": [],
            "not_for": [],
            "tips": [
                "Runs automatically, but can be called manually",
                "Cleans locks older than lock_max_age_hours",
            ],
        },
    }

    def _get_function_help(self, func_name: str) -> str:
        """Generate contextual help for a function."""
        if func_name not in self.FUNCTION_HELP:
            return ""

        info = self.FUNCTION_HELP[func_name]
        lines = [
            f"\n📖 HELP for {func_name}:",
            f" Usage: {info['usage']}",
            f" → {info['desc']}",
        ]

        # Workflows this function belongs to (most important for LLM!)
        if info.get("workflows"):
            wf_list = ", ".join(info["workflows"])
            lines.append(f" 🔧 Belongs to workflow(s): {wf_list}")

        # What this function is NOT for (avoid confusion)
        if info.get("not_for"):
            not_list = ", ".join(info["not_for"])
            lines.append(f" ⛔ NOT for: {not_list}")

        # Tips
        if info.get("tips"):
            lines.append(" 💡 Tips:")
            for tip in info["tips"]:
                lines.append(f" • {tip}")

        # Related howto guides
        if info.get("howtos"):
            howto_list = ", ".join(f'shed_help(howto="{h}")' for h in info["howtos"])
            lines.append(f" 📚 More info: {howto_list}")

        return "\n".join(lines)

    # HOWTO Guides
    HOWTO_GUIDES = {
        "download": """
# HOWTO: Download and work with files from the internet

## When to use curl

**Use curl to download files you need to WORK with** (save, process, import, convert):
- Datasets (CSV, JSON, XML)
- API responses
- Repositories
- Any file to process locally

curl saves the full content to disk for further processing.

## Quick Recipe
```
shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "data.csv", "URL"])
```

## Common data sources

### World/country data
```
# REST Countries API (JSON with population, area, etc.)
shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "countries.json",
    "https://restcountries.com/v3.1/all?fields=name,area,population"])
```

### GitHub raw files
```
shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "data.csv",
    "https://raw.githubusercontent.com/user/repo/main/data.csv"])
```

## Download + import to SQLite
```
shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "data.csv", "URL"])
shed_sqlite(zone="storage", path="db.sqlite", import_csv="data.csv", table="mytable")
```

## Important
- `-o filename` saves to file (mandatory!)
- `-L` follows redirects
- Requires network_mode = "safe" or "all"
""",

        "csv_to_sqlite": """
# HOWTO: Insert CSV data into SQLite

## ⚠️ CRITICAL: When CREATING CSV files (RFC 4180)

If you generate a CSV file, follow these quoting rules:

**Quote a field if it contains:**
- The delimiter (`,` or `;`)
- A newline
- Double quotes

**Inside quoted fields, escape quotes by doubling them:**

```
✅ CORRECT:
name,location,comment
"Acme Corp","New York, NY","Contains ""quotes"" here"
"Café du Monde","Paris, France","Great coffee"
Simple Value,Boston,No quotes needed

❌ WRONG (extra columns / broken parsing):
name,location,comment
Acme Corp,New York, NY,No quotes
Café,Said "hello",Unescaped quote
```

**Rules summary:**
- Field with comma → `"New York, NY"`
- Field with quote → `"Contains ""quotes"" inside"`
- Field with newline → `"Line1\nLine2"` (whole field quoted)
- Simple field → `Boston` (no quotes needed)

## Quick Recipe (RECOMMENDED)
```
# Basic import (auto-detects delimiter and encoding)
shed_sqlite(zone="storage", path="database.db",
    import_csv="data.csv", table="users")
```

## Why use shed_sqlite with import_csv?
1. **No context pollution**: CSV stays on disk, not in your context
2. **Fast**: Batch INSERT (1000 rows at a time), uses pandas if available
3. **Smart auto-detection**: Delimiter, encoding, separators detected automatically
4. **Auto-creates table**: Column names from CSV headers

## All CSV Parameters

| Parameter | Values | Description |
|-----------|--------|-------------|
| `import_csv` | path | CSV file path (in same zone) |
| `table` | name | Target table name (required) |
| `if_exists` | `"fail"` / `"replace"` / `"append"` | What to do if table exists (default: "fail") |
| `delimiter` | `","` `";"` `"\\t"` `"|"` | CSV delimiter. `None` = auto-detect |
| `encoding` | `"utf-8"` `"latin-1"` `"cp1252"` | File encoding. `None` = auto-detect |
| `date_columns` | `["col1", "col2"]` | Columns to parse as dates |
| `date_format` | `"dayfirst"` / `"monthfirst"` / `"%d/%m/%Y"` | Date format |
| `decimal` | `","` | Decimal separator (European: `","`) |
| `skip_rows` | `0`, `1`, `2`... | Skip N rows before header |
| `has_header` | `True` / `False` | ⚠️ Does first row contain column names? Default: True. If False → columns named col_1, col_2... |

## Common Scenarios

### Standard CSV (comma-separated, UTF-8)
```
shed_sqlite(zone="storage", path="db.sqlite",
    import_csv="data.csv", table="data")
```

### French/European CSV (semicolon, comma decimal, ISO-8859-1)
```
shed_sqlite(zone="storage", path="db.sqlite",
    import_csv="french.csv", table="ventes",
    delimiter=";", decimal=",", encoding="latin-1")
```

### Excel export (often semicolon + Windows encoding)
```
shed_sqlite(zone="storage", path="db.sqlite",
    import_csv="export.csv", table="data",
    delimiter=";", encoding="cp1252")
```

### TSV file (tab-separated)
```
shed_sqlite(zone="storage", path="db.sqlite",
    import_csv="data.tsv", table="data",
    delimiter="\\t")
```

### With European dates (DD/MM/YYYY)
```
shed_sqlite(zone="storage", path="db.sqlite",
    import_csv="orders.csv", table="orders",
    date_columns=["order_date", "ship_date"],
    date_format="dayfirst")
```

### With US dates (MM/DD/YYYY)
```
shed_sqlite(zone="storage", path="db.sqlite",
    import_csv="orders.csv", table="orders",
    date_columns=["order_date"],
    date_format="monthfirst")
```

### Skip title rows (file has title + blank line before headers)
```
shed_sqlite(zone="storage", path="db.sqlite",
    import_csv="report.csv", table="report",
    skip_rows=2)
```

### CSV without header row (data only)
⚠️ If your CSV has NO column names on first line, use `has_header=False`.
Otherwise the first data row will be interpreted as column names!
```
shed_sqlite(zone="storage", path="db.sqlite",
    import_csv="raw_data.csv", table="raw",
    has_header=False)
# → Creates columns: col_1, col_2, col_3, ...
```

### Replace existing table
```
shed_sqlite(zone="storage", path="db.sqlite",
    import_csv="updated.csv", table="users",
    if_exists="replace")
```

## Step-by-step Example

### 1. Check your CSV structure first
```
shed_exec(zone="storage", cmd="head", args=["-5", "data.csv"])
shed_exec(zone="storage", cmd="file", args=["data.csv"])  # Check encoding
```

### 2. Import (auto-detection usually works)
```
shed_sqlite(zone="storage", path="mydb.db",
    import_csv="data.csv", table="users")
```

### 3. Check the response for detection info
```json
{
  "import_info": {
    "method": "pandas",
    "detected_encoding": "latin-1",
    "detected_delimiter": "';'"
  }
}
```

### 4. Query the data
```
shed_sqlite(zone="storage", path="mydb.db", query="SELECT COUNT(*) FROM users")
shed_sqlite(zone="storage", path="mydb.db", query="SELECT * FROM users LIMIT 5")
```

## ⚠️ SELECT Query Limits (Context Protection)

**By default, SELECT without LIMIT returns only 10 rows** to protect your context.

### Getting more rows
```
# Request more rows explicitly
shed_sqlite(..., query="SELECT * FROM users", limit=100)

# Or use LIMIT in SQL (respected as-is)
shed_sqlite(..., query="SELECT * FROM users LIMIT 50")
```

### Export ALL results to CSV (no context pollution!)
```
# Writes all rows to file, returns only stats
shed_sqlite(zone="storage", path="db.sqlite",
    query="SELECT * FROM users",
    output_csv="users_export.csv")
# → {"rows_exported": 5432, "output_csv": "users_export.csv"}

# Then use the CSV file
shed_exec(zone="storage", cmd="head", args=["-20", "users_export.csv"])
```

### Disable limit (use with caution!)
```
shed_sqlite(..., query="SELECT * FROM users", limit=0)
# → Returns ALL rows - be careful with large tables!
```

## Troubleshooting

### "UnicodeDecodeError"
→ Try `encoding="latin-1"` or `encoding="cp1252"`

### Wrong columns / data in wrong fields
→ Check delimiter: `delimiter=";"` or `delimiter="\\t"`

### Numbers with commas (1.234,56)
→ Add `decimal=","`

### Dates not parsed correctly
→ Add `date_columns=["col"]` and `date_format="dayfirst"`

### Headers on line 3 (title rows above)
→ Add `skip_rows=2`

## NEVER do this (pollutes context, very slow):
```
# BAD: Reading file content then inserting row by row
content = shed_exec(zone="storage", cmd="cat", args=["huge.csv"])  # Pollutes context!
for row in content:  # Slow!
    shed_sqlite(query="INSERT INTO...")  # One call per row!
```
""",

        "upload": """
# HOWTO: Handle user-uploaded files

## Quick Recipe
```
# Step 1: Import the file (MANDATORY)
shed_import(import_all=True)

# Step 2: Move to Storage for processing
shed_move_uploads_to_storage(src="filename.csv", dest="filename.csv")

# Step 3: Now you can work with it
shed_exec(zone="storage", cmd="head", args=["-10", "filename.csv"])
```

## Why is shed_import() mandatory?
Files uploaded by users are NOT automatically accessible. They must be imported first.

## Workflow for multiple files
```
# Import all at once
shed_import(import_all=True)

# Move each file
shed_move_uploads_to_storage(src="data.csv", dest="data.csv")
shed_move_uploads_to_storage(src="config.json", dest="config.json")
```

## Workflow for specific file
```
shed_import(filename="report.pdf")
shed_move_uploads_to_storage(src="report.pdf", dest="reports/report.pdf")
```

## Check what was uploaded
```
shed_exec(zone="uploads", cmd="ls", args=["-la"])
```

## Move to Documents (version-controlled)
```
shed_import(import_all=True)
shed_move_uploads_to_documents(src="important.docx", dest="important.docx")
```
""",

        "share": """
# HOWTO: Share files with the user

## shed_link - Create download links

Three functions to manage download links:

| Function | Description |
|----------|-------------|
| `shed_link_create()` | Create download link |
| `shed_link_list()` | List your download links |
| `shed_link_delete()` | Remove a download link |

## shed_link_create - Create a link
```
shed_link_create(zone="storage", path="report.pdf")
shed_link_create(zone="storage", path="archive.zip")
shed_link_create(zone="documents", path="presentation.pptx")
shed_link_create(zone="group", group="team", path="shared/data.csv")
```
- Works for ANY file type (PDF, ZIP, images, CSV, etc.)
- Returns `clickable_link` in Markdown format → **show it directly to the user!**
- Example response: `"clickable_link": "[📥 Download report.pdf](https://...)"`

## shed_link_list - List links
```
shed_link_list()
```
- Returns list of all download links you've created
- Each link has a `clickable_link` field ready to display

## shed_link_delete - Remove a download link
```
shed_link_delete(file_id="abc123-...")
```
- Removes file from Open WebUI
- Use file_id from shed_link_create() or shed_link_list()
""",

        "edit": """
# HOWTO: Edit files

## 🔥 CHOOSE YOUR WORKFLOW

### WORKFLOW 1: Direct Write (simple, no locking)
For quick edits when you don't need locking:

```
# Overwrite entire file
shed_patch_text(zone="storage", path="file.txt", content="New content", overwrite=True)

# Append to file
shed_patch_text(zone="storage", path="file.txt", content="\\nNew line", position="end")

# Replace pattern
shed_patch_text(zone="storage", path="config.py", content="DEBUG=False", pattern="DEBUG=True", position="replace")
```

⚠️ `overwrite` is a PARAMETER (True/False), NOT a position value!
```
✅ CORRECT: shed_patch_text(..., overwrite=True)
❌ WRONG:   shed_patch_text(..., position="overwrite")
```

### WORKFLOW 2: Locked Edit (with locking)
For concurrent access or when you need rollback:

```
# Step 1: Open (locks file)
shed_lockedit_open(zone="storage", path="config.json")

# Step 2: Modify (use shed_lockedit_overwrite, NOT shed_patch_text!)
shed_lockedit_overwrite(zone="storage", path="config.json", content="new content")

# Step 3: Save OR Cancel
shed_lockedit_save(zone="storage", path="config.json")    # ⚠️ CLOSES edit mode!
# OR
shed_lockedit_cancel(zone="storage", path="config.json")  # Discard changes
```

⚠️ **CRITICAL**: `shed_lockedit_save()` CLOSES edit mode!
To edit again, you MUST call `shed_lockedit_open()` first.

```
❌ WRONG (will fail):
shed_lockedit_save(...)
shed_lockedit_save(...)  # ERROR: NOT_IN_EDIT_MODE

✅ CORRECT:
shed_lockedit_save(...)
shed_lockedit_open(...)  # Reopen first!
shed_lockedit_overwrite(...)
shed_lockedit_save(...)
```

## Quick Reference

| Task | Command |
|------|---------|
| Overwrite file | `shed_patch_text(..., overwrite=True)` |
| Append to file | `shed_patch_text(..., position="end")` |
| Prepend to file | `shed_patch_text(..., position="start")` |
| Insert before line N | `shed_patch_text(..., position="before", line=N)` |
| Insert after line N | `shed_patch_text(..., position="after", line=N)` |
| Replace pattern | `shed_patch_text(..., pattern="...", position="replace")` |
| Replace line N | `shed_patch_text(..., position="replace", line=N)` |
| Safe edit (lock) | `shed_lockedit_open` → `shed_lockedit_overwrite` → `shed_lockedit_save` |

📌 **Line numbers start at 1** (first line = line=1, NOT line=0)

⚠️ **position="at" is for shed_patch_bytes (binary), NOT for text!**

## ⚠️ Creating CSV files (RFC 4180)

**Quote a field if it contains:** delimiter (`,`), newline, or double quotes.
**Escape quotes by doubling them:** `""` inside quoted fields.

```
✅ CORRECT:
name,location,comment
"Acme Corp","New York, NY","Has ""quotes"" inside"
Simple,Boston,No quotes needed

❌ WRONG:
name,location,comment
Acme Corp,New York, NY,Unquoted comma
```

Unquoted special characters break CSV parsing!
""",

        "commands": """
# HOWTO: Available commands by zone

## Check what's available
```
shed_allowed_commands()
```

## Uploads zone (READ-ONLY)
cat, head, tail, less, wc, stat, file, du, ls, find, grep, awk, sed (read),
sort, uniq, cut, diff, tar (list), unzip (list), md5sum, sha256sum, jq, etc.

## Storage zone (READ-WRITE)
All read-only commands PLUS:
cp, mv, rm, mkdir, rmdir, touch, chmod, tar (create/extract),
zip, gzip, gunzip, patch, split, csplit, truncate, etc.

Network commands (if enabled): curl, wget, git

## Documents zone (READ-WRITE + VERSIONED)
Same as Storage, with automatic Git commits.

## Group zone
Same as Documents, with ownership/permission checks.

## Builtins (ALWAYS available, no system dependency)
These work even if system commands are missing:

| Builtin | Replaces | Use when... |
|---------|----------|-------------|
| `shed_tree(zone, path, depth)` | `tree` | tree command missing |
| `shed_zip(zone, src, dest, include_empty_dirs=False)` | `zip` | zip command missing |
| `shed_unzip(zone, src, dest)` | `unzip` | unzip command missing |
| `shed_zipinfo(zone, path)` | `zipinfo` | zipinfo command missing |
| `shed_file_type(zone, path)` | `file` | file command missing |
| `shed_convert_eol(zone, path, to)` | `dos2unix` | dos2unix/unix2dos missing |
| `shed_hexdump(zone, path, offset, length)` | `xxd`/`hexdump` | hex tools missing |
| `shed_sqlite(zone, path, query, params)` | `sqlite3` | sqlite3 CLI missing |
| `shed_sqlite(zone, path, import_csv, table)` | `sqlite3 .import` | Import CSV into SQLite (fast!) |

## Workarounds for missing commands

### No `jq`? Use grep/sed for simple JSON
```
shed_exec(zone="storage", cmd="grep", args=['"key":', "file.json"])
```

### No `tree`? Use builtin
```
shed_tree(zone="storage", path=".", depth=3)
```

### No `sqlite3` CLI? Use builtin for queries
```
shed_sqlite(zone="storage", path="db.sqlite", query="SELECT * FROM users")
```

### Import CSV to SQLite (RECOMMENDED - no context pollution!)
```
shed_sqlite(zone="storage", path="db.sqlite", import_csv="data.csv", table="mytable")
```

### No `unzip`? Use builtin
```
shed_unzip(zone="storage", src="archive.zip", dest="extracted/")
```

### No `file`? Use builtin
```
shed_file_type(zone="storage", path="unknown.bin")
```

### Large file processing without loading into context
```
# Count lines
shed_exec(zone="storage", cmd="wc", args=["-l", "huge.csv"])

# Get first/last lines
shed_exec(zone="storage", cmd="head", args=["-100", "huge.csv"])
shed_exec(zone="storage", cmd="tail", args=["-100", "huge.csv"])

# Search without reading whole file
shed_exec(zone="storage", cmd="grep", args=["pattern", "huge.csv"])

# Extract specific columns (CSV)
shed_exec(zone="storage", cmd="cut", args=["-d,", "-f1,3", "data.csv"])

# Process with awk (no context pollution)
shed_exec(zone="storage", cmd="awk", args=["-F,", "{sum+=$2} END {print sum}", "data.csv"])
```
""",

        "network": """
# HOWTO: Network access

## Check if network is enabled
```
shed_parameters()
# Look for: "network_mode": "disabled" | "safe" | "all"
```

## Network modes

### disabled (default)
- No network access
- curl, wget, git clone all blocked

### safe (downloads only)
- curl/wget GET requests allowed
- git clone/fetch/pull allowed
- git push BLOCKED
- No data exfiltration possible

### all (full access - ⚠️ risky)
- Everything allowed including uploads
- git push allowed
- Use with caution
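
### Example: what "safe" blocks (sketch; exact error text may differ)
```
shed_exec(zone="storage", cmd="curl", args=["-d", "x=1", "https://api.example.org"])
# → rejected: -d/--data is an upload option (exfiltration risk)
```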
|
|
|
|
## Download with curl (requires "safe" or "all")
|
|
```
|
|
shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "file.zip", "https://..."])
|
|
```
|
|
|
|
## Clone git repo (requires "safe" or "all")
|
|
```
|
|
shed_exec(zone="storage", cmd="git", args=["clone", "https://github.com/user/repo.git"])
|
|
```
|
|
""",
|
|
|
|
"paths": """
|
|
# HOWTO: Path rules
|
|
|
|
## ⚠️ CRITICAL: Never include the zone name in the path!
|
|
|
|
The `zone` parameter already specifies WHERE to operate. The path in `args` is RELATIVE to that zone.
|
|
|
|
**Fileshed automatically rejects paths that start with the zone name** (error code: PATH_STARTS_WITH_ZONE).
|
|
|
|
### Example of the mistake
|
|
|
|
User asks: "In Documents, create a folder MyProject"
|
|
|
|
```
|
|
❌ REJECTED (would create Documents/Documents/MyProject):
|
|
shed_exec(zone="Documents", cmd="mkdir", args=["-p", "Documents/MyProject"])
|
|
→ Error: PATH_STARTS_WITH_ZONE
|
|
|
|
✅ CORRECT (creates Documents/MyProject):
|
|
shed_exec(zone="Documents", cmd="mkdir", args=["-p", "MyProject"])
|
|
```
|
|
|
|
### Why this happens
|
|
|
|
The zone parameter already points to the Documents folder:
|
|
- zone="Documents" → You're working INSIDE Documents
|
|
- args=["MyProject"] → Creates MyProject/ inside Documents
|
|
- args=["Documents/MyProject"] → Creates Documents/MyProject/ inside Documents (WRONG!)
|
|
|
|
### More examples
|
|
|
|
```
|
|
✅ CORRECT: shed_exec(zone="storage", cmd="cat", args=["projects/file.txt"])
|
|
❌ REJECTED: shed_exec(zone="storage", cmd="cat", args=["Storage/projects/file.txt"])
|
|
|
|
✅ CORRECT: shed_exec(zone="documents", cmd="ls", args=["reports"])
|
|
❌ REJECTED: shed_exec(zone="documents", cmd="ls", args=["Documents/reports"])
|
|
|
|
✅ CORRECT: shed_exec(zone="documents", cmd="mkdir", args=["-p", "Projects/2024"])
|
|
❌ REJECTED: shed_exec(zone="documents", cmd="mkdir", args=["-p", "Documents/Projects/2024"])
|
|
```
|
|
|
|
## Exception: allow_zone_in_path
|
|
|
|
In rare cases where the user explicitly wants a subfolder named after the zone
|
|
(e.g., a "Storage" folder inside Storage), use the `allow_zone_in_path` parameter:
|
|
|
|
```
|
|
# User explicitly wants: Storage/Storage/backup/
|
|
shed_exec(zone="storage", cmd="mkdir", args=["-p", "Storage/backup"], allow_zone_in_path=True)
|
|
```
|
|
|
|
This parameter is available on: `shed_exec`, `shed_patch_text`, `shed_patch_bytes`,
|
|
`shed_delete`, `shed_rename`, all `shed_lockedit_*` functions, `shed_copy_to_group`,
|
|
and all `shed_move_*/shed_copy_*` bridge functions.
|
|
|
|
**Only use this when the user explicitly confirms they want a subfolder with that name.**
|
|
|
|
## Zone roots
|
|
|
|
Paths are always relative to the zone root:
|
|
- Uploads: per-conversation (auto-managed)
|
|
- Storage: your personal workspace
|
|
- Documents: your versioned documents
|
|
- Group: shared group space
|
|
|
|
## Case sensitivity
|
|
|
|
- **Zone parameter**: case-insensitive ("Storage" = "storage" = "STORAGE")
|
|
- **Group name**: ⚠️ **CASE-SENSITIVE** ("MyTeam" ≠ "myteam" ≠ "MYTEAM")
|
|
- **File paths**: depends on filesystem (usually case-sensitive on Linux)
|
|
|
|
## Creating folders
|
|
|
|
```
|
|
shed_exec(zone="storage", cmd="mkdir", args=["-p", "projects/webapp/src"])
|
|
```
|
|
|
|
## Listing contents
|
|
|
|
```
|
|
shed_exec(zone="storage", cmd="ls", args=["-la"]) # Root of Storage
|
|
shed_exec(zone="storage", cmd="ls", args=["-la", "projects"]) # Subfolder
|
|
shed_tree(zone="storage", path=".", depth=3) # Tree view
|
|
```
|
|
""",

        "large_files": """
# HOWTO: Process large files without context pollution

## Golden Rule
NEVER read a large file into your context. Use tools that process files on disk.

## ❌ WRONG (pollutes context)
```
content = shed_exec(zone="storage", cmd="cat", args=["huge.csv"])  # 100MB in context!
# Then trying to process it...
```

## ✅ CORRECT (data stays on disk)

### Get file info without reading
```
shed_exec(zone="storage", cmd="wc", args=["-l", "huge.csv"])    # Line count
shed_exec(zone="storage", cmd="du", args=["-h", "huge.csv"])    # File size
shed_exec(zone="storage", cmd="head", args=["-5", "huge.csv"])  # First 5 lines (structure)
```

### Search without loading
```
shed_exec(zone="storage", cmd="grep", args=["error", "huge.log"])          # Find lines
shed_exec(zone="storage", cmd="grep", args=["-c", "error", "huge.log"])    # Count matches
shed_exec(zone="storage", cmd="grep", args=["-n", "pattern", "file.txt"])  # With line numbers
```

### Extract specific data (CSV)
```
# Get specific columns
shed_exec(zone="storage", cmd="cut", args=["-d,", "-f1,3,5", "data.csv"])

# Filter rows
shed_exec(zone="storage", cmd="awk", args=["-F,", "$3 > 100", "data.csv"])

# Sum a column
shed_exec(zone="storage", cmd="awk", args=["-F,", "{sum+=$2} END {print sum}", "data.csv"])

# Count unique values
shed_exec(zone="storage", cmd="cut", args=["-d,", "-f2", "data.csv"])
# There is no shell piping here, so do the whole aggregation in a single awk call:
shed_exec(zone="storage", cmd="awk", args=["-F,", "{a[$2]++} END {for(k in a) print k,a[k]}", "data.csv"])
|
|
```
|
|
|
|
### Transform files on disk
|
|
```
|
|
# Sort a file (output to new file)
|
|
shed_exec(zone="storage", cmd="sort", args=["input.csv", "-o", "sorted.csv"])
|
|
|
|
# Remove duplicates
|
|
shed_exec(zone="storage", cmd="sort", args=["-u", "input.txt", "-o", "unique.txt"])
|
|
|
|
# Convert encoding
|
|
shed_exec(zone="storage", cmd="iconv", args=["-f", "ISO-8859-1", "-t", "UTF-8", "old.txt", "-o", "new.txt"])
|
|
```
|
|
|
|
### Split large files
|
|
```
|
|
# Split by line count (1000 lines per file)
|
|
shed_exec(zone="storage", cmd="split", args=["-l", "1000", "huge.csv", "part_"])
|
|
|
|
# Split by size (10MB per file)
|
|
shed_exec(zone="storage", cmd="split", args=["-b", "10M", "huge.bin", "chunk_"])
|
|
```
|
|
|
|
### Process into database (best for structured data)
|
|
```
|
|
# Import CSV directly into SQLite (see howto="csv_to_sqlite")
|
|
shed_exec(zone="storage", cmd="sqlite3", args=[
|
|
"data.db", "-cmd", ".mode csv", "-cmd", ".import huge.csv mytable"
|
|
])
|
|
|
|
# Then query without loading everything
|
|
shed_sqlite(zone="storage", path="data.db",
|
|
query="SELECT category, COUNT(*), AVG(value) FROM mytable GROUP BY category")
|
|
```
|
|
|
|
### JSON processing
|
|
```
|
|
# Extract specific field (if jq available)
|
|
shed_exec(zone="storage", cmd="jq", args=[".results[].name", "huge.json"])
|
|
|
|
# If jq missing, use grep for simple extraction
|
|
shed_exec(zone="storage", cmd="grep", args=["-o", '"name": "[^"]*"', "data.json"])
|
|
```
|
|
|
|
### Redirect output to file (like shell > redirection)
|
|
```
|
|
# Transform JSON to CSV and save to file (instead of returning in context)
|
|
shed_exec(zone="storage", cmd="jq",
|
|
args=["-r", ".[] | [.name, .value] | @csv", "data.json"],
|
|
stdout_file="output.csv")
|
|
|
|
# Combine stderr with stdout
|
|
shed_exec(zone="storage", cmd="some_cmd", args=["..."],
|
|
stdout_file="all_output.log", redirect_stderr_to_stdout=True)
|
|
```
|
|
|
|
## Summary: Tools for large files
|
|
|
|
| Task | Tool | Example |
|
|
|------|------|---------|
|
|
| Line count | `wc -l` | `wc -l file.csv` |
|
|
| File size | `du -h` | `du -h file.csv` |
|
|
| Preview | `head`/`tail` | `head -20 file.csv` |
|
|
| Search | `grep` | `grep pattern file` |
|
|
| Columns | `cut` | `cut -d, -f1,3 file.csv` |
|
|
| Aggregation | `awk` | `awk -F, '{sum+=$2} END {print sum}' file.csv` |
|
|
| Sort | `sort` | `sort file -o sorted` |
|
|
| Transform | `sed` | `sed 's/old/new/g' file` |
|
|
| Database | `sqlite3` | `.import file.csv table` |
|
|
| Save output | `stdout_file=` | `shed_exec(..., stdout_file="out.txt")` |
|
|
"""
|
|
}

    def _get_user_root(self, __user__: dict) -> Path:
        """Returns the user's root directory."""
        user_id = __user__.get("id", "anonymous")
        return Path(self.valves.storage_base_path) / "users" / user_id

    def _get_groups_root(self) -> Path:
        """Returns the groups root directory."""
        return Path(self.valves.storage_base_path) / "groups"

    def _get_db_path(self) -> Path:
        """Returns the SQLite database path."""
        return Path(self.valves.storage_base_path) / "access_auth.sqlite"

    def _get_conv_id(self, __metadata__: dict) -> str:
        """Returns the conversation ID."""
        return __metadata__.get("chat_id", "unknown")

    def _resolve_zone(
        self,
        zone: str,
        group: Optional[str],
        __user__: dict,
        __metadata__: dict,
        require_write: bool = False,
    ) -> ZoneContext:
        """
        Resolves a zone string to a complete ZoneContext.

        :param zone: "uploads", "storage", "documents", or "group"
        :param group: Group name/ID (required if zone="group")
        :param __user__: Open WebUI user context
        :param __metadata__: Conversation metadata
        :param require_write: If True, rejects the uploads zone
        :return: ZoneContext with all zone-specific info
        :raises StorageError: If the zone is invalid or access is denied
        """
        zone_lower = zone.lower()
        user_root = self._get_user_root(__user__)
        conv_id = self._get_conv_id(__metadata__)

        # UPLOADS: read-only, isolated per conversation
        if zone_lower == "uploads":
            if require_write:
                raise StorageError(
                    "ZONE_READONLY",
                    "Uploads zone is read-only",
                    {"zone": zone},
                    "Use 'storage' or 'documents' for write operations"
                )
            zone_path = user_root / "Uploads" / conv_id
            return ZoneContext(
                zone_root=zone_path,
                zone_name="Uploads",
                zone_lower="uploads",
                editzone_base=None,
                conv_id=conv_id,
                group_id=None,
                git_commit=False,
                readonly=True,
                whitelist=WHITELIST_READONLY,
            )

        # STORAGE: read/write, no Git
        elif zone_lower == "storage":
            zone_path = user_root / "Storage" / "data"
            self._ensure_dir(zone_path)
            return ZoneContext(
                zone_root=zone_path,
                zone_name="Storage",
                zone_lower="storage",
                editzone_base=user_root / "Storage",
                conv_id=conv_id,
                group_id=None,
                git_commit=False,
                readonly=False,
                whitelist=WHITELIST_READWRITE,
            )

        # DOCUMENTS: read/write, auto Git
        elif zone_lower == "documents":
            zone_path = user_root / "Documents" / "data"
            self._ensure_dir(zone_path)
            self._init_git_repo(zone_path)
            return ZoneContext(
                zone_root=zone_path,
                zone_name="Documents",
                zone_lower="documents",
                editzone_base=user_root / "Documents",
                conv_id=conv_id,
                group_id=None,
                git_commit=True,
                readonly=False,
                whitelist=WHITELIST_READWRITE,
            )

        # GROUP: like Documents but with group validation
        elif zone_lower == "group":
            if not group:
                raise StorageError(
                    "MISSING_PARAMETER",
                    "Group parameter is required for zone='group'",
                    {"zone": zone},
                    "Use: shed_xxx(zone='group', group='team-name', ...)"
                )

            group_id = self._validate_group_id(group)
            self._check_group_access(__user__, group_id)
            zone_path = self._ensure_group_space(group_id)

            return ZoneContext(
                zone_root=zone_path,
                zone_name=f"Group:{group}",
                zone_lower="group",
                editzone_base=self._get_groups_root() / group_id,
                conv_id=conv_id,
                group_id=group_id,
                git_commit=True,
                readonly=False,
                whitelist=WHITELIST_READWRITE,
            )

        else:
            raise StorageError(
                "INVALID_ZONE",
                f"Invalid zone: {zone}",
                {"zone": zone, "valid": ["uploads", "storage", "documents", "group"]},
                "Use one of: uploads, storage, documents, group"
            )
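
    # Illustrative sketch, not part of the runtime API: resolving the personal
    # storage zone yields a context rooted at {base}/users/{uid}/Storage/data.
    # The user dict and chat_id below are hypothetical values.
    #
    #   ctx = self._resolve_zone("storage", None, {"id": "u1"}, {"chat_id": "c1"})
    #   ctx.zone_root   -> Path(".../users/u1/Storage/data")
    #   ctx.git_commit  -> False   (Storage is not versioned)
    #   ctx.readonly    -> False
    #   ctx.whitelist   -> WHITELIST_READWRITE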

    def _resolve_chroot_path(self, base: Path, relative_path: str) -> Path:
        """
        Resolves a relative path within a chroot and verifies it doesn't escape.
        Raises PATH_ESCAPE if an escape attempt is detected.
        Also detects symlinks that could point outside the chroot.
        """
        # Clean the path
        relative_path = relative_path.lstrip("/")

        # Build the path without resolving symlinks first
        raw_path = base / relative_path

        # Check for symlinks in the path that could escape the chroot.
        # Walk from base to target, checking each existing component.
        current = base.resolve()
        parts = Path(relative_path).parts
        for part in parts:
            next_path = current / part
            if next_path.is_symlink():
                # Symlink found - resolve it and verify it stays in the chroot
                link_target = next_path.resolve()
                base_resolved = base.resolve()
                try:
                    link_target.relative_to(base_resolved)
                except ValueError:
                    raise StorageError(
                        "PATH_ESCAPE",
                        "Symlink escape attempt detected",
                        {"path": relative_path, "symlink": str(next_path)},
                        "Symlinks pointing outside the zone are not allowed"
                    )
            if next_path.exists():
                current = next_path.resolve()
            else:
                # Path doesn't exist yet; the remaining parts are for a new file/dir
                break

        # Resolve the final path
        target = raw_path.resolve()
        base_resolved = base.resolve()

        # Verify we stay in the chroot
        try:
            target.relative_to(base_resolved)
        except ValueError:
            raise StorageError(
                "PATH_ESCAPE",
                "Chroot escape attempt detected",
                {"path": relative_path, "chroot": str(base)},
                "Use only relative paths without ../"
            )

        return target
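
    # Illustrative sketch of the escape detection above (hypothetical paths):
    #
    #   _resolve_chroot_path(Path("/srv/u1/Storage/data"), "notes/todo.txt")
    #       -> /srv/u1/Storage/data/notes/todo.txt
    #   _resolve_chroot_path(Path("/srv/u1/Storage/data"), "a/../../etc/passwd")
    #       -> raises StorageError("PATH_ESCAPE", ...) because the resolved
    #          target no longer satisfies target.relative_to(base_resolved)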

    def _validate_relative_path(
        self,
        path: str,
        zone_name: str = None,
        allow_zone_in_path: bool = False
    ) -> str:
        """
        Validates that a relative path contains no traversal.
        Returns the cleaned and normalized path.

        :param path: The path to validate
        :param zone_name: If provided, checks that the path doesn't start with the zone name
        :param allow_zone_in_path: If True, allows the path to start with the zone name
        """
        # Normalize Unicode to NFC (prevents path confusion attacks)
        path = unicodedata.normalize("NFC", path)

        # Clean
        path = path.lstrip("/")

        # Block absolute paths
        if path.startswith("/"):
            raise StorageError(
                "PATH_ESCAPE",
                "Absolute paths forbidden",
                {"path": path},
                "Use only relative paths"
            )

        # Block .. that escapes the current directory.
        # Virtually resolve the path to check.
        parts = []
        for part in path.split("/"):
            if part == "..":
                if not parts:
                    raise StorageError(
                        "PATH_ESCAPE",
                        "Directory escape attempt",
                        {"path": path},
                        "Paths with .. going too high are forbidden"
                    )
                parts.pop()
            elif part and part != ".":
                parts.append(part)

        cleaned_path = "/".join(parts) if parts else ""

        # Check if the path starts with the zone name (common LLM mistake)
        if zone_name and not allow_zone_in_path and parts:
            # Zone names to check (case-insensitive): Storage, Documents, Uploads.
            # For groups, zone_name is like "group:team-name"; we extract just "group".
            zone_check = zone_name.split(":")[0].lower()
            first_part_lower = parts[0].lower()

            if first_part_lower == zone_check:
                raise StorageError(
                    "PATH_STARTS_WITH_ZONE",
                    f"Path '{cleaned_path}' starts with zone name '{parts[0]}'",
                    {"zone": zone_name, "path": cleaned_path, "first_component": parts[0]},
                    f"The zone parameter already sets the working directory. "
                    f"Use '{'/'.join(parts[1:])}' instead of '{cleaned_path}'. "
                    f"If you really want a subfolder named '{parts[0]}', add allow_zone_in_path=True"
                )

        return cleaned_path
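
    # Illustrative sketch of the virtual resolution above:
    #   "a/b/../c"                                -> parts ["a", "c"], returns "a/c"
    #   "../etc/passwd"                           -> ".." on an empty stack, raises PATH_ESCAPE
    #   "Storage/x" with zone_name="Storage"      -> raises PATH_STARTS_WITH_ZONE
    #   "Storage/x" with allow_zone_in_path=True  -> returns "Storage/x"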

    def _validate_group_id(self, group_id: str) -> str:
        """
        Validates and resolves a group identifier.
        Accepts either a group ID (UUID) or a group name (case-sensitive).
        Returns the validated group ID.
        """
        if not group_id:
            raise StorageError(
                "INVALID_GROUP_ID",
                "Group ID cannot be empty"
            )

        # Block path traversal characters
        if ".." in group_id or "/" in group_id or "\\" in group_id:
            raise StorageError(
                "INVALID_GROUP_ID",
                "Invalid group ID: contains forbidden characters",
                {"group_id": group_id},
                "Group ID cannot contain '..' or path separators"
            )

        # Block null bytes and other control characters
        if any(ord(c) < 32 for c in group_id):
            raise StorageError(
                "INVALID_GROUP_ID",
                "Group ID contains invalid characters"
            )

        # Check if it's a valid UUID (group ID)
        try:
            uuid.UUID(group_id)
            return group_id  # Already a valid UUID
        except ValueError:
            pass  # Not a UUID, try to resolve it as a group name

        # Try to resolve the group name to an ID (case-sensitive)
        if GROUPS_AVAILABLE:
            try:
                # Search for the group by name
                groups = Groups.get_all_groups()
                case_insensitive_matches = []

                for g in groups:
                    if g.name == group_id:
                        return g.id  # Exact match found
                    # Collect case-insensitive matches for the hint
                    if g.name.lower() == group_id.lower():
                        case_insensitive_matches.append(g.name)

                # No exact match - check if there's a case mismatch
                if case_insensitive_matches:
                    raise StorageError(
                        "GROUP_NOT_FOUND",
                        f"Group '{group_id}' not found (name is case-sensitive)",
                        {"requested": group_id, "similar": case_insensitive_matches},
                        f"Did you mean: {', '.join(case_insensitive_matches)}?"
                    )
            except StorageError:
                raise  # Re-raise our own errors
            except Exception:
                pass

        # If we get here, it's not a UUID and not a known group name.
        # Return it as-is and let _check_group_access handle the error.
        return group_id

    def _validate_command(self, cmd: str, whitelist: set, args: list = None) -> None:
        """Validates that a command is allowed."""
        if cmd in BLACKLIST_COMMANDS:
            raise StorageError(
                "COMMAND_FORBIDDEN",
                f"Command '{cmd}' is forbidden",
                {"command": cmd},
                "See shed_help() for allowed commands"
            )

        # Handle curl/wget separately (controlled by valves)
        if cmd in CURL_COMMANDS:
            if self.valves.network_mode == "all":
                # All curl operations allowed
                return
            elif self.valves.network_mode == "safe":
                # Only GET operations - validate args
                self._validate_curl_args_get_only(args or [])
                return
            else:
                raise StorageError(
                    "COMMAND_FORBIDDEN",
                    f"Command '{cmd}' is disabled",
                    {"command": cmd},
                    "Ask admin to set network_mode to 'safe' or 'all'"
                )

        if cmd not in whitelist:
            raise StorageError(
                "COMMAND_FORBIDDEN",
                f"Command '{cmd}' is not in whitelist",
                {"command": cmd, "allowed": sorted(list(whitelist))[:20]},
                "Use shed_allowed_commands() to see available commands"
            )

        # If git, validate subcommands
        if cmd == "git" and args is not None:
            self._validate_git_command(args)

        # If find, block -exec options (they can execute arbitrary commands)
        if cmd == "find" and args is not None:
            for arg in args:
                if str(arg) in FIND_EXEC_OPTIONS:
                    raise StorageError(
                        "ARGUMENT_FORBIDDEN",
                        f"Option '{arg}' is forbidden for find",
                        {"argument": str(arg)},
                        "find -exec can execute arbitrary commands. Use find + manual processing instead."
                    )

        # If awk (or variants), block system() and getline pipes (they can execute commands).
        # gawk, mawk, nawk are all awk implementations with the same dangerous capabilities.
        if cmd in {"awk", "gawk", "mawk", "nawk"} and args is not None:
            for arg in args:
                if AWK_DANGEROUS_PATTERNS.search(str(arg)):
                    raise StorageError(
                        "ARGUMENT_FORBIDDEN",
                        f"{cmd} script contains forbidden patterns (system, getline pipe)",
                        {"argument": str(arg)[:100]},
                        f"{cmd} system() and getline pipes can execute commands"
                    )

        # Block ln entirely - both symlinks and hard links have security risks:
        # - Symlinks can point outside the chroot
        # - Hard links can reference sensitive files on the same filesystem
        # Use cp instead for safe file duplication.
        if cmd == "ln":
            raise StorageError(
                "COMMAND_FORBIDDEN",
                "ln command is forbidden for security reasons",
                {"command": "ln"},
                "Use 'cp' instead to copy files. Both symlinks and hard links pose security risks."
            )

        # If tar, block --absolute-names / -P (extracts to absolute paths)
        if cmd == "tar" and args is not None:
            for arg in args:
                arg_str = str(arg)
                if arg_str == "-P" or arg_str == "--absolute-names":
                    raise StorageError(
                        "ARGUMENT_FORBIDDEN",
                        "Option --absolute-names (-P) is forbidden for tar",
                        {"argument": arg_str},
                        "This option allows extracting to absolute paths outside the allowed zone."
                    )

        # Handle network-capable commands (ffmpeg, pandoc, convert, etc.)
        if cmd in NETWORK_CAPABLE_COMMANDS:
            self._validate_network_command(cmd, args or [])
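
    # Illustrative sketch of the validation layering above (hypothetical calls,
    # assuming FIND_EXEC_OPTIONS contains "-exec" and the given whitelist entries):
    #   _validate_command("rm", WHITELIST_READWRITE, ["-rf", "tmp"])       -> OK if whitelisted
    #   _validate_command("find", wl, [".", "-name", "*.log", "-exec"])    -> ARGUMENT_FORBIDDEN
    #   _validate_command("ln", wl, ["-s", "a", "b"])                      -> COMMAND_FORBIDDEN
    #   _validate_command("curl", wl, ["-o", "f", "https://example.com"])  -> depends on network_mode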

    def _validate_curl_args_get_only(self, args: list) -> None:
        """Validates curl/wget arguments in GET-only mode (blocks upload/POST options)."""
        for arg in args:
            arg_str = str(arg)

            # Check if it's a forbidden option.
            # Handle both "-X" and "-XPOST" formats.
            for forbidden in CURL_FORBIDDEN_GET_OPTS:
                if arg_str == forbidden:
                    raise StorageError(
                        "ARGUMENT_FORBIDDEN",
                        f"Option '{arg_str}' is forbidden in GET-only mode",
                        {"argument": arg_str, "forbidden_options": sorted(CURL_FORBIDDEN_GET_OPTS)},
                        "This option allows data upload. Ask admin to set network_mode to 'all'"
                    )
                # Handle combined formats: -XPOST, --request=POST
                if arg_str.startswith(forbidden + "=") or arg_str.startswith(forbidden):
                    if forbidden in {"-X", "--request", "-d", "--data", "-F", "--form", "-T"}:
                        raise StorageError(
                            "ARGUMENT_FORBIDDEN",
                            f"Option '{arg_str}' is forbidden in GET-only mode",
                            {"argument": arg_str},
                            "This option allows data upload. Ask admin to set network_mode to 'all'"
                        )
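
    # Illustrative sketch, assuming CURL_FORBIDDEN_GET_OPTS includes -X/-d/-F/-T
    # and their long forms (the actual set lives in the module constants):
    #   ["-L", "-o", "page.html", "https://example.com"]  -> allowed (plain GET)
    #   ["-X", "POST", "https://example.com"]             -> ARGUMENT_FORBIDDEN
    #   ["-XPOST", "https://example.com"]                 -> ARGUMENT_FORBIDDEN (combined form)
    #   ["--data=secret", "https://example.com"]          -> ARGUMENT_FORBIDDEN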

    def _validate_network_command(self, cmd: str, args: list) -> bool:
        """
        Validates network-capable commands (ffmpeg, pandoc, convert, etc.).

        Returns True if URLs should be allowed in the subsequent _validate_args call.
        Raises StorageError if the command is not allowed with current valve settings.
        """
        # Commands that can SEND data (exfiltration risk)
        if cmd in NETWORK_OUTPUT_COMMANDS:  # ffmpeg
            if self.valves.network_mode == "all":
                # All operations allowed, URLs ok
                return True
            elif self.valves.network_mode == "safe":
                # Block output protocols that can exfiltrate data
                for arg in args:
                    arg_str = str(arg).lower()
                    for protocol in FFMPEG_OUTPUT_PROTOCOLS:
                        if protocol in arg_str:
                            raise StorageError(
                                "ARGUMENT_FORBIDDEN",
                                f"Output protocol '{protocol}' is forbidden in 'safe' mode",
                                {"argument": str(arg), "protocol": protocol},
                                "This protocol can send data to remote servers. Ask admin to set network_mode to 'all'"
                            )

                # Block dangerous ffmpeg options that can be used for exfiltration
                for i, arg in enumerate(args):
                    arg_str = str(arg)
                    # Check exact match or prefix match (e.g., -metadata:s:v)
                    for dangerous_opt in FFMPEG_DANGEROUS_OPTIONS:
                        if arg_str == dangerous_opt or arg_str.startswith(dangerous_opt + ":"):
                            raise StorageError(
                                "ARGUMENT_FORBIDDEN",
                                f"ffmpeg option '{arg_str}' is forbidden in 'safe' mode",
                                {"argument": arg_str, "option": dangerous_opt},
                                "This option can be used for data exfiltration. Ask admin to set network_mode to 'all'"
                            )
                    # Check for the tee muxer in a format specification
                    if arg_str == "-f" and i + 1 < len(args) and str(args[i + 1]).lower() == "tee":
                        raise StorageError(
                            "ARGUMENT_FORBIDDEN",
                            "ffmpeg tee muxer is forbidden in 'safe' mode",
                            {"argument": "-f tee"},
                            "The tee muxer can duplicate output to multiple destinations. Ask admin to set network_mode to 'all'"
                        )

                # Input URLs are ok
                return True
            else:
                # Network disabled - block ffmpeg entirely
                raise StorageError(
                    "COMMAND_FORBIDDEN",
                    f"Command '{cmd}' requires network access",
                    {"command": cmd},
                    "Ask admin to set network_mode to 'safe' or 'all'"
                )

        # Commands that can only RECEIVE data (read-only risk)
        if cmd in NETWORK_INPUT_COMMANDS:  # ffprobe, pandoc, convert, etc.
            if self.valves.network_mode in ("safe", "all"):
                # Network access allowed, URLs ok
                return True
            else:
                # Network disabled - block URLs in arguments
                # (the command itself is ok for local files)
                for arg in args:
                    arg_str = str(arg)
                    if URL_PATTERN.match(arg_str):
                        raise StorageError(
                            "ARGUMENT_FORBIDDEN",
                            "URLs are forbidden when network is disabled",
                            {"argument": arg_str, "command": cmd},
                            "Ask admin to set network_mode to 'safe' or 'all' for URL access"
                        )
                return False

        return False
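
    # Decision table for the gating above:
    #   network_mode   NETWORK_OUTPUT_COMMANDS (ffmpeg)         NETWORK_INPUT_COMMANDS (ffprobe, pandoc, ...)
    #   "all"          allowed, URLs ok                         allowed, URLs ok
    #   "safe"         allowed, but output protocols, the       allowed, URLs ok
    #                  tee muxer and metadata options blocked
    #   disabled       command rejected outright                local files ok, URL arguments rejected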

    def _validate_args(self, args: list, readonly: bool = False, cmd: str = "") -> None:
        """Validates arguments to detect injections and network access."""
        # Check if the URL check should be skipped for network-capable commands
        skip_url_check = False

        # Network-capable commands (ffmpeg, pandoc, convert, etc.)
        if cmd in NETWORK_CAPABLE_COMMANDS:
            skip_url_check = self.valves.network_mode in ("safe", "all")

        # curl/wget are handled separately but also need URL allowance
        if cmd in CURL_COMMANDS:
            skip_url_check = self.valves.network_mode in ("safe", "all")

        # git clone/fetch/pull need URLs in safe/all mode
        if cmd == "git":
            skip_url_check = self.valves.network_mode in ("safe", "all")

        for arg in args:
            arg_str = str(arg)

            # Check if this argument is a URL (for network-capable commands with network enabled)
            is_allowed_url = skip_url_check and URL_PATTERN.match(arg_str)

            # Choose the pattern based on the command (some commands use | in their own syntax)
            dangerous_pattern = (
                DANGEROUS_ARGS_PATTERN_ALLOW_PIPE
                if cmd in COMMANDS_ALLOWING_PIPE
                else DANGEROUS_ARGS_PATTERN
            )

            # Check dangerous patterns.
            # For allowed URLs, skip this check because & is common in query strings.
            if not is_allowed_url and dangerous_pattern.search(arg_str):
                raise StorageError(
                    "ARGUMENT_FORBIDDEN",
                    "Dangerous argument detected",
                    {"argument": arg_str},
                    "Characters ; | & && || > >> < << $( ${ ` are forbidden"
                )

            # Block URLs (network access via ffmpeg, pandoc, imagemagick, etc.).
            # Skip if the command is network-capable and the network valves are enabled.
            if not skip_url_check and URL_PATTERN.match(arg_str):
                raise StorageError(
                    "ARGUMENT_FORBIDDEN",
                    "URLs are forbidden in arguments",
                    {"argument": arg_str},
                    "Network access via URLs is blocked. Ask admin to set network_mode to 'safe' or 'all'."
                )

            # In readonly mode, forbid -i/--in-place for sed (in-place editing)
            if readonly and (arg_str.startswith("-i") or arg_str.startswith("--in-place")):
                raise StorageError(
                    "ARGUMENT_FORBIDDEN",
                    "Option -i/--in-place is forbidden in read-only mode",
                    {"argument": arg_str},
                    "This zone is read-only"
                )

    def _is_expression_not_path(self, arg: str, cmd: str) -> bool:
        """
        Determines if an argument is a regex expression rather than a path.
        Used for sed/grep/awk commands where the /pattern/ syntax is common.
        """
        # Commands that use /pattern/ expressions
        expression_commands = {"sed", "grep", "egrep", "fgrep", "awk", "perl"}

        if cmd not in expression_commands or not arg.startswith("/"):
            return False

        # Clear expression indicators:
        # - Contains a space: "/Team: Eng/a new line"
        # - Contains a colon: "/Team: Eng/"
        # - Ends with /:      "/pattern/"
        if " " in arg:
            return True
        if ":" in arg:
            return True
        if arg.endswith("/"):
            return True

        if len(arg) > 2:
            # Check for the /pattern/X format where X is a single sed command.
            # Valid:   /foo/d, /bar/p, /baz/a text
            # Invalid: /etc/passwd (passwd is not a single letter)
            second_slash = arg.find("/", 1)
            if second_slash > 0 and second_slash < len(arg) - 1:
                after_slash = arg[second_slash + 1:]
                # Must be a single sed command letter, alone or followed by space/text
                if len(after_slash) == 1 and after_slash in "acdipqswy":
                    return True
                if len(after_slash) > 1 and after_slash[0] in "acdipqswy" and after_slash[1] in " \t/":
                    return True

        return False

    def _validate_path_args(
        self,
        args: list,
        chroot: Path,
        cmd: str = "",
        zone_name: str = None,
        allow_zone_in_path: bool = False
    ) -> list:
        """
        Validates that arguments don't allow escaping the chroot.
        Blocks: absolute paths and .. sequences that escape the chroot.

        For sed/grep/awk, expressions starting with / are NOT treated as paths.

        :param args: List of command arguments
        :param chroot: The zone root path
        :param cmd: The command being executed
        :param zone_name: If provided, checks that paths don't start with the zone name
        :param allow_zone_in_path: If True, allows paths to start with the zone name
        """
        chroot_resolved = chroot.resolve()

        for arg in args:
            arg_str = str(arg)

            # Skip flags (like -i, -e, -n, etc.)
            if arg_str.startswith("-"):
                continue

            # Skip regex expressions for sed/grep/awk
            if self._is_expression_not_path(arg_str, cmd):
                continue

            # Block absolute paths (that aren't expressions)
            if arg_str.startswith("/"):
                raise StorageError(
                    "PATH_ESCAPE",
                    "Absolute paths forbidden",
                    {"path": arg_str},
                    "Use only relative paths"
                )

            # Use _validate_relative_path for standard validation + the zone prefix check.
            # This validates: Unicode normalization, .., and the zone prefix.
            self._validate_relative_path(arg_str, zone_name, allow_zone_in_path)

            # Additional chroot escape check with resolved paths
            if ".." in arg_str:
                try:
                    target = (chroot / arg_str).resolve()
                    target.relative_to(chroot_resolved)
                except ValueError:
                    raise StorageError(
                        "PATH_ESCAPE",
                        "Chroot escape attempt detected",
                        {"path": arg_str, "chroot": str(chroot)},
                        "Resolved path escapes allowed zone"
                    )

        return list(args)
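
    # Illustrative sketch (hypothetical chroot path):
    #   _validate_path_args(["-n", "projects/a.txt"], chroot, cmd="cat")  -> OK
    #   _validate_path_args(["/etc/passwd"], chroot, cmd="cat")           -> PATH_ESCAPE
    #   _validate_path_args(["/error/d"], chroot, cmd="sed")              -> OK: treated as a
    #       sed expression, not a path (see _is_expression_not_path above)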

    def _validate_git_command(self, args: list) -> None:
        """Validates a Git subcommand based on whitelists and network valves."""
        if not args:
            raise StorageError(
                "ARGUMENT_FORBIDDEN",
                "Git command without subcommand",
                {},
                "Example: git status, git log"
            )

        subcmd = args[0]

        # Always forbidden (dangerous local operations)
        if subcmd in GIT_BLACKLIST:
            raise StorageError(
                "COMMAND_FORBIDDEN",
                f"Command 'git {subcmd}' is forbidden",
                {"subcommand": subcmd},
                "gc, prune, filter-branch are always forbidden"
            )

        # Network push (exfiltration) - requires network_mode="all"
        if subcmd in GIT_NETWORK_PUSH:
            if self.valves.network_mode != "all":
                raise StorageError(
                    "COMMAND_FORBIDDEN",
                    f"Command 'git {subcmd}' is disabled (network upload)",
                    {"subcommand": subcmd},
                    "Ask admin to set network_mode to 'all'"
                )
            return  # Allowed

        # Network download - requires network_mode="safe" or "all"
        if subcmd in GIT_NETWORK_GET:
            if self.valves.network_mode not in ("safe", "all"):
                raise StorageError(
                    "COMMAND_FORBIDDEN",
                    f"Command 'git {subcmd}' is disabled (network access)",
                    {"subcommand": subcmd},
                    "Ask admin to set network_mode to 'safe' or 'all'"
                )
            return  # Allowed

        # Standard whitelist check for local operations
        if subcmd not in GIT_WHITELIST_READ and subcmd not in GIT_WHITELIST_WRITE:
            raise StorageError(
                "COMMAND_FORBIDDEN",
                f"Git subcommand '{subcmd}' is not allowed",
                {"subcommand": subcmd, "allowed_read": sorted(GIT_WHITELIST_READ),
                 "allowed_write": sorted(GIT_WHITELIST_WRITE)}
            )
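
    # Summary of the gating above (set contents live in the module constants;
    # the members in parentheses are documented or likely examples, not exhaustive):
    #   GIT_BLACKLIST    (gc, prune, filter-branch)  -> always rejected
    #   GIT_NETWORK_PUSH (e.g. push)                 -> requires network_mode == "all"
    #   GIT_NETWORK_GET  (e.g. clone, fetch, pull)   -> requires network_mode in ("safe", "all")
    #   everything else must appear in GIT_WHITELIST_READ or GIT_WHITELIST_WRITE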

    def _exec_command(
        self,
        cmd: str,
        args: list,
        cwd: Path,
        timeout: int,
        max_output: int = None,
        stdout_file: Path = None,
        stderr_file: Path = None,
        redirect_stderr_to_stdout: bool = False,
    ) -> dict:
        """Executes a command and returns the result.

        Args:
            cmd: Command to execute
            args: Command arguments
            cwd: Working directory
            timeout: Timeout in seconds
            max_output: Max output size in bytes (None=default, 0=unlimited up to absolute max)
            stdout_file: Path to redirect stdout to (None=capture in memory)
            stderr_file: Path to redirect stderr to (None=capture in memory)
            redirect_stderr_to_stdout: If True, redirect stderr to stdout (2>&1)
        """
        # Handle tar extraction: add --no-same-owner to prevent ownership errors.
        # This avoids "Cannot change ownership" errors that cause tar to return code 2
        # even though files are extracted successfully.
        if cmd == "tar":
            args_str = " ".join(str(a) for a in args)
            is_extraction = any(x in args_str for x in ["-x", "--extract"])
            # Also check combined flags like -xJf, -xzf, etc.
            if not is_extraction:
                for arg in args:
                    arg_s = str(arg)
                    if arg_s.startswith("-") and not arg_s.startswith("--") and "x" in arg_s:
                        is_extraction = True
                        break
            if is_extraction and "--no-same-owner" not in args_str:
                args = ["--no-same-owner"] + list(args)

        # Handle curl: require -o/--output to prevent stdout pollution.
        # Also add -sS to suppress progress but show errors.
        if cmd == "curl":
            args_str = " ".join(str(a) for a in args)
            # Check for output redirection (allow if stdout_file is specified)
            has_output = any(x in args_str for x in ["-o", "--output", "-O", "--remote-name"]) or stdout_file
            if not has_output:
                raise StorageError(
                    "ARGUMENT_REQUIRED",
                    "curl requires -o to save to file",
                    {"command": "curl", "args": [str(a) for a in args]},
                    'Use: curl -L -o "filename" "url" to save directly to file. This prevents context pollution.'
                )
            if "-s" not in args_str and "--silent" not in args_str:
                args = ["-sS"] + list(args)  # -s=silent, -S=show-error

        # Handle wget: require -O/--output-document to prevent stdout pollution.
        # Also add -q to suppress progress.
        if cmd == "wget":
            args_str = " ".join(str(a) for a in args)
            # Check for output redirection (allow if stdout_file is specified)
            has_output = any(x in args_str for x in ["-O", "--output-document"]) or stdout_file
            if not has_output:
                raise StorageError(
                    "ARGUMENT_REQUIRED",
                    "wget requires -O to save to file",
                    {"command": "wget", "args": [str(a) for a in args]},
                    'Use: wget -O "filename" "url" to save directly to file. This prevents context pollution.'
                )
            if "-q" not in args_str and "--quiet" not in args_str:
                args = ["-q"] + list(args)

        # Build the command
        full_cmd = [cmd] + [str(a) for a in args]

        # Prepare file handles for redirection
        stdout_handle = None
        stderr_handle = None
        files_to_close = []

        try:
            # Set up stdout redirection
            if stdout_file:
                stdout_file.parent.mkdir(parents=True, exist_ok=True)
                stdout_handle = open(stdout_file, 'w', encoding='utf-8')
                files_to_close.append(stdout_handle)
            else:
                stdout_handle = subprocess.PIPE

            # Set up stderr redirection
            if redirect_stderr_to_stdout:
                stderr_handle = subprocess.STDOUT
            elif stderr_file:
                stderr_file.parent.mkdir(parents=True, exist_ok=True)
                stderr_handle = open(stderr_file, 'w', encoding='utf-8')
                files_to_close.append(stderr_handle)
            else:
                stderr_handle = subprocess.PIPE

            # Create a preexec function to set resource limits (DoS protection)
            def set_resource_limits():
                """Apply resource limits to prevent DoS attacks."""
                # Memory limit
                mem_limit_mb = self.valves.exec_memory_limit_mb
                if mem_limit_mb > 0:
                    mem_limit_bytes = mem_limit_mb * 1024 * 1024
                    try:
                        resource.setrlimit(resource.RLIMIT_AS, (mem_limit_bytes, mem_limit_bytes))
                    except (ValueError, resource.error):
                        pass  # May fail on some systems

                # CPU time limit
                cpu_limit = self.valves.exec_cpu_limit_seconds
                if cpu_limit > 0:
                    try:
                        resource.setrlimit(resource.RLIMIT_CPU, (cpu_limit, cpu_limit))
                    except (ValueError, resource.error):
                        pass  # May fail on some systems

            result = subprocess.run(
                full_cmd,
                cwd=str(cwd),
                stdout=stdout_handle,
                stderr=stderr_handle,
                text=True,
                timeout=timeout,
                preexec_fn=set_resource_limits,
            )

            # Close the files before reading them
            for f in files_to_close:
                f.close()
            files_to_close = []

            # Get stdout content
            if stdout_file:
                stdout = f"[Output written to {stdout_file.name}]"
                stdout_truncated = False
            else:
                # Truncate stdout if too long (prevents context pollution)
                if max_output is None:
                    effective_max = self.valves.max_output_default
                elif max_output == 0:
                    effective_max = self.valves.max_output_absolute
                else:
                    effective_max = min(max_output, self.valves.max_output_absolute)

                stdout = result.stdout or ""
                stdout_truncated = False
                if len(stdout) > effective_max:
                    stdout = stdout[:effective_max] + f"\n\n... [TRUNCATED - {len(result.stdout)} bytes total, showing first {effective_max}] ..."
                    stdout_truncated = True

            # Get stderr content
            if stderr_file:
                stderr = f"[Errors written to {stderr_file.name}]"
                stderr_truncated = False
            elif redirect_stderr_to_stdout:
                stderr = ""
                stderr_truncated = False
            else:
                if max_output is None:
                    effective_max = self.valves.max_output_default
                elif max_output == 0:
                    effective_max = self.valves.max_output_absolute
                else:
                    effective_max = min(max_output, self.valves.max_output_absolute)

                stderr = result.stderr or ""
                stderr_truncated = False
                if len(stderr) > effective_max:
                    stderr = stderr[:effective_max] + f"\n\n... [TRUNCATED - {len(result.stderr)} bytes total, showing first {effective_max}] ..."
                    stderr_truncated = True

            response = {
                "success": result.returncode == 0,
                "cmd": cmd,
                "args": [str(a) for a in args],
                "stdout": stdout,
                "stderr": stderr,
                "returncode": result.returncode,
            }

            # Add file info if redirected
            if stdout_file:
                response["stdout_file"] = stdout_file.name
            if stderr_file:
                response["stderr_file"] = stderr_file.name

            if (not stdout_file and stdout_truncated) or (not stderr_file and not redirect_stderr_to_stdout and stderr_truncated):
                response["truncated"] = True
                response["hint"] = "Output was truncated. Use stdout_file= to save full output to a file."

            return response

        except subprocess.TimeoutExpired:
            raise StorageError(
                "TIMEOUT",
                f"Command timeout after {timeout}s",
                {"command": cmd, "timeout": timeout},
                f"Increase timeout (max: {self.valves.exec_timeout_max}s)"
            )
        except FileNotFoundError:
            raise StorageError(
                "COMMAND_NOT_FOUND",
                f"Command '{cmd}' not found on system",
                {"command": cmd},
                "Use shed_allowed_commands() to see available commands"
            )
        except Exception as e:
            raise StorageError(
                "EXEC_ERROR",
                f"Execution error: {str(e)}",
                {"command": cmd, "error": str(e)}
            )
        finally:
            # Ensure files are closed on error
            for f in files_to_close:
                try:
                    f.close()
                except OSError:
                    pass
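
    # Illustrative sketch (hypothetical paths and values) of the redirection knobs:
    #
    #   self._exec_command("grep", ["-n", "error", "app.log"],
    #                      cwd=zone_root, timeout=30)
    #       -> {"success": True, "stdout": "...", "stderr": "", "returncode": 0}
    #
    #   self._exec_command("jq", ["-r", ".[]", "big.json"],
    #                      cwd=zone_root, timeout=30,
    #                      stdout_file=zone_root / "out.txt")
    #       -> stdout is streamed to out.txt; the response carries a short
    #          placeholder string instead of the full output, keeping the
    #          LLM context small.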

    def _ensure_dir(self, path: Path) -> None:
        """Creates a directory and its parents if needed."""
        path.mkdir(parents=True, exist_ok=True)

    def _rm_with_empty_parents(self, filepath: Path, stop_at: Path) -> None:
        """Deletes a file, then walks up deleting empty folders."""
        if filepath.exists():
            if filepath.is_dir():
                shutil.rmtree(filepath)
            else:
                filepath.unlink()

        # Walk up and delete empty folders
        parent = filepath.parent
        stop_at_resolved = stop_at.resolve()

        while parent.resolve() != stop_at_resolved:
            try:
                parent.rmdir()  # Fails if not empty
                parent = parent.parent
            except OSError:
                break

    def _get_lock_path(self, zone_root: Path, relative_path: str) -> Path:
        """Returns the lock file path."""
        return zone_root / "locks" / (relative_path + ".lock")

    def _get_editzone_path(self, zone_root: Path, conv_id: str, relative_path: str) -> Path:
        """Returns the path in the editzone."""
        return zone_root / "editzone" / conv_id / relative_path

    def _acquire_lock(self, lock_path: Path, conv_id: str, user_id: str, path: str) -> None:
        """
        Atomically acquires a lock file.
        Uses exclusive file creation to prevent race conditions (TOCTOU).
        Raises FILE_LOCKED if already locked by another conversation.
        """
        self._ensure_dir(lock_path.parent)

        lock_data = {
            "conv_id": conv_id,
            "user_id": user_id,
            "locked_at": datetime.now(timezone.utc).isoformat(),
            "path": path,
        }

        # Check if a lock exists and is from another conversation
        if lock_path.exists():
            try:
                existing_lock = json.loads(lock_path.read_text())
                if existing_lock.get("conv_id") != conv_id:
                    raise StorageError(
                        "FILE_LOCKED",
                        "File locked by another conversation",
                        {
                            "locked_by": existing_lock.get("user_id"),
                            "locked_at": existing_lock.get("locked_at"),
                            "conv_id": existing_lock.get("conv_id"),
                            "path": existing_lock.get("path"),
                        },
                        "Wait or use shed_force_unlock() / shed_maintenance()"
                    )
                # Same conversation - can proceed (re-lock)
                lock_path.write_text(json.dumps(lock_data, indent=2))
                return
            except json.JSONDecodeError:
                # Corrupted lock - overwrite it
                pass

        # Try atomic creation in exclusive mode
        try:
            # os.open with O_CREAT | O_EXCL is atomic
            fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
            try:
                os.write(fd, json.dumps(lock_data, indent=2).encode('utf-8'))
            finally:
                os.close(fd)
        except FileExistsError:
            # Race condition: another process created the lock between our check and create.
            # Re-read and check.
            try:
                existing_lock = json.loads(lock_path.read_text())
                if existing_lock.get("conv_id") != conv_id:
                    raise StorageError(
                        "FILE_LOCKED",
                        "File locked by another conversation",
                        {
                            "locked_by": existing_lock.get("user_id"),
                            "locked_at": existing_lock.get("locked_at"),
                            "conv_id": existing_lock.get("conv_id"),
                        },
                        "Wait or use shed_force_unlock() / shed_maintenance()"
                    )
            except (json.JSONDecodeError, FileNotFoundError):
                # Lock was corrupted or removed - try again
                lock_path.write_text(json.dumps(lock_data, indent=2))
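
    # Note on atomicity: os.open(..., O_CREAT | O_EXCL) either creates the lock
    # file or fails with FileExistsError in a single kernel-level step, so two
    # conversations racing to lock the same path cannot both succeed. Sketch of
    # the happy path (hypothetical values):
    #
    #   lock = zone_root / "locks" / "notes.txt.lock"
    #   self._acquire_lock(lock, conv_id="c1", user_id="u1", path="notes.txt")
    #   # a second conversation then hits:
    #   self._acquire_lock(lock, conv_id="c2", user_id="u2", path="notes.txt")
    #       -> raises StorageError("FILE_LOCKED", ...)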

    def _check_lock_owner(self, lock_path: Path, user_id: str) -> None:
        """
        Verifies that the current user owns the lock.
        Raises NOT_LOCK_OWNER if not.
        """
        if lock_path.exists():
            try:
                lock_data = json.loads(lock_path.read_text())
                if lock_data.get("user_id") != user_id:
                    raise StorageError(
                        "NOT_LOCK_OWNER",
                        "You don't own this lock",
                        {"locked_by": lock_data.get("user_id"), "your_id": user_id},
                        "Only the user who opened the file can save/cancel"
                    )
            except json.JSONDecodeError:
                pass  # Corrupted lock, allow the operation

    def _validate_content_size(self, content: str) -> None:
        """Checks that content doesn't exceed the max size."""
        max_bytes = self.valves.max_file_size_mb * 1024 * 1024
        if len(content.encode('utf-8')) > max_bytes:
            raise StorageError(
                "FILE_TOO_LARGE",
                f"Content too large ({len(content.encode('utf-8')) / 1024 / 1024:.2f} MB)",
                {"max_mb": self.valves.max_file_size_mb},
                f"Max size is {self.valves.max_file_size_mb} MB"
            )

    def _get_user_usage(self, user_root: Path) -> int:
        """Calculates the total size of the user's personal space in bytes."""
        total = 0
        for zone in ["Uploads", "Storage/data", "Documents/data"]:
            zone_path = user_root / zone
            if zone_path.exists():
                for f in zone_path.rglob("*"):
                    if f.is_file():
                        try:
                            total += f.stat().st_size
                        except (OSError, FileNotFoundError):
                            pass
        return total

    def _get_path_size(self, path: Path) -> int:
        """Calculates the size of a file or directory in bytes."""
        if not path.exists():
            return 0
        if path.is_file():
            return path.stat().st_size
        # Directory: sum all files recursively
        total = 0
        for f in path.rglob("*"):
            if f.is_file():
                try:
                    total += f.stat().st_size
                except (OSError, FileNotFoundError):
                    pass
        return total

    def _check_quota(self, __user__: dict, additional_bytes: int) -> None:
        """
        Checks if adding additional_bytes would exceed the user quota.
        Raises QUOTA_EXCEEDED if the quota would be exceeded.
        """
        user_root = self._get_user_root(__user__)
        current_usage = self._get_user_usage(user_root)
        quota_bytes = self.valves.quota_per_user_mb * 1024 * 1024

        if current_usage + additional_bytes > quota_bytes:
            raise StorageError(
                "QUOTA_EXCEEDED",
                f"Quota exceeded: {current_usage / 1024 / 1024:.1f} MB used + {additional_bytes / 1024 / 1024:.1f} MB requested > {self.valves.quota_per_user_mb} MB quota",
                {
                    "current_usage_mb": round(current_usage / 1024 / 1024, 2),
                    "requested_mb": round(additional_bytes / 1024 / 1024, 2),
                    "quota_mb": self.valves.quota_per_user_mb,
                },
                "Free up space or contact administrator to increase quota"
            )

    def _check_group_quota(self, group_id: str, additional_bytes: int) -> None:
        """
        Checks if adding additional_bytes would exceed the group quota.
        Raises QUOTA_EXCEEDED if the quota would be exceeded.
        """
        group_data_path = self._get_groups_root() / group_id / "data"
        current_usage = self._get_path_size(group_data_path)
        quota_bytes = self.valves.quota_per_group_mb * 1024 * 1024

        if current_usage + additional_bytes > quota_bytes:
            raise StorageError(
                "QUOTA_EXCEEDED",
                f"Group quota exceeded: {current_usage / 1024 / 1024:.1f} MB used + {additional_bytes / 1024 / 1024:.1f} MB requested > {self.valves.quota_per_group_mb} MB quota",
                {
                    "group_id": group_id,
                    "current_usage_mb": round(current_usage / 1024 / 1024, 2),
                    "requested_mb": round(additional_bytes / 1024 / 1024, 2),
                    "quota_mb": self.valves.quota_per_group_mb,
                },
                "Free up space in group or contact administrator to increase quota"
            )
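
    # Quota arithmetic, for reference: with quota_per_user_mb = 500 (a hypothetical
    # valve setting), a user at 480.5 MB requesting a 25 MB write fails because
    # 480.5 + 25 = 505.5 MB > 500 MB, while any request of 19.5 MB or less succeeds.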

    def _git_run(self, args: list, cwd: Path, timeout: int = 30) -> subprocess.CompletedProcess:
        """
        Layer 2: Executes a git command.
        All git operations MUST use this method for consistency.
        Includes timeout and error handling.
        """
        try:
            result = subprocess.run(
                ["git"] + args,
                cwd=str(cwd),
                capture_output=True,
                text=True,
                timeout=timeout,
            )
            return result
        except subprocess.TimeoutExpired:
            raise StorageError(
                "TIMEOUT",
                f"Git command timed out after {timeout}s",
                {"command": ["git"] + args},
                "Try a simpler operation or increase timeout"
            )

    def _check_command_available(self, cmd: str) -> bool:
        """
        Layer 2: Checks if a command is available on the system.
        Used for introspection (shed_allowed_commands).
        """
        try:
            result = subprocess.run(
                ["which", cmd],
                capture_output=True,
                timeout=5,
            )
            return result.returncode == 0
        except Exception:
            return False

    def _init_git_repo(self, repo_path: Path) -> None:
        """Initializes a Git repository if needed."""
        git_dir = repo_path / ".git"
        if not git_dir.exists():
            self._ensure_dir(repo_path)
            self._git_run(["init"], repo_path)
            self._git_run(["config", "user.email", "storage@openwebui.local"], repo_path)
            self._git_run(["config", "user.name", "Fileshed"], repo_path)
            # Security: disable hooks to prevent code execution via malicious repos
            self._neutralize_git_hooks(repo_path)

    def _neutralize_git_hooks(self, repo_path: Path) -> None:
        """
        Neutralizes Git hooks to prevent arbitrary code execution.
        This is critical when cloning untrusted repositories.
        """
        hooks_path = repo_path / ".git" / "hooks"
        if hooks_path.exists():
            # Remove all hook files (they could be malicious);
            # shutil is already imported at module level.
            shutil.rmtree(hooks_path, ignore_errors=True)
            # Recreate an empty hooks directory
            hooks_path.mkdir(exist_ok=True)
            # Configure git to use an empty hooks path (defense in depth)
            self._git_run(["config", "core.hooksPath", "/dev/null"], repo_path)

    def _git_commit(self, repo_path: Path, message: str) -> None:
        """Performs a Git commit."""
        self._git_run(["add", "-A"], repo_path)
        self._git_run(["commit", "-m", message, "--allow-empty-message"], repo_path)

    def _git_commit_as_user(self, repo_path: Path, message: str, user_id: str) -> None:
        """Performs a Git commit with the user as author."""
        self._git_run(["add", "-A"], repo_path)
        author = f"{user_id} <{user_id}@fileshed>"
        self._git_run(["commit", "--author", author, "-m", message, "--allow-empty-message"], repo_path)

    # =========================================================================
    # GROUP HELPERS
    # =========================================================================

    def _init_db(self) -> None:
        """Initializes the SQLite database if needed."""
        if self._db_initialized:
            return

        db_path = self._get_db_path()
        db_path.parent.mkdir(parents=True, exist_ok=True)

        conn = sqlite3.connect(str(db_path), timeout=10.0)
        try:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS file_ownership (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    group_id TEXT NOT NULL,
                    file_path TEXT NOT NULL,
                    owner_id TEXT NOT NULL,
                    write_access TEXT NOT NULL CHECK(write_access IN ('owner', 'group', 'owner_ro')),
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(group_id, file_path)
                )
            """)
            conn.execute("CREATE INDEX IF NOT EXISTS idx_ownership_group ON file_ownership(group_id)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_ownership_owner ON file_ownership(owner_id)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_ownership_path ON file_ownership(group_id, file_path)")
            conn.commit()
        finally:
            conn.close()

        self._db_initialized = True

    def _db_execute(self, query: str, params: tuple = ()) -> tuple:
        """
        Executes a database query.
        Returns a (rows, rowcount) tuple:
        - rows: list of Row objects for SELECT, empty list for others
        - rowcount: number of affected rows for INSERT/UPDATE/DELETE
        """
        self._init_db()
        conn = sqlite3.connect(str(self._get_db_path()), timeout=10.0, isolation_level="IMMEDIATE")
        conn.row_factory = sqlite3.Row
        try:
            cursor = conn.execute(query, params)
            result = cursor.fetchall()
            rowcount = cursor.rowcount
            conn.commit()
            return result, rowcount
        finally:
            conn.close()
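
    # Illustrative sketch (hypothetical data):
    #
    #   rows, _ = self._db_execute(
    #       "SELECT owner_id FROM file_ownership WHERE group_id = ? AND file_path = ?",
    #       ("g1", "docs/spec.md"))
    #   _, n = self._db_execute(
    #       "DELETE FROM file_ownership WHERE group_id = ?", ("g1",))
    #
    # isolation_level="IMMEDIATE" makes implicit transactions start with
    # BEGIN IMMEDIATE, taking the write lock up front and avoiding
    # "database is locked" errors from lock upgrades when two writers race.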

    def _get_user_groups(self, user_id: str) -> list:
        """Gets the groups the user belongs to via the Open WebUI API."""
        if not GROUPS_AVAILABLE:
            return []
        try:
            return Groups.get_groups_by_member_id(user_id)
        except Exception:
            return []

    def _is_group_member(self, user_id: str, group_id: str) -> bool:
        """Checks if the user is a member of the group."""
        user_groups = self._get_user_groups(user_id)
        return any(g.id == group_id for g in user_groups)

    def _check_group_access(self, __user__: dict, group_id: str) -> None:
        """Verifies the user has access to the group. Raises an error if not."""
        if not GROUPS_AVAILABLE:
            raise StorageError(
                "GROUP_NOT_AVAILABLE",
                "Group features are not available",
                hint="Open WebUI Groups API not found"
            )

        user_id = __user__.get("id", "")
        if not self._is_group_member(user_id, group_id):
            raise StorageError(
                "GROUP_ACCESS_DENIED",
                f"You are not a member of group '{group_id}'",
                {"group_id": group_id},
                "Request access from a group administrator"
            )

    def _ensure_group_space(self, group_id: str) -> Path:
        """Ensures the group space exists. Returns the data path."""
        group_path = self._get_groups_root() / group_id
        data_path = group_path / "data"

        if not data_path.exists():
            data_path.mkdir(parents=True, exist_ok=True)
            (group_path / "editzone").mkdir(exist_ok=True)
            (group_path / "locks").mkdir(exist_ok=True)

            # Initialize the Git repository via Layer 2
            self._git_run(["init"], data_path)
            self._git_run(["config", "user.email", "storage@openwebui"], data_path)
            self._git_run(["config", "user.name", "Fileshed"], data_path)

        return data_path

    def _get_group_data_path(self, group_id: str) -> Path:
        """Gets the data path for a group."""
        return self._get_groups_root() / group_id / "data"

    def _get_group_editzone(self, group_id: str, conv_id: str) -> Path:
        """Gets the editzone path for a group."""
        return self._get_groups_root() / group_id / "editzone" / conv_id

    def _get_group_locks_dir(self, group_id: str) -> Path:
        """Gets the locks directory for a group."""
        return self._get_groups_root() / group_id / "locks"

    def _get_file_ownership(self, group_id: str, file_path: str) -> Optional[dict]:
        """Gets ownership info for a file in group space."""
        rows, _ = self._db_execute(
            "SELECT owner_id, write_access FROM file_ownership WHERE group_id = ? AND file_path = ?",
            (group_id, file_path)
        )
        if rows:
            return {"owner_id": rows[0]["owner_id"], "write_access": rows[0]["write_access"]}
        return None

    def _set_file_ownership(self, group_id: str, file_path: str, owner_id: str, write_access: str) -> None:
        """Sets or updates ownership for a file."""
        self._db_execute("""
            INSERT INTO file_ownership (group_id, file_path, owner_id, write_access)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(group_id, file_path) DO UPDATE SET
                owner_id = excluded.owner_id,
                write_access = excluded.write_access,
                updated_at = CURRENT_TIMESTAMP
        """, (group_id, file_path, owner_id, write_access))

    def _delete_file_ownership(self, group_id: str, file_path: str) -> None:
        """Deletes the ownership record for a file."""
        self._db_execute(
            "DELETE FROM file_ownership WHERE group_id = ? AND file_path = ?",
            (group_id, file_path)
        )

    def _delete_file_ownership_recursive(self, group_id: str, dir_path: str) -> int:
        """
        Deletes ownership records for a directory and all its contents.
        Returns the number of records deleted.
        """
        # Delete the exact match and all paths starting with dir_path/
        if dir_path:
            _, rowcount = self._db_execute(
                "DELETE FROM file_ownership WHERE group_id = ? AND (file_path = ? OR file_path LIKE ?)",
                (group_id, dir_path, dir_path + "/%")
            )
        else:
            # An empty path means root - delete all records for this group
            _, rowcount = self._db_execute(
                "DELETE FROM file_ownership WHERE group_id = ?",
                (group_id,)
            )
        return rowcount if rowcount > 0 else 0

    def _update_file_ownership_paths(self, group_id: str, old_path: str, new_path: str) -> None:
        """
        Updates ownership records when a directory is renamed.
        Updates the path itself and all paths under it.
        """
        # Update the exact match
        self._db_execute(
            "UPDATE file_ownership SET file_path = ?, updated_at = CURRENT_TIMESTAMP WHERE group_id = ? AND file_path = ?",
            (new_path, group_id, old_path)
        )

        # Update all paths under old_path/
        # SQLite: replace the prefix old_path/ with new_path/
        old_prefix = old_path + "/"
        new_prefix = new_path + "/"
        self._db_execute("""
            UPDATE file_ownership
            SET file_path = ? || SUBSTR(file_path, ?),
                updated_at = CURRENT_TIMESTAMP
            WHERE group_id = ? AND file_path LIKE ?
        """, (new_prefix, len(old_prefix) + 1, group_id, old_prefix + "%"))

    def _can_write_group_file(self, group_id: str, file_path: str, user_id: str) -> tuple:
        """
        Checks if the user can modify a file in group space.
        Returns (can_write: bool, error_code: str | None).
        """
        ownership = self._get_file_ownership(group_id, file_path)

        if ownership is None:
            return True, None  # New file, anyone can create

        mode = ownership["write_access"]
        owner = ownership["owner_id"]

        if mode == "group":
            return True, None
        elif mode == "owner":
            if user_id == owner:
                return True, None
            return False, "FILE_OWNER_ONLY"
        elif mode == "owner_ro":
            return False, "FILE_READ_ONLY"

        return False, "PERMISSION_DENIED"

    def _can_delete_group_file(self, group_id: str, file_path: str, user_id: str) -> tuple:
        """Checks if the user can delete a file in group space."""
        ownership = self._get_file_ownership(group_id, file_path)

        if ownership is None:
            return True, None  # Untracked file

        mode = ownership["write_access"]
        owner = ownership["owner_id"]

        if mode == "group":
            return True, None  # Everyone can delete
        elif mode == "owner":
            if user_id == owner:
                return True, None  # Owner can delete
            return False, "FILE_OWNER_ONLY"
        elif mode == "owner_ro":
            return False, "FILE_READ_ONLY"  # Nobody can delete; change the mode first

        return False, "PERMISSION_DENIED"

    def _format_response(self, success: bool, data: Any = None, message: str = None) -> str:
        """Formats a JSON response."""
        response = {"success": success}
        if data is not None:
            response["data"] = data
        if message:
            response["message"] = message
        return json.dumps(response, indent=2, ensure_ascii=False)

    def _format_error(self, e: StorageError, func_name: str = None) -> str:
        """Formats a StorageError with contextual help."""
        function_help = self._get_function_help(func_name) if func_name else None
        return json.dumps(e.to_dict(function_help), indent=2, ensure_ascii=False)

    def _clamp_timeout(self, timeout: int = None) -> int:
        """Clamps the timeout to configured bounds. Uses exec_timeout_default if not specified."""
        if timeout is None:
            timeout = self.valves.exec_timeout_default
        return max(1, min(timeout, self.valves.exec_timeout_max))

    # =========================================================================
    # UPLOADS (2 functions)
    # =========================================================================

    def _get_howto_description(self, howto: str) -> str:
        """Gets the short description for a howto topic."""
        descriptions = {
            "download": "Download files with curl (to work with them locally)",
            "csv_to_sqlite": "Import CSV/TSV data into SQLite (fast, no context pollution)",
            "upload": "Handle user-uploaded files",
            "share": "Create download links for files",
            "edit": "Edit existing files safely",
            "commands": "Available commands + workarounds when commands are missing",
            "network": "Network access configuration",
            "paths": "Path rules and examples",
            "large_files": "Process large files WITHOUT loading into context",
            "full": "Complete documentation (long)",
        }
        return descriptions.get(howto, "")
|
|
|
|
def _get_full_help(self) -> str:
|
|
"""Return the complete documentation."""
|
|
return """
|
|
# Fileshed - Documentation
|
|
|
|
## ⚠️ IMPORTANT: Only call shed_* functions!
|
|
|
|
```
|
|
✅ CORRECT: shed_exec(zone="storage", cmd="ls", args=["-la"])
|
|
✅ CORRECT: shed_exec(zone="storage", cmd="mkdir", args=["-p", "dir"])
|
|
✅ CORRECT: shed_patch_text(zone="storage", path="notes.txt", content="Hello")
|
|
✅ CORRECT: shed_sqlite(zone="storage", path="db.sqlite", query="SELECT * FROM t")
|
|
❌ WRONG: _exec_command(...) ← INTERNAL, will NOT work!
|
|
❌ WRONG: _validate_path(...) ← INTERNAL, will NOT work!
|
|
```
|
|
|
|
All methods starting with `_` are INTERNAL implementation details.
|
|
They are NOT callable. Only `shed_*` functions are available to you.
|
|
|
|
## IMPORTANT: FILE UPLOAD WORKFLOW
|
|
When a file is uploaded, you MUST follow these steps:
|
|
STEP 1: shed_import(import_all=True) <- Import ALL attached files
|
|
OR shed_import(filename="report.pdf") <- Import ONE specific file
|
|
STEP 2: shed_move_uploads_to_storage(src="filename", dest="filename")
|
|
NEVER skip step 1!
|
|
|
|
## PATH RULES - VERY IMPORTANT!
|
|
Each function works INSIDE its zone. Paths are relative to the zone root.
|
|
DO NOT include the zone name in paths!
|
|
|
|
CORRECT: shed_exec(zone="storage", cmd="cat", args=["projects/file.txt"])
|
|
WRONG: shed_exec(zone="storage", cmd="cat", args=["Storage/projects/file.txt"])
|
|
|
|
## DOWNLOAD LINKS
|
|
|
|
Three functions to create download links:
|
|
|
|
| Function | Description |
|
|
|----------|-------------|
|
|
| shed_link_create() | Create download link |
|
|
| shed_link_list() | List your download links |
|
|
| shed_link_delete() | Remove a download link |
|
|
|
|
### shed_link_create - Create a link
|
|
shed_link_create(zone="storage", path="report.pdf")
|
|
shed_link_create(zone="storage", path="data.zip")
|
|
|
|
Returns: {"download_url": "/api/v1/files/{id}/content", "file_id": "..."}
|
|
The user can click the download_url to download the file.
|
|
|
|
### shed_link_list - List links
|
|
shed_link_list()
|
|
|
|
Returns list of all download links you've created with file_id, filename, download_url.
|
|
|
|
### shed_link_delete - Remove a download link
|
|
shed_link_delete(file_id="abc123-...")
|
|
|
|
Removes file from Open WebUI. Use file_id from create or list.
|
|
|
|
## HOW TO DO COMMON OPERATIONS
|
|
|
|
⚠️ **SHELL COMMANDS FIRST!** Use shed_exec() for ALL shell-doable operations.
|
|
Only use shed_patch_text() to CREATE or MODIFY file CONTENT.
|
|
|
|
| Operation | Method |
|
|
|--------------------|------------------------------------------------------------|
|
|
| **Create folder** | shed_exec(zone="storage", cmd="mkdir", args=["-p", "dir"])|
|
|
| Read file | shed_exec(zone="storage", cmd="cat", args=["file.txt"]) |
|
|
| Copy file | shed_exec(zone="storage", cmd="cp", args=["src", "dest"]) |
|
|
| Move/rename | shed_exec(zone="storage", cmd="mv", args=["old", "new"]) |
|
|
| Delete file | shed_exec(zone="storage", cmd="rm", args=["file.txt"]) |
|
|
| Delete folder | shed_exec(zone="storage", cmd="rm", args=["-rf", "dir"]) |
|
|
| List files | shed_exec(zone="storage", cmd="ls", args=["-la"]) |
|
|
| Search in file | shed_exec(zone="storage", cmd="grep", args=["pat", "f"]) |
|
|
| Count lines | shed_exec(zone="storage", cmd="wc", args=["-l", "file"]) |
|
|
| Git operations | shed_exec(zone="documents", cmd="git", args=["log"]) |
|
|
|
|
CONTENT OPERATIONS (use shed_patch_text only for these):
|
|
| Operation | Method |
|
|
|------------------------|---------------------------------------------------------|
|
|
| Create new file | shed_patch_text(zone, path, content, overwrite=True) |
|
|
| Append to file | shed_patch_text(zone, path, content, position="end") |
|
|
| Replace pattern | shed_patch_text(zone, path, content, pattern="...", position="replace") |
|
|
| Edit specific line | shed_patch_text(zone, path, content, line=5, position="replace") |
|
|
|
|
📌 Line numbers start at 1 (first line = line=1)
|
|
|
|
❌ WRONG: Using patch_text to create directories (via .keep files)
|
|
✓ CORRECT: Using shed_exec(cmd="mkdir", args=["-p", "dir"])
|
|
|
|
## ZONES
|
|
All operations use a zone parameter: "uploads", "storage", "documents", or "group"
|
|
- **Uploads** : Temporary import area (read + delete only)
|
|
- **Storage** : Permanent user space (all operations, no Git)
|
|
- **Documents** : Permanent + Git versioned (auto-commit)
|
|
- **Groups** : Shared collaborative spaces (requires group= parameter)
|
|
|
|
## UNIFIED FUNCTIONS
|
|
|
|
### Core Operations (all zones)
|
|
- shed_exec(zone, cmd, args, group?) : Execute shell commands
|
|
- shed_patch_text(zone, path, content, ..., group?, message?) : Edit text files
|
|
- shed_patch_bytes(zone, path, content, ..., group?, message?) : Edit binary files
|
|
- shed_delete(zone, path, group?, message?) : Delete files/folders
|
|
- shed_rename(zone, old_path, new_path, group?, message?) : Rename/move files
|
|
|
|
### Locked Editing (with locking)
|
|
- shed_lockedit_open(zone, path, group?) : Lock + get content
|
|
- shed_lockedit_exec(zone, path, cmd, args, group?): Execute on working copy
|
|
- shed_lockedit_overwrite(zone, path, content, group?) : Write to working copy
|
|
- shed_lockedit_save(zone, path, group?, message?) : Save + unlock + commit
|
|
- shed_lockedit_cancel(zone, path, group?) : Discard + unlock
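
Minimal workflow example (illustrative path):
  shed_lockedit_open(zone="documents", path="report.md")
  shed_lockedit_overwrite(zone="documents", path="report.md", content="# Final version")
  shed_lockedit_save(zone="documents", path="report.md", message="Finalize report")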
|
|
|
|
### Import (from chat)
|
|
- shed_import(import_all=True) : Import all attached files
|
|
- shed_import(filename="file.txt") : Import one specific file
|
|
|
|
### Bridges (between zones)
|
|
- shed_move_uploads_to_storage(src, dest)
|
|
- shed_move_uploads_to_documents(src, dest, message)
|
|
- shed_copy_storage_to_documents(src, dest, message)
|
|
- shed_move_documents_to_storage(src, dest, message)
|
|
- shed_copy_to_group(src_zone, src_path, group, dest_path, message, mode)
|
|
|
|
## GROUP-SPECIFIC FUNCTIONS
|
|
|
|
### Discovery
|
|
- shed_group_list() : List groups you belong to
|
|
- shed_group_info(group) : Show files, ownership, stats
|
|
|
|
### Ownership Management
|
|
- shed_group_set_mode(group, path, mode) : Change write mode
|
|
- shed_group_chown(group, path, new_owner): Transfer ownership
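
Examples (group and owner ids are illustrative):
  shed_group_info(group="team")
  shed_group_set_mode(group="team", path="spec.md", mode="owner")
  shed_group_chown(group="team", path="spec.md", new_owner="alice")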
|
|
|
|
## WRITE MODES (Groups)
|
|
- **group** : Anyone in group can modify (default)
|
|
- **owner** : Only owner can modify
|
|
- **owner_ro** : Read-only for everyone (owner can delete)
|
|
|
|
## DOWNLOAD LINKS (3 functions)
|
|
- shed_link_create(zone, path, group?) : Create download link
|
|
- shed_link_list() : List links
|
|
- shed_link_delete(file_id) : Remove download link
|
|
|
|
Examples:
|
|
shed_link_create(zone="storage", path="report.pdf") # Returns download link
|
|
shed_link_list() # List your download links
|
|
shed_link_delete(file_id="abc123-...") # Remove a download link
|
|
|
|
## UTILITIES (6 functions)
|
|
- shed_help()
|
|
- shed_stats() : Show storage usage and quotas
|
|
- shed_parameters() : Show valve configuration (network_mode, quotas, etc.)
|
|
- shed_allowed_commands()
|
|
- shed_force_unlock(path, zone) : Unlock in personal zone (storage/documents)
|
|
- shed_force_unlock(path, group=id) : Unlock in group zone
|
|
- shed_maintenance()
|
|
|
|
## ANSWERING CONFIGURATION QUESTIONS
|
|
When user asks about configuration, call shed_parameters() first:
|
|
- "Is network enabled?" -> shed_parameters() -> check network_mode
|
|
- "Can I use curl?" -> shed_parameters() -> network_mode in ("safe", "all")
|
|
- "Can I git push?" -> shed_parameters() -> network_mode == "all"
|
|
- "What's my quota?" -> shed_parameters() -> quota_per_user_mb
|
|
- "Max file size?" -> shed_parameters() -> max_file_size_mb
|
|
|
|
## BUILTINS (8 functions - see below)
|
|
|
|
## BUILTIN FUNCTIONS (always available, no container dependency)
|
|
|
|
Unlike shed_exec(), which relies on external commands, builtins are implemented in pure Python:
|
|
|
|
| Function | Description |
|
|
|----------|-------------|
|
|
| shed_unzip(zone, src, dest) | Extract ZIP (replaces `unzip` command) |
|
|
| shed_zip(zone, src, dest, include_empty_dirs) | Create ZIP (replaces `zip` command) |
|
|
| shed_tree(zone, path, depth) | Directory tree (replaces `tree` command) |
|
|
| shed_zipinfo(zone, path) | ZIP info (replaces `zipinfo` command) |
|
|
| shed_file_type(zone, path) | File MIME type (replaces `file` command) |
|
|
| shed_convert_eol(zone, path, to) | Line endings (replaces `dos2unix`/`unix2dos`) |
|
|
| shed_hexdump(zone, path, offset, length) | Hex dump (replaces `xxd`/`hexdump`) |
|
|
| shed_sqlite(zone, path, query, params, group) | SQLite queries (replaces `sqlite3` command) |
|
|
| shed_sqlite(zone, path, import_csv, table, ...) | CSV import with auto-detection (delimiter, encoding, dates) |
|
|
|
|
Examples:
|
|
shed_unzip(zone="storage", src="repo.zip", dest="repo")
|
|
shed_zip(zone="storage", src="projects/app", dest="app.zip")
|
|
shed_zip(zone="storage", src="projects", dest="backup.zip", include_empty_dirs=True) # preserve empty dirs
|
|
shed_tree(zone="storage", path="projects", depth=2)
|
|
shed_zipinfo(zone="storage", path="backup.zip")
|
|
shed_file_type(zone="uploads", path="document.pdf")
|
|
shed_convert_eol(zone="storage", path="script.sh", to="unix")
|
|
shed_hexdump(zone="storage", path="binary.dat", length=128)
|
|
|
|
# SQLite queries
|
|
shed_sqlite(zone="storage", path="data.db", query="SELECT * FROM users")
|
|
shed_sqlite(zone="storage", path="data.db", query="INSERT INTO users VALUES (?, ?)", params=["Alice", "alice@ex.com"])
|
|
|
|
# CSV import (auto-detects delimiter and encoding)
|
|
shed_sqlite(zone="storage", path="data.db", import_csv="users.csv", table="users")
|
|
|
|
# European CSV (semicolon, comma decimal, latin-1)
|
|
shed_sqlite(zone="storage", path="data.db", import_csv="french.csv", table="ventes",
|
|
delimiter=";", decimal=",", encoding="latin-1")
|
|
|
|
# With date parsing (European DD/MM/YYYY)
|
|
shed_sqlite(zone="storage", path="data.db", import_csv="orders.csv", table="orders",
|
|
date_columns=["order_date"], date_format="dayfirst")
|
|
|
|
## ALLOWED COMMANDS
|
|
Use shed_allowed_commands() to see available commands.
|
|
|
|
## NETWORK ACCESS
|
|
Network is disabled by default. Admin can configure via the network_mode valve:
|
|
|
|
| network_mode | Effect |
|
|
|--------------|--------|
|
|
| "disabled" (default) | [X] No network. curl/wget blocked. ffmpeg blocked. URLs blocked in all commands. |
|
|
| "safe" | [OK] Downloads only. curl/wget GET, git clone/fetch/pull, ffmpeg (input only), pandoc/convert with URLs. |
|
|
| "all" | [!] Full access including upload. curl POST, git push, ffmpeg streaming output. EXFILTRATION RISK! |
|
|
|
|
Network-capable commands:
|
|
- **ffmpeg**: Blocked if disabled. In "safe" mode, output protocols (rtmp, udp, tcp, etc.) are blocked.
|
|
- **ffprobe, pandoc, convert, identify**: Local files always ok. URLs require "safe" or "all".
|
|
- **curl, wget**: Require "safe" (GET only) or "all" (all methods).
|
|
- **git**: clone/fetch/pull require "safe". push requires "all".
|
|
|
|
## DOWNLOADING FILES - IMPORTANT!
|
|
When network_mode is "safe" or "all", ALWAYS use curl via shed_exec() instead of the fetch_url tool:
|
|
|
|
shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "data.csv", "https://example.com/data.csv"])
|
|
|
|
REQUIRED: The -o flag is MANDATORY for curl/wget. Omitting it returns an error.
|
|
This prevents context pollution when downloading large files.
|
|
|
|
Why curl instead of fetch_url?
|
|
1. **Complete download**: curl downloads the FULL file. fetch_url TRUNCATES large content.
|
|
2. **No context pollution**: File is saved to disk, not dumped into conversation context.
|
|
3. **Post-processing**: You can then unzip, grep, head, tail, awk the file as needed.
|
|
|
|
Examples:
|
|
# Download a CSV (extension known)
|
|
shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "data.csv", "https://example.com/data.csv"])
|
|
|
|
# Download and unzip
|
|
shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "repo.zip", "https://github.com/.../archive.zip"])
|
|
shed_unzip(zone="storage", src="repo.zip", dest="repo")
|
|
|
|
# Download then inspect
|
|
shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "big.json", "https://api.example.com/data"])
|
|
shed_exec(zone="storage", cmd="head", args=["-100", "big.json"]) # First 100 lines
|
|
shed_exec(zone="storage", cmd="jq", args=[".", "big.json"]) # Parse JSON
|
|
|
|
## DOWNLOAD WITH UNKNOWN EXTENSION
|
|
When the URL doesn't reveal the file type (e.g., Google Drive, API endpoints):
|
|
|
|
# Step 1: Download with temporary extension
|
|
shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "sample.tmp", "https://drive.google.com/uc?id=...&export=download"])
|
|
|
|
# Step 2: Detect the actual file type
|
|
shed_file_type(zone="storage", path="sample.tmp")
|
|
# Returns: {"mime_type": "text/csv", "extension": ".csv"}
|
|
|
|
# Step 3: Rename with correct extension
|
|
shed_rename(zone="storage", old_path="sample.tmp", new_path="sample.csv")
|
|
|
|
This workflow ensures files get proper extensions even from URLs that hide the file type.
|
|
|
|
Note: curl runs with -sS automatically (silent + show-error, no progress bar spam).
|
|
Note: stdout/stderr are truncated at 50KB to prevent context overflow.
|
|
|
|
## FORBIDDEN ARGUMENTS
|
|
- Shell metacharacters: ; | && & > >> $( `
|
|
- URLs: blocked unless network_mode is "safe" or "all" for network-capable commands
|
|
- find: -exec, -execdir, -ok, -okdir (can execute commands)
|
|
- awk: system(), getline pipes (can execute commands)
|
|
|
|
## REMOVED COMMANDS (security)
|
|
- xargs, timeout, env: can execute arbitrary commands
|
|
"""
|
|
|
|
|
|
|
|
# =========================================================================
|
|
# INTERNAL IMPLEMENTATION METHODS FOR patch_* FUNCTIONS
|
|
# =========================================================================
|
|
|
|
async def _patch_text_impl(
|
|
self,
|
|
zone: str,
|
|
path: str,
|
|
content: str,
|
|
position: str,
|
|
line: int,
|
|
end_line: int,
|
|
pattern: str,
|
|
regex_flags: str,
|
|
match_all: bool,
|
|
overwrite: bool,
|
|
safe: bool,
|
|
group: str,
|
|
message: str,
|
|
mode: str,
|
|
allow_zone_in_path: bool,
|
|
__user__: dict,
|
|
__metadata__: dict,
|
|
) -> str:
|
|
"""Internal implementation for text file patching."""
|
|
user_id = __user__.get("id", "")
|
|
conv_id = self._get_conv_id(__metadata__)
|
|
zone_lower = zone.lower()
|
|
|
|
# === ZONE RESOLUTION ===
|
|
user_root = self._get_user_root(__user__)
|
|
git_commit = False
|
|
group_id = None
|
|
zone_name = None # For zone prefix validation
|
|
|
|
if zone_lower == "storage":
|
|
zone_root = user_root / "Storage" / "data"
|
|
editzone_base = user_root / "Storage"
|
|
zone_name = "Storage"
|
|
elif zone_lower == "documents":
|
|
zone_root = user_root / "Documents" / "data"
|
|
editzone_base = user_root / "Documents"
|
|
git_commit = True
|
|
zone_name = "Documents"
|
|
self._init_git_repo(zone_root)
|
|
elif zone_lower == "group":
|
|
if not group:
|
|
raise StorageError("MISSING_PARAMETER", "Group parameter required")
|
|
group_id = self._validate_group_id(group)
|
|
self._check_group_access(__user__, group_id)
|
|
zone_root = self._ensure_group_space(group_id)
|
|
editzone_base = self._get_groups_root() / group_id
|
|
git_commit = True
|
|
zone_name = f"Group:{group_id}"
|
|
else:
|
|
raise StorageError("ZONE_FORBIDDEN", f"Invalid zone: {zone}")
|
|
|
|
self._ensure_dir(zone_root)
|
|
path = self._validate_relative_path(path, zone_name, allow_zone_in_path)
|
|
target_path = self._resolve_chroot_path(zone_root, path)
|
|
|
|
# === PERMISSION CHECK (groups) ===
|
|
if group_id:
|
|
can_write, error = self._can_write_group_file(group_id, path, user_id)
|
|
if not can_write and target_path.exists():
|
|
raise StorageError(error, f"Cannot write to file: {error}")
|
|
|
|
# === VALIDATE PARAMETERS ===
|
|
valid_positions = ("start", "end", "before", "after", "replace")
|
|
if position not in valid_positions:
|
|
hint = ""
|
|
if position == "overwrite":
|
|
hint = ". To overwrite entire file, use overwrite=True parameter instead"
|
|
elif position == "at":
|
|
hint = ". 'at' is for shed_patch_bytes (binary). For text, use 'before' or 'after' with line=N"
|
|
raise StorageError(
|
|
"INVALID_PARAMETER",
|
|
f"Invalid position: {position}. Valid: {', '.join(valid_positions)}{hint}"
|
|
)
|
|
|
|
# Treat 0 as None (LLMs sometimes pass 0 instead of omitting the parameter)
|
|
if line == 0:
|
|
line = None
|
|
if end_line == 0:
|
|
end_line = None
|
|
|
|
if not overwrite and position in ("before", "after", "replace"):
|
|
if line is None and pattern is None:
|
|
raise StorageError("MISSING_PARAMETER", f"Position '{position}' requires 'line' or 'pattern'")
|
|
|
|
if line is not None and line < 1:
|
|
raise StorageError("INVALID_PARAMETER", "Line must be >= 1 (first line is 1, not 0)")
|
|
|
|
if end_line is not None and position != "replace":
|
|
raise StorageError("INVALID_PARAMETER", "end_line only valid with position='replace'")
|
|
|
|
if end_line is not None and line is not None and end_line < line:
|
|
raise StorageError("INVALID_PARAMETER", "end_line must be >= line")
|
|
|
|
# === COMPILE REGEX ===
|
|
compiled_pattern = None
|
|
if pattern is not None:
|
|
flags = 0
|
|
for c in regex_flags.lower():
|
|
if c == 'i': flags |= re.IGNORECASE
|
|
elif c == 'm': flags |= re.MULTILINE
|
|
elif c == 's': flags |= re.DOTALL
|
|
try:
|
|
compiled_pattern = re.compile(pattern, flags)
|
|
except re.error as e:
|
|
raise StorageError("INVALID_PARAMETER", f"Invalid regex: {e}")
|
|
|
|
# === CHECK FILE EXISTS ===
|
|
file_exists = target_path.exists()
|
|
file_created = False
|
|
|
|
if not file_exists:
|
|
if overwrite or position in ("start", "end"):
|
|
file_created = True
|
|
else:
|
|
raise StorageError("FILE_NOT_FOUND", f"File not found: {path}")
|
|
|
|
# === SIZE AND QUOTA CHECKS ===
|
|
content_bytes = content.encode('utf-8')
|
|
max_size = self.valves.max_file_size_mb * 1024 * 1024
|
|
try:
|
|
current_size = target_path.stat().st_size if file_exists else 0
|
|
except FileNotFoundError:
|
|
current_size = 0
|
|
|
|
if current_size + len(content_bytes) > max_size:
|
|
raise StorageError("FILE_TOO_LARGE", f"File would exceed {self.valves.max_file_size_mb} MB")
|
|
|
|
if group_id:
|
|
self._check_group_quota(group_id, len(content_bytes))
|
|
else:
|
|
self._check_quota(__user__, len(content_bytes))
|
|
|
|
# === SAFE MODE SETUP ===
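# Sketch of the safe-mode flow implemented below: acquire a lock, copy
# the target into editzone/<conv_id>/, apply the edit to that working
# copy, then move it back over the original (see SAFE MODE FINALIZE) so
# readers never observe a half-written file.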
|
|
lock_path = None
|
|
working_path = target_path
|
|
|
|
if safe:
|
|
rel_path = str(target_path.relative_to(zone_root))
|
|
lock_path = editzone_base / "locks" / (rel_path + ".lock")
|
|
edit_path = editzone_base / "editzone" / conv_id / rel_path
|
|
|
|
self._acquire_lock(lock_path, conv_id, user_id, rel_path)
|
|
self._ensure_dir(edit_path.parent)
|
|
|
|
if file_exists:
|
|
shutil.copy2(target_path, edit_path)
|
|
else:
|
|
edit_path.touch()
|
|
working_path = edit_path
|
|
else:
|
|
if file_created:
|
|
self._ensure_dir(target_path.parent)
|
|
target_path.touch()
|
|
|
|
try:
|
|
# === READ CONTENT ===
|
|
if overwrite:
|
|
lines = []
|
|
elif file_created and not safe:
|
|
lines = []
|
|
else:
|
|
with open(working_path, 'r', encoding='utf-8', errors='replace') as f:
|
|
lines = f.readlines()
|
|
|
|
lines_affected = 0
|
|
match_count = 0
|
|
|
|
# === PERFORM EDIT ===
|
|
if overwrite:
|
|
lines = [content] if content else []
|
|
lines_affected = 1 if content else 0
|
|
elif position == "start":
|
|
lines.insert(0, content)
|
|
lines_affected = 1
|
|
elif position == "end":
|
|
lines.append(content)
|
|
lines_affected = 1
|
|
elif position == "before":
|
|
if line is not None:
|
|
if line > len(lines) + 1:
|
|
raise StorageError("INVALID_PARAMETER", f"Line {line} beyond file ({len(lines)} lines)")
|
|
lines.insert(line - 1, content)
|
|
lines_affected = 1
|
|
else:
|
|
new_lines = []
|
|
found = False
|
|
for l in lines:
|
|
if compiled_pattern.search(l) and (not found or match_all):
|
|
new_lines.append(content)
|
|
lines_affected += 1
|
|
match_count += 1
|
|
found = True
|
|
new_lines.append(l)
|
|
if not found:
|
|
raise StorageError("PATTERN_NOT_FOUND", f"Pattern not found: {pattern}")
|
|
lines = new_lines
|
|
elif position == "after":
|
|
if line is not None:
|
|
if line > len(lines):
|
|
raise StorageError("INVALID_PARAMETER", f"Line {line} beyond file ({len(lines)} lines)")
|
|
lines.insert(line, content)
|
|
lines_affected = 1
|
|
else:
|
|
new_lines = []
|
|
found = False
|
|
for l in lines:
|
|
new_lines.append(l)
|
|
if compiled_pattern.search(l) and (not found or match_all):
|
|
new_lines.append(content)
|
|
lines_affected += 1
|
|
match_count += 1
|
|
found = True
|
|
if not found:
|
|
raise StorageError("PATTERN_NOT_FOUND", f"Pattern not found: {pattern}")
|
|
lines = new_lines
|
|
elif position == "replace":
|
|
if line is not None:
|
|
if line > len(lines):
|
|
raise StorageError("INVALID_PARAMETER", f"Line {line} beyond file ({len(lines)} lines)")
|
|
start_idx = line - 1
|
|
end_idx = (end_line - 1) if end_line else start_idx
|
|
end_idx = min(end_idx, len(lines) - 1)
|
|
lines_affected = end_idx - start_idx + 1
|
|
lines = lines[:start_idx] + [content] + lines[end_idx + 1:]
|
|
else:
|
|
new_lines = []
|
|
found = False
|
|
for l in lines:
|
|
if compiled_pattern.search(l) and (not found or match_all):
|
|
new_lines.append(content)
|
|
lines_affected += 1
|
|
match_count += 1
|
|
found = True
|
|
else:
|
|
new_lines.append(l)
|
|
if not found:
|
|
raise StorageError("PATTERN_NOT_FOUND", f"Pattern not found: {pattern}")
|
|
lines = new_lines
|
|
|
|
# === WRITE RESULT ===
|
|
with open(working_path, 'w', encoding='utf-8') as f:
|
|
f.writelines(lines)
|
|
|
|
# === SAFE MODE FINALIZE ===
|
|
if safe:
|
|
self._ensure_dir(target_path.parent)
|
|
shutil.move(str(working_path), str(target_path))
|
|
lock_path.unlink(missing_ok=True)
|
|
|
|
# === GIT COMMIT ===
|
|
if git_commit:
|
|
commit_msg = message or f"Patch {path}: {position}"
|
|
if group_id:
|
|
self._git_commit_as_user(zone_root, commit_msg, user_id)
|
|
else:
|
|
self._git_commit(zone_root, commit_msg)
|
|
|
|
# === SET GROUP OWNERSHIP ===
|
|
if group_id and file_created:
|
|
effective_mode = mode or self.valves.group_default_mode
|
|
if effective_mode not in ("owner", "group", "owner_ro"):
|
|
effective_mode = "group"
|
|
self._set_file_ownership(group_id, path, user_id, effective_mode)
|
|
|
|
# === BUILD RESPONSE ===
|
|
result = {
|
|
"path": path,
|
|
"zone": zone,
|
|
"position": "overwrite" if overwrite else position,
|
|
"lines_affected": lines_affected,
|
|
"created": file_created,
|
|
"file_size": target_path.stat().st_size,
|
|
"safe_mode": safe,
|
|
}
|
|
if match_count > 0:
|
|
result["pattern_matches"] = match_count
|
|
if group_id:
|
|
result["group"] = group_id
|
|
|
|
action = "created" if file_created else ("overwritten" if overwrite else position)
|
|
return self._format_response(True, data=result, message=f"File {action}: {lines_affected} line(s) affected")
|
|
|
|
finally:
|
|
if safe and lock_path and lock_path.exists():
|
|
lock_path.unlink(missing_ok=True)
|
|
|
|
async def _patch_bytes_impl(
|
|
self,
|
|
zone: str,
|
|
path: str,
|
|
content: str,
|
|
position: str,
|
|
offset: int,
|
|
length: int,
|
|
content_format: str,
|
|
safe: bool,
|
|
group: str,
|
|
message: str,
|
|
mode: str,
|
|
allow_zone_in_path: bool,
|
|
__user__: dict,
|
|
__metadata__: dict,
|
|
) -> str:
|
|
"""Internal implementation for binary file patching."""
|
|
import base64 as base64_module
|
|
|
|
user_id = __user__.get("id", "")
|
|
conv_id = self._get_conv_id(__metadata__)
|
|
zone_lower = zone.lower()
|
|
|
|
# === PARSE CONTENT ===
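# Illustrative: content="48454C4C4F" (hex) and content="SEVMTE8=" (base64)
# both decode to b"HELLO"; "raw" encodes the string as UTF-8.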
|
|
try:
|
|
if content_format == "hex":
|
|
hex_clean = content.replace(" ", "").replace("\n", "")
|
|
if len(hex_clean) % 2 != 0:
|
|
raise ValueError("Hex string must have even length")
|
|
content_bytes = bytes.fromhex(hex_clean)
|
|
elif content_format == "base64":
|
|
content_bytes = base64_module.b64decode(content)
|
|
elif content_format == "raw":
|
|
content_bytes = content.encode('utf-8')
|
|
else:
|
|
raise StorageError("INVALID_PARAMETER", f"Invalid content_format: {content_format}")
|
|
except ValueError as e:
|
|
raise StorageError("INVALID_PARAMETER", f"Invalid content: {e}")
|
|
|
|
# === ZONE RESOLUTION ===
|
|
user_root = self._get_user_root(__user__)
|
|
git_commit = False
|
|
group_id = None
|
|
zone_name = None # For zone prefix validation
|
|
|
|
if zone_lower == "storage":
|
|
zone_root = user_root / "Storage" / "data"
|
|
editzone_base = user_root / "Storage"
|
|
zone_name = "Storage"
|
|
elif zone_lower == "documents":
|
|
zone_root = user_root / "Documents" / "data"
|
|
editzone_base = user_root / "Documents"
|
|
git_commit = True
|
|
zone_name = "Documents"
|
|
self._init_git_repo(zone_root)
|
|
elif zone_lower == "group":
|
|
if not group:
|
|
raise StorageError("MISSING_PARAMETER", "Group parameter required")
|
|
group_id = self._validate_group_id(group)
|
|
self._check_group_access(__user__, group_id)
|
|
zone_root = self._ensure_group_space(group_id)
|
|
editzone_base = self._get_groups_root() / group_id
|
|
git_commit = True
|
|
zone_name = f"Group:{group_id}"
|
|
else:
|
|
raise StorageError("ZONE_FORBIDDEN", f"Invalid zone: {zone}")
|
|
|
|
self._ensure_dir(zone_root)
|
|
path = self._validate_relative_path(path, zone_name, allow_zone_in_path)
|
|
target_path = self._resolve_chroot_path(zone_root, path)
|
|
|
|
# === PERMISSION CHECK ===
|
|
if group_id:
|
|
can_write, error = self._can_write_group_file(group_id, path, user_id)
|
|
if not can_write and target_path.exists():
|
|
raise StorageError(error, f"Cannot write to file: {error}")
|
|
|
|
# === VALIDATE PARAMETERS ===
|
|
valid_positions = ("start", "end", "at", "replace")
|
|
if position not in valid_positions:
|
|
hint = ""
|
|
if position == "overwrite":
|
|
hint = ". To overwrite entire file, use overwrite=True parameter instead"
|
|
raise StorageError(
|
|
"INVALID_PARAMETER",
|
|
f"Invalid position: {position}. Valid: {', '.join(valid_positions)}{hint}"
|
|
)
|
|
|
|
if position in ("at", "replace") and offset is None:
|
|
raise StorageError("MISSING_PARAMETER", f"Position '{position}' requires 'offset'")
|
|
|
|
if position == "replace" and length is None:
|
|
raise StorageError("MISSING_PARAMETER", "Position 'replace' requires 'length'")
|
|
|
|
if offset is not None and offset < 0:
|
|
raise StorageError("INVALID_PARAMETER", "Offset must be >= 0")
|
|
|
|
if length is not None and length < 0:
|
|
raise StorageError("INVALID_PARAMETER", "Length must be >= 0")
|
|
|
|
# === CHECK FILE EXISTS ===
|
|
file_exists = target_path.exists()
|
|
file_created = False
|
|
|
|
if not file_exists:
|
|
if position in ("start", "end"):
|
|
file_created = True
|
|
else:
|
|
raise StorageError("FILE_NOT_FOUND", f"File not found: {path}")
|
|
|
|
# === SIZE CHECKS ===
|
|
max_size = self.valves.max_file_size_mb * 1024 * 1024
|
|
try:
|
|
current_size = target_path.stat().st_size if file_exists else 0
|
|
except FileNotFoundError:
|
|
current_size = 0
|
|
|
|
if offset is not None and offset > current_size:
|
|
raise StorageError("INVALID_PARAMETER", f"Offset {offset} beyond file size ({current_size})")
|
|
|
|
bytes_removed = 0
|
|
if position == "replace" and offset < current_size:
|
|
bytes_removed = min(length, current_size - offset)
|
|
|
|
if current_size + len(content_bytes) - bytes_removed > max_size:
|
|
raise StorageError("FILE_TOO_LARGE", f"File would exceed {self.valves.max_file_size_mb} MB")
|
|
|
|
if group_id:
|
|
self._check_group_quota(group_id, len(content_bytes))
|
|
else:
|
|
self._check_quota(__user__, len(content_bytes))
|
|
|
|
# === SAFE MODE SETUP ===
|
|
lock_path = None
|
|
working_path = target_path
|
|
|
|
if safe:
|
|
rel_path = str(target_path.relative_to(zone_root))
|
|
lock_path = editzone_base / "locks" / (rel_path + ".lock")
|
|
edit_path = editzone_base / "editzone" / conv_id / rel_path
|
|
|
|
self._acquire_lock(lock_path, conv_id, user_id, rel_path)
|
|
self._ensure_dir(edit_path.parent)
|
|
|
|
if file_exists:
|
|
shutil.copy2(target_path, edit_path)
|
|
else:
|
|
edit_path.touch()
|
|
working_path = edit_path
|
|
else:
|
|
if file_created:
|
|
self._ensure_dir(target_path.parent)
|
|
target_path.touch()
|
|
|
|
try:
|
|
# === READ DATA ===
|
|
if file_created and not safe:
|
|
data = bytearray()
|
|
else:
|
|
with open(working_path, 'rb') as f:
|
|
data = bytearray(f.read())
|
|
|
|
bytes_affected = len(content_bytes)
|
|
|
|
# === PERFORM EDIT ===
|
|
if position == "start":
|
|
data = bytearray(content_bytes) + data
|
|
elif position == "end":
|
|
data.extend(content_bytes)
|
|
elif position == "at":
|
|
data = data[:offset] + bytearray(content_bytes) + data[offset:]
|
|
elif position == "replace":
|
|
end_offset = min(offset + length, len(data))
|
|
bytes_affected = end_offset - offset
|
|
data = data[:offset] + bytearray(content_bytes) + data[end_offset:]
|
|
|
|
# === WRITE RESULT ===
|
|
with open(working_path, 'wb') as f:
|
|
f.write(data)
|
|
|
|
# === SAFE MODE FINALIZE ===
|
|
if safe:
|
|
self._ensure_dir(target_path.parent)
|
|
shutil.move(str(working_path), str(target_path))
|
|
lock_path.unlink(missing_ok=True)
|
|
|
|
# === GIT COMMIT ===
|
|
if git_commit:
|
|
commit_msg = message or f"Patch bytes {path}: {position}"
|
|
if group_id:
|
|
self._git_commit_as_user(zone_root, commit_msg, user_id)
|
|
else:
|
|
self._git_commit(zone_root, commit_msg)
|
|
|
|
# === SET GROUP OWNERSHIP ===
|
|
if group_id and file_created:
|
|
effective_mode = mode or self.valves.group_default_mode
|
|
if effective_mode not in ("owner", "group", "owner_ro"):
|
|
effective_mode = "group"
|
|
self._set_file_ownership(group_id, path, user_id, effective_mode)
|
|
|
|
# === BUILD RESPONSE ===
|
|
result = {
|
|
"path": path,
|
|
"zone": zone,
|
|
"position": position,
|
|
"bytes_written": len(content_bytes),
|
|
"bytes_affected": bytes_affected,
|
|
"created": file_created,
|
|
"file_size": target_path.stat().st_size,
|
|
"safe_mode": safe,
|
|
"content_format": content_format,
|
|
}
|
|
if offset is not None:
|
|
result["offset"] = offset
|
|
if group_id:
|
|
result["group"] = group_id
|
|
|
|
return self._format_response(True, data=result,
|
|
message=f"File {'created' if file_created else 'patched'}: {len(content_bytes)} bytes written")
|
|
|
|
finally:
|
|
if safe and lock_path and lock_path.exists():
|
|
lock_path.unlink(missing_ok=True)
|
|
|
|
|
|
# =========================================================================
|
|
# UNIFIED ZONE FUNCTIONS
|
|
# =========================================================================
|
|
# These 10 functions replace 32 zone-specific functions.
|
|
# All operations now use: shed_xxx(zone="...", ...)
|
|
#
|
|
# ⚠️ IMPORTANT: Use shed_exec() for ALL shell-doable operations!
|
|
# Only use shed_patch_text() to CREATE or MODIFY file CONTENT.
|
|
#
|
|
# ✓ Create directory: shed_exec(zone="storage", cmd="mkdir", args=["-p", "dir"])
|
|
# ✗ WRONG: shed_patch_text(path="dir/.keep", content="")
|
|
# =========================================================================
|
|
|
|
|
|
class Tools:
|
|
"""
|
|
Fileshed - Persistent file management with collaboration.
|
|
|
|
╔═══════════════════════════════════════════════════════════════════════════╗
|
|
║ CRITICAL: UPLOADED FILES WORKFLOW ║
|
|
║ When user uploads files, ALWAYS do this FIRST: ║
|
|
║ 1. shed_import(import_all=True) ║
|
|
║ 2. shed_exec(zone="uploads", cmd="ls", args=["-la"]) ║
|
|
║ Then move files to Storage or Documents as needed. ║
|
|
╚═══════════════════════════════════════════════════════════════════════════╝
|
|
|
|
╔═══════════════════════════════════════════════════════════════════════════╗
|
|
║ PATH RULES - IMPORTANT! ║
|
|
║ Each function already works INSIDE its zone. Use relative paths only. ║
|
|
║ ║
|
|
║ ✓ CORRECT: shed_exec(zone="storage", cmd="cat", args=["projects/f.txt"])║
|
|
║ ✗ WRONG: shed_exec(zone="storage", cmd="cat", args=["Storage/..."]) ║
|
|
║ ║
|
|
║ The zone name is NOT part of the path! ║
|
|
╚═══════════════════════════════════════════════════════════════════════════╝
|
|
|
|
╔═══════════════════════════════════════════════════════════════════════════╗
|
|
║ ⚠️ SHELL COMMANDS FIRST! ║
|
|
║ ║
|
|
║ Use shed_exec(zone, cmd, args) for ALL shell-doable operations: ║
|
|
║ • mkdir: shed_exec(zone="storage", cmd="mkdir", args=["-p", "dir"]) ║
|
|
║ • Copy: shed_exec(zone="storage", cmd="cp", args=["src", "dst"]) ║
|
|
║ • Move: shed_exec(zone="storage", cmd="mv", args=["old", "new"]) ║
|
|
║ • Read: shed_exec(zone="storage", cmd="cat", args=["file.txt"]) ║
|
|
║ • Delete: shed_exec(zone="storage", cmd="rm", args=["file.txt"]) ║
|
|
║ • Git: shed_exec(zone="documents", cmd="git", args=["log"]) ║
|
|
║ ║
|
|
║ Use shed_patch_text() ONLY for file CONTENT operations: ║
|
|
║ • Create: shed_patch_text(zone, path, content, overwrite=True) ║
|
|
║ • Append: shed_patch_text(zone, path, content, position="end") ║
|
|
║ ║
|
|
║ ❌ WRONG: shed_patch_text(path="dir/.keep") to create directories ║
|
|
║ ✓ RIGHT: shed_exec(cmd="mkdir", args=["-p", "dir"]) ║
|
|
╚═══════════════════════════════════════════════════════════════════════════╝
|
|
|
|
ZONES (use zone= parameter):
|
|
• zone="uploads" -> works in Uploads/ (temporary, per-conversation)
|
|
• zone="storage" -> works in Storage/ (permanent, no versioning)
|
|
• zone="documents" -> works in Documents/ (permanent, Git versioned)
|
|
• zone="group" -> works in Groups/ (requires group= parameter)
|
|
|
|
CROSS-ZONE TRANSFERS (special functions):
|
|
• shed_move_uploads_to_storage(src, dest)
|
|
• shed_move_uploads_to_documents(src, dest)
|
|
• shed_copy_storage_to_documents(src, dest)
|
|
• shed_move_documents_to_storage(src, dest)
|
|
• shed_copy_to_group(src_zone, src_path, group, dest_path)
|
|
|
|
GROUP WRITE MODES:
|
|
• "group" -> Anyone can edit (default)
|
|
• "owner" -> Only owner can edit
|
|
• "owner_ro" -> Read-only for everyone
|
|
"""
|
|
|
|
class Valves(BaseModel):
|
|
"""Tool configuration via Open WebUI interface."""
|
|
storage_base_path: str = Field(
|
|
default="/app/backend/data/user_files",
|
|
description="Storage root path (contains users/ and groups/)"
|
|
)
|
|
quota_per_user_mb: int = Field(
|
|
default=1000,
|
|
description="Quota per user in MB (personal space)"
|
|
)
|
|
quota_per_group_mb: int = Field(
|
|
default=2000,
|
|
description="Quota per group in MB (shared space)"
|
|
)
|
|
max_file_size_mb: int = Field(
|
|
default=300,
|
|
description="Max file size in MB"
|
|
)
|
|
lock_max_age_hours: int = Field(
|
|
default=24,
|
|
description="Max lock duration before expiration"
|
|
)
|
|
exec_timeout_default: int = Field(
|
|
default=30,
|
|
description="Default command timeout (seconds)"
|
|
)
|
|
exec_timeout_max: int = Field(
|
|
default=300,
|
|
description="Maximum allowed timeout (seconds)"
|
|
)
|
|
group_default_mode: str = Field(
|
|
default="group",
|
|
description="Default write mode for new group files: 'owner', 'group', or 'owner_ro'"
|
|
)
|
|
network_mode: str = Field(
|
|
default="disabled",
|
|
description="Network access mode: 'disabled' (default, no network), 'safe' (downloads only, blocks exfiltration), or 'all' ([!] unrestricted, enables data exfiltration)"
|
|
)
|
|
openwebui_api_url: str = Field(
|
|
default="http://localhost:8080",
|
|
description="Open WebUI base URL for shed_link_create(). Use ONLY the base URL without /api path. Examples: http://localhost:8080, https://chat.example.com. The /api/v1/files/ path is added automatically."
|
|
)
|
|
max_output_default: int = Field(
|
|
default=50000,
|
|
description="Default max output size in bytes (~50KB). Use 0 for no limit (up to max_output_absolute)."
|
|
)
|
|
max_output_absolute: int = Field(
|
|
default=5000000,
|
|
description="Absolute max output size in bytes (~5MB). Even max_output=0 cannot exceed this."
|
|
)
|
|
exec_memory_limit_mb: int = Field(
|
|
default=512,
|
|
description="Memory limit for subprocess execution in MB (0 = no limit). Protects against DoS."
|
|
)
|
|
exec_cpu_limit_seconds: int = Field(
|
|
default=60,
|
|
description="CPU time limit for subprocess in seconds (0 = no limit). Protects against CPU exhaustion."
|
|
)
|
|
sqlite_readonly: bool = Field(
|
|
default=False,
|
|
description="If True, SQLite queries are restricted to SELECT only (no INSERT/UPDATE/DELETE/DROP). Safer for untrusted data."
|
|
)
|
|
|
|
class UserValves(BaseModel):
|
|
"""Per-user configuration. Users can set these in Tools > Fileshed > Settings."""
|
|
# Note: shed_link_* functions use internal API, no user configuration needed
|
|
pass
|
|
|
|
def __init__(self):
|
|
self.valves = self.Valves()
|
|
self._core = _FileshedCore(self)
|
|
|
|
# =========================================================================
|
|
# PUBLIC API: UNIFIED ZONE FUNCTIONS (shed_*)
|
|
# =========================================================================
|
|
|
|
async def shed_exec(
|
|
self,
|
|
zone: str,
|
|
cmd: str,
|
|
args: list = [],
|
|
timeout: int = None,
|
|
max_output: int = None,
|
|
stdout_file: str = None,
|
|
stderr_file: str = None,
|
|
redirect_stderr_to_stdout: bool = False,
|
|
group: str = None,
|
|
allow_zone_in_path: bool = False,
|
|
__user__: dict = {},
|
|
__metadata__: dict = {},
|
|
) -> str:
|
|
"""
|
|
Execute a command in the specified zone.
|
|
|
|
:param zone: Target zone ("uploads", "storage", "documents", or "group")
|
|
:param cmd: Command to execute (must be in whitelist)
|
|
:param args: Command arguments - file paths go here
|
|
:param timeout: Timeout in seconds (default: 30, max: 300)
|
|
:param max_output: Max output bytes (None=50KB, 0=5MB max)
|
|
:param stdout_file: Save stdout to this file instead of returning it
|
|
:param stderr_file: Save stderr to this file instead of returning it
|
|
:param redirect_stderr_to_stdout: Merge stderr into stdout (like 2>&1)
|
|
:param group: Group name/ID (required if zone="group")
|
|
:param allow_zone_in_path: Allow paths starting with zone name (default: False).
|
|
By default, paths like "Documents/folder" in zone="documents" are rejected
|
|
to prevent accidental duplication. Set True only if you really want a
|
|
subfolder named after the zone.
|
|
:return: Command output as JSON
|
|
|
|
Examples:
|
|
shed_exec(zone="uploads", cmd="cat", args=["file.txt"])
|
|
shed_exec(zone="storage", cmd="ls", args=["-la"])
|
|
shed_exec(zone="storage", cmd="mkdir", args=["-p", "projects/2024"])
|
|
shed_exec(zone="storage", cmd="grep", args=["-r", "TODO", "."])
|
|
shed_exec(zone="documents", cmd="git", args=["log", "--oneline"])
|
|
shed_exec(zone="group", group="team", cmd="ls", args=["-la"])
|
|
|
|
# Redirect output to file (like shell > redirection)
|
|
shed_exec(zone="storage", cmd="jq", args=["-r", ".[]", "data.json"], stdout_file="output.txt")
|
|
|
|
Notes:
|
|
- uploads: read-only commands only
|
|
- documents/group: git commands allowed
|
|
- File paths in args are relative to zone root (don't include zone name!)
|
|
- Use mkdir -p to create directories (NOT patch_text with .keep files!)
|
|
- stdout_file/stderr_file: paths relative to zone root
|
|
"""
|
|
try:
|
|
ctx = self._core._resolve_zone(zone, group, __user__, __metadata__)
|
|
|
|
# Validate command against zone whitelist
|
|
self._core._validate_command(cmd, ctx.whitelist, args)
|
|
|
|
# Validate arguments (path escapes, network, etc.)
|
|
self._core._validate_args(args, ctx.readonly, cmd)
|
|
validated_args = self._core._validate_path_args(
|
|
args, ctx.zone_root, cmd,
|
|
zone_name=ctx.zone_name,
|
|
allow_zone_in_path=allow_zone_in_path
|
|
)
|
|
|
|
# Validate and resolve output file paths
|
|
stdout_path = None
|
|
stderr_path = None
|
|
|
|
if stdout_file:
|
|
if ctx.readonly:
|
|
raise StorageError(
|
|
"ZONE_READONLY",
|
|
"Cannot write stdout_file in read-only zone",
|
|
{"zone": zone},
|
|
"Use a writable zone (storage, documents)"
|
|
)
|
|
# Validate path doesn't escape
|
|
self._core._validate_path_args(
|
|
[stdout_file], ctx.zone_root, cmd,
|
|
zone_name=ctx.zone_name,
|
|
allow_zone_in_path=allow_zone_in_path
|
|
)
|
|
stdout_path = ctx.zone_root / stdout_file
|
|
|
|
if stderr_file:
|
|
if ctx.readonly:
|
|
raise StorageError(
|
|
"ZONE_READONLY",
|
|
"Cannot write stderr_file in read-only zone",
|
|
{"zone": zone},
|
|
"Use a writable zone (storage, documents)"
|
|
)
|
|
# Validate path doesn't escape
|
|
self._core._validate_path_args(
|
|
[stderr_file], ctx.zone_root, cmd,
|
|
zone_name=ctx.zone_name,
|
|
allow_zone_in_path=allow_zone_in_path
|
|
)
|
|
stderr_path = ctx.zone_root / stderr_file
|
|
|
|
# Execute
|
|
result = self._core._exec_command(
|
|
cmd, validated_args,
|
|
cwd=ctx.zone_root,
|
|
timeout=self._core._clamp_timeout(timeout),
|
|
max_output=max_output,
|
|
stdout_file=stdout_path,
|
|
stderr_file=stderr_path,
|
|
redirect_stderr_to_stdout=redirect_stderr_to_stdout,
|
|
)
|
|
|
|
# Security: neutralize git hooks after clone to prevent code execution
|
|
# from malicious repositories
|
|
if cmd == "git" and args and args[0] == "clone" and result["returncode"] == 0:
|
|
# Determine the cloned repo directory
|
|
# git clone <url> [target] - target is last non-flag arg, or derived from URL
|
|
clone_target = None
|
|
for arg in reversed(args[1:]):
|
|
arg_str = str(arg)
|
|
if not arg_str.startswith("-"):
|
|
clone_target = arg_str
|
|
break
|
|
|
|
if clone_target:
|
|
# Check if it's a URL (last arg is URL, so repo dir is derived from URL)
|
|
if "://" in clone_target or clone_target.endswith(".git"):
|
|
# Extract repo name from URL
|
|
repo_name = clone_target.rstrip("/").split("/")[-1]
|
|
if repo_name.endswith(".git"):
|
|
repo_name = repo_name[:-4]
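# e.g. "https://example.com/org/repo.git" -> repo_name "repo"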
|
|
clone_path = ctx.zone_root / repo_name
|
|
else:
|
|
clone_path = ctx.zone_root / clone_target
|
|
|
|
if clone_path.exists() and (clone_path / ".git").exists():
|
|
self._core._neutralize_git_hooks(clone_path)
|
|
|
|
response_data = {
|
|
"zone": ctx.zone_name,
|
|
"command": cmd,
|
|
"args": args,
|
|
"stdout": result["stdout"],
|
|
"stderr": result["stderr"],
|
|
"returncode": result["returncode"],
|
|
"truncated": result.get("truncated", False),
|
|
}
|
|
|
|
if stdout_file:
|
|
response_data["stdout_file"] = stdout_file
|
|
if stdout_path and stdout_path.exists():
|
|
response_data["stdout_file_size"] = stdout_path.stat().st_size
|
|
if stderr_file:
|
|
response_data["stderr_file"] = stderr_file
|
|
if stderr_path and stderr_path.exists():
|
|
response_data["stderr_file_size"] = stderr_path.stat().st_size
|
|
|
|
return self._core._format_response(True, data=response_data)
|
|
|
|
except StorageError as e:
|
|
return self._core._format_error(e, "shed_exec")
|
|
except Exception as e:
|
|
return self._core._format_response(False, message=str(e))
|
|
|
|
async def shed_patch_text(
|
|
self,
|
|
zone: str,
|
|
path: str,
|
|
content: str,
|
|
position: str = "end",
|
|
line: int = None,
|
|
end_line: int = None,
|
|
pattern: str = None,
|
|
regex_flags: str = "",
|
|
match_all: bool = False,
|
|
overwrite: bool = False,
|
|
safe: bool = True,
|
|
group: str = None,
|
|
message: str = None,
|
|
mode: str = None,
|
|
allow_zone_in_path: bool = False,
|
|
__user__: dict = {},
|
|
__metadata__: dict = {},
|
|
) -> str:
|
|
"""
|
|
Edit a text file in the specified zone.
|
|
|
|
⚠️ Use this ONLY for file CONTENT operations!
|
|
For creating directories, use: shed_exec(zone, cmd="mkdir", args=["-p", "dir"])
|
|
|
|
:param zone: Target zone ("storage", "documents", or "group")
|
|
:param path: File path relative to zone (don't include zone name!)
|
|
:param content: Content to write
|
|
:param position: "start", "end", "before", "after", or "replace" (NOT "overwrite" or "at"!)
|
|
:param line: Line number for "before"/"after"/"replace" (first line is 1, not 0)
|
|
:param end_line: End line for range replacement (only with position="replace")
|
|
:param pattern: Regex pattern for "replace"
|
|
:param regex_flags: Regex flags (i=ignore case, m=multiline, s=dotall)
|
|
:param match_all: Replace all pattern matches (default: first only)
|
|
:param overwrite: Set to True to replace entire file (use this, NOT position="overwrite")
|
|
:param safe: Lock file during edit (default: True)
|
|
:param group: Group name/ID (required if zone="group")
|
|
:param message: Git commit message (documents/group only, ignored for storage)
|
|
:param mode: Ownership mode for new files in group: "owner", "group", "owner_ro"
|
|
:param allow_zone_in_path: Allow path starting with zone name (default: False)
|
|
:return: Edit result as JSON
|
|
|
|
Examples:
|
|
shed_patch_text(zone="storage", path="notes.txt", content="New line\\n", position="end")
|
|
shed_patch_text(zone="storage", path="file.txt", content="inserted\\n", position="before", line=5)
|
|
shed_patch_text(zone="storage", path="config.py", content="DEBUG=True", pattern="DEBUG=.*", position="replace")
|
|
shed_patch_text(zone="documents", path="README.md", content="# Title", overwrite=True, message="Init")
|
|
"""
|
|
try:
|
|
return await self._core._patch_text_impl(
|
|
zone=zone, path=path, content=content,
|
|
position=position, line=line, end_line=end_line,
|
|
pattern=pattern, regex_flags=regex_flags, match_all=match_all,
|
|
overwrite=overwrite, safe=safe, group=group,
|
|
message=message, mode=mode,
|
|
allow_zone_in_path=allow_zone_in_path,
|
|
__user__=__user__, __metadata__=__metadata__,
|
|
)
|
|
except StorageError as e:
|
|
return self._core._format_error(e, "shed_patch_text")
|
|
except Exception as e:
|
|
return self._core._format_response(False, message=str(e))
|
|
|
|
async def shed_patch_bytes(
|
|
self,
|
|
zone: str,
|
|
path: str,
|
|
content: str,
|
|
content_format: str = "hex",
|
|
position: str = "end",
|
|
offset: int = None,
|
|
length: int = None,
|
|
safe: bool = True,
|
|
group: str = None,
|
|
message: str = None,
|
|
mode: str = None,
|
|
allow_zone_in_path: bool = False,
|
|
__user__: dict = {},
|
|
__metadata__: dict = {},
|
|
) -> str:
|
|
"""
|
|
Edit a binary file in the specified zone.
|
|
|
|
:param zone: Target zone ("storage", "documents", or "group")
|
|
:param path: File path relative to zone (don't include zone name!)
|
|
:param content: Content to write (format depends on content_format)
|
|
:param content_format: "hex" (default), "base64", or "raw"
|
|
:param position: "start", "end", "at", or "replace"
|
|
:param offset: Byte offset for "at"/"replace"
|
|
:param length: Bytes to replace for "replace"
|
|
:param safe: Lock file during edit (default: True)
|
|
:param group: Group name/ID (required if zone="group")
|
|
:param message: Git commit message (documents/group only)
|
|
:param mode: Ownership mode for new files in group
|
|
:param allow_zone_in_path: Allow path starting with zone name (default: False)
|
|
:return: Edit result as JSON
|
|
|
|
Examples:
|
|
shed_patch_bytes(zone="storage", path="data.bin", content="48454C4C4F")
|
|
shed_patch_bytes(zone="storage", path="img.png", content="89504E47", position="start")
|
|
"""
|
|
try:
|
|
return await self._core._patch_bytes_impl(
|
|
zone=zone, path=path, content=content,
|
|
content_format=content_format, position=position,
|
|
offset=offset, length=length, safe=safe,
|
|
group=group, message=message, mode=mode,
|
|
allow_zone_in_path=allow_zone_in_path,
|
|
__user__=__user__, __metadata__=__metadata__,
|
|
)
|
|
except StorageError as e:
|
|
return self._core._format_error(e, "shed_patch_bytes")
|
|
except Exception as e:
|
|
return self._core._format_response(False, message=str(e))
|
|
|
|
async def shed_delete(
|
|
self,
|
|
zone: str,
|
|
path: str,
|
|
group: str = None,
|
|
message: str = None,
|
|
allow_zone_in_path: bool = False,
|
|
__user__: dict = {},
|
|
__metadata__: dict = {},
|
|
) -> str:
|
|
"""
|
|
Delete a file or folder in the specified zone.
|
|
|
|
:param zone: Target zone ("uploads", "storage", "documents", or "group")
|
|
:param path: Path to delete (relative to zone, don't include zone name!)
|
|
:param group: Group name/ID (required if zone="group")
|
|
:param message: Git commit message (documents/group only)
|
|
:param allow_zone_in_path: Allow path starting with zone name (default: False)
|
|
:return: Deletion result as JSON
|
|
|
|
Examples:
|
|
shed_delete(zone="uploads", path="temp.txt")
|
|
shed_delete(zone="storage", path="old_project/")
|
|
shed_delete(zone="documents", path="draft.md", message="Remove draft")
|
|
shed_delete(zone="group", group="team", path="obsolete.txt", message="Cleanup")
|
|
|
|
Note: uploads allows delete to clean up imported files.
|
|
"""
|
|
try:
|
|
# uploads allows delete even though readonly for other ops
|
|
ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=False)
|
|
|
|
path = self._core._validate_relative_path(path, ctx.zone_name, allow_zone_in_path)
|
|
target = self._core._resolve_chroot_path(ctx.zone_root, path)
|
|
|
|
if not target.exists():
|
|
raise StorageError("FILE_NOT_FOUND", f"Path not found: {path}")
|
|
|
|
# Group: check delete permission
|
|
user_id = __user__.get("id", "")
|
|
if ctx.group_id:
|
|
can_delete, reason = self._core._can_delete_group_file(ctx.group_id, path, user_id)
|
|
if not can_delete:
|
|
raise StorageError("PERMISSION_DENIED", reason, {"path": path})
|
|
|
|
# Delete
|
|
was_dir = target.is_dir()
|
|
if was_dir:
|
|
shutil.rmtree(target)
|
|
if ctx.group_id:
|
|
self._core._delete_file_ownership_recursive(ctx.group_id, path)
|
|
else:
|
|
target.unlink()
|
|
if ctx.group_id:
|
|
self._core._delete_file_ownership(ctx.group_id, path)
|
|
|
|
# Git commit if needed
|
|
if ctx.git_commit:
|
|
self._core._git_run(["add", "-A"], ctx.zone_root)
|
|
commit_msg = message or f"Delete {path}"
|
|
self._core._git_commit_as_user(ctx.zone_root, commit_msg, user_id)
|
|
|
|
return self._core._format_response(True, data={
|
|
"zone": ctx.zone_name,
|
|
"deleted": path,
|
|
"was_directory": was_dir,
|
|
}, message=f"Deleted: {path}")
|
|
|
|
except StorageError as e:
|
|
return self._core._format_error(e, "shed_delete")
|
|
except Exception as e:
|
|
return self._core._format_response(False, message=str(e))
|
|
|
|
async def shed_rename(
|
|
self,
|
|
zone: str,
|
|
old_path: str,
|
|
new_path: str,
|
|
group: str = None,
|
|
message: str = None,
|
|
allow_zone_in_path: bool = False,
|
|
__user__: dict = {},
|
|
__metadata__: dict = {},
|
|
) -> str:
|
|
"""
|
|
Rename or move a file/folder within the specified zone.
|
|
|
|
:param zone: Target zone ("storage", "documents", or "group")
|
|
:param old_path: Current path (relative to zone, don't include zone name!)
|
|
:param new_path: New path (relative to zone, don't include zone name!)
|
|
:param group: Group name/ID (required if zone="group")
|
|
:param message: Git commit message (documents/group only)
|
|
:param allow_zone_in_path: Allow paths starting with zone name (default: False)
|
|
:return: Rename result as JSON
|
|
|
|
Examples:
|
|
shed_rename(zone="storage", old_path="draft.txt", new_path="final.txt")
|
|
shed_rename(zone="documents", old_path="old/", new_path="archive/", message="Reorganize")
|
|
shed_rename(zone="group", group="team", old_path="v1.doc", new_path="v2.doc")
|
|
"""
|
|
try:
|
|
ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True)
|
|
|
|
old_path = self._core._validate_relative_path(old_path, ctx.zone_name, allow_zone_in_path)
|
|
new_path = self._core._validate_relative_path(new_path, ctx.zone_name, allow_zone_in_path)
|
|
|
|
old_target = self._core._resolve_chroot_path(ctx.zone_root, old_path)
|
|
new_target = self._core._resolve_chroot_path(ctx.zone_root, new_path)
|
|
|
|
if not old_target.exists():
|
|
raise StorageError("FILE_NOT_FOUND", f"Source not found: {old_path}")
|
|
|
|
if new_target.exists():
|
|
raise StorageError("FILE_EXISTS", f"Destination exists: {new_path}")
|
|
|
|
# Group: check write permission
|
|
user_id = __user__.get("id", "")
|
|
if ctx.group_id:
|
|
can_write, reason = self._core._can_write_group_file(ctx.group_id, old_path, user_id)
|
|
if not can_write:
|
|
raise StorageError("PERMISSION_DENIED", reason, {"path": old_path})
|
|
|
|
# Create parent directories
|
|
new_target.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Rename
|
|
old_target.rename(new_target)
|
|
|
|
# Update ownership records
|
|
if ctx.group_id:
|
|
self._core._update_file_ownership_paths(ctx.group_id, old_path, new_path)
|
|
|
|
# Git commit
|
|
if ctx.git_commit:
|
|
self._core._git_run(["add", "-A"], ctx.zone_root)
|
|
commit_msg = message or f"Rename {old_path} -> {new_path}"
|
|
self._core._git_commit_as_user(ctx.zone_root, commit_msg, user_id)
|
|
|
|
return self._core._format_response(True, data={
|
|
"zone": ctx.zone_name,
|
|
"old_path": old_path,
|
|
"new_path": new_path,
|
|
}, message=f"Renamed: {old_path} -> {new_path}")
|
|
|
|
except StorageError as e:
|
|
return self._core._format_error(e, "shed_rename")
|
|
except Exception as e:
|
|
return self._core._format_response(False, message=str(e))
|
|
|
|
async def shed_lockedit_open(
|
|
self,
|
|
zone: str,
|
|
path: str,
|
|
group: str = None,
|
|
allow_zone_in_path: bool = False,
|
|
__user__: dict = {},
|
|
__metadata__: dict = {},
|
|
) -> str:
|
|
"""
|
|
Open a file for safe editing (locks file, creates working copy).
|
|
|
|
⚠️ COMPLETE WORKFLOW (must follow all steps):
|
|
1. shed_lockedit_open(zone, path) → Lock file, get content
|
|
2. shed_lockedit_overwrite(zone, path, content) → Modify (NOT shed_patch_text!)
|
|
3. shed_lockedit_save(zone, path) → Save + unlock (CLOSES edit mode!)
|
|
|
|
OR to cancel: shed_lockedit_cancel(zone, path) → Discard changes + unlock
|
|
|
|
:param zone: Target zone ("storage", "documents", or "group")
|
|
:param path: File path to edit (relative to zone, don't include zone name!)
|
|
:param group: Group name/ID (required if zone="group")
|
|
:param allow_zone_in_path: Allow path starting with zone name (default: False)
|
|
:return: File content and lock info as JSON
|
|
|
|
Examples:
|
|
shed_lockedit_open(zone="storage", path="config.json")
|
|
shed_lockedit_open(zone="documents", path="report.md")
|
|
shed_lockedit_open(zone="group", group="team", path="shared.txt")
|
|
"""
|
|
try:
|
|
ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True)
|
|
|
|
path = self._core._validate_relative_path(path, ctx.zone_name, allow_zone_in_path)
|
|
target = self._core._resolve_chroot_path(ctx.zone_root, path)
|
|
|
|
if not target.exists():
|
|
raise StorageError("FILE_NOT_FOUND", f"File not found: {path}")
|
|
|
|
if target.is_dir():
|
|
raise StorageError("NOT_A_FILE", f"Cannot edit a directory: {path}")
|
|
|
|
# Group: check write permission
|
|
user_id = __user__.get("id", "")
|
|
if ctx.group_id:
|
|
can_write, reason = self._core._can_write_group_file(ctx.group_id, path, user_id)
|
|
if not can_write:
|
|
raise StorageError("PERMISSION_DENIED", reason, {"path": path})
|
|
|
|
# Create lock
|
|
lock_path = self._core._get_lock_path(ctx.editzone_base, path)
|
|
self._core._acquire_lock(lock_path, ctx.conv_id, user_id, path)
|
|
|
|
# Copy to editzone
|
|
editzone_path = self._core._get_editzone_path(ctx.editzone_base, ctx.conv_id, path)
|
|
editzone_path.parent.mkdir(parents=True, exist_ok=True)
|
|
shutil.copy2(target, editzone_path)
|
|
|
|
# Read content
|
|
try:
|
|
with open(editzone_path, 'r', encoding='utf-8', errors='replace') as f:
|
|
content = f.read()
|
|
is_binary = False
|
|
except (OSError, UnicodeDecodeError):
|
|
content = None
|
|
is_binary = True
|
|
|
|
return self._core._format_response(True, data={
|
|
"zone": ctx.zone_name,
|
|
"path": path,
|
|
"content": content,
|
|
"is_binary": is_binary,
|
|
"size": target.stat().st_size,
|
|
"locked_by": user_id,
|
|
}, message=f"File opened for editing: {path}")
|
|
|
|
except StorageError as e:
|
|
return self._core._format_error(e, "shed_lockedit_open")
|
|
except Exception as e:
|
|
return self._core._format_response(False, message=str(e))
|
|
|
|
async def shed_lockedit_exec(
|
|
self,
|
|
zone: str,
|
|
path: str,
|
|
cmd: str,
|
|
args: list = [],
|
|
timeout: int = None,
|
|
group: str = None,
|
|
allow_zone_in_path: bool = False,
|
|
__user__: dict = {},
|
|
__metadata__: dict = {},
|
|
) -> str:
|
|
"""
|
|
Execute a command on file in editzone (working copy).
|
|
|
|
:param zone: Target zone ("storage", "documents", or "group")
|
|
:param path: File path (must be opened with shed_lockedit_open, relative to zone)
|
|
:param cmd: Command to execute
|
|
:param args: Command arguments (use "." for the file being edited)
|
|
:param timeout: Timeout in seconds
|
|
:param group: Group name/ID (required if zone="group")
|
|
:param allow_zone_in_path: Allow path starting with zone name (default: False)
|
|
:return: Command output as JSON
|
|
|
|
Examples:
|
|
shed_lockedit_exec(zone="storage", path="data.txt", cmd="sed", args=["-i", "s/old/new/g", "."])
|
|
shed_lockedit_exec(zone="storage", path="code.py", cmd="cat", args=["."])
|
|
"""
|
|
try:
|
|
ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True)
|
|
|
|
path = self._core._validate_relative_path(path, ctx.zone_name, allow_zone_in_path)
|
|
user_id = __user__.get("id", "")
|
|
|
|
# Verify lock ownership
|
|
lock_path = self._core._get_lock_path(ctx.editzone_base, path)
|
|
self._core._check_lock_owner(lock_path, user_id)
|
|
|
|
# Get editzone path
|
|
editzone_path = self._core._get_editzone_path(ctx.editzone_base, ctx.conv_id, path)
|
|
|
|
if not editzone_path.exists():
|
|
raise StorageError("NOT_IN_EDIT_MODE", f"File not open for editing: {path}",
|
|
hint="Use shed_lockedit_open() first. Note: shed_lockedit_save() CLOSES edit mode!")
|
|
|
|
# Validate command
|
|
self._core._validate_command(cmd, ctx.whitelist, args)
|
|
|
|
# Replace "." with actual filename
|
|
processed_args = [editzone_path.name if a == "." else a for a in args]
|
|
|
|
# Execute in editzone directory
|
|
result = self._core._exec_command(
|
|
cmd, processed_args,
|
|
cwd=editzone_path.parent,
|
|
timeout=self._core._clamp_timeout(timeout),
|
|
)
|
|
|
|
return self._core._format_response(True, data={
|
|
"zone": ctx.zone_name,
|
|
"path": path,
|
|
"command": cmd,
|
|
"stdout": result["stdout"],
|
|
"stderr": result["stderr"],
|
|
"returncode": result["returncode"],
|
|
}, message="Command executed in editzone")
|
|
|
|
except StorageError as e:
|
|
return self._core._format_error(e, "shed_lockedit_exec")
|
|
except Exception as e:
|
|
return self._core._format_response(False, message=str(e))

    async def shed_lockedit_overwrite(
        self,
        zone: str,
        path: str,
        content: str,
        append: bool = False,
        group: str = None,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Write content to file in editzone (working copy).

        ⚠️ REQUIRES: File must be opened first with shed_lockedit_open()
        ⚠️ DO NOT use position, pattern, line, overwrite params - those are for shed_patch_text!

        :param zone: Target zone ("storage", "documents", or "group")
        :param path: File path (must be opened with shed_lockedit_open, relative to zone)
        :param content: Content to write (replaces entire file by default)
        :param append: If True, append instead of replace
        :param group: Group name/ID (required if zone="group")
        :param allow_zone_in_path: Allow path starting with zone name (default: False)
        :return: Write result as JSON

        Examples:
            shed_lockedit_overwrite(zone="storage", path="config.json", content='{"key": "value"}')
            shed_lockedit_overwrite(zone="storage", path="log.txt", content="New entry\\n", append=True)
        """
        try:
            ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True)

            path = self._core._validate_relative_path(path, ctx.zone_name, allow_zone_in_path)
            user_id = __user__.get("id", "")

            # Verify lock ownership
            lock_path = self._core._get_lock_path(ctx.editzone_base, path)
            self._core._check_lock_owner(lock_path, user_id)

            # Get editzone path
            editzone_path = self._core._get_editzone_path(ctx.editzone_base, ctx.conv_id, path)

            if not editzone_path.exists():
                raise StorageError("NOT_IN_EDIT_MODE", f"File not open for editing: {path}",
                                   hint="Use shed_lockedit_open() first. Note: shed_lockedit_save() CLOSES edit mode!")

            # Check content size
            self._core._validate_content_size(content)

            # Write
            mode = "a" if append else "w"
            with open(editzone_path, mode, encoding="utf-8") as f:
                f.write(content)

            new_size = editzone_path.stat().st_size

            return self._core._format_response(True, data={
                "zone": ctx.zone_name,
                "path": path,
                "bytes_written": len(content.encode('utf-8')),
                "new_size": new_size,
                "mode": "append" if append else "overwrite",
            }, message="Content written to editzone")

        except StorageError as e:
            return self._core._format_error(e, "shed_lockedit_overwrite")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_lockedit_save(
        self,
        zone: str,
        path: str,
        group: str = None,
        message: str = None,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Save edited file back to zone and release lock.

        ⚠️ THIS CLOSES EDIT MODE! After save, the file is unlocked.
        To edit again, you must call shed_lockedit_open() first.

        Workflow: shed_lockedit_open → shed_lockedit_overwrite → shed_lockedit_save (done!)

        :param zone: Target zone ("storage", "documents", or "group")
        :param path: File path (relative to zone, don't include zone name!)
        :param group: Group name/ID (required if zone="group")
        :param message: Git commit message (documents/group only)
        :param allow_zone_in_path: Allow path starting with zone name (default: False)
        :return: Save result as JSON

        Examples:
            shed_lockedit_save(zone="storage", path="config.json")
            shed_lockedit_save(zone="documents", path="report.md", message="Final version")
        """
        try:
            ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True)

            path = self._core._validate_relative_path(path, ctx.zone_name, allow_zone_in_path)
            user_id = __user__.get("id", "")

            # Verify lock ownership
            lock_path = self._core._get_lock_path(ctx.editzone_base, path)
            self._core._check_lock_owner(lock_path, user_id)

            # Get paths
            editzone_path = self._core._get_editzone_path(ctx.editzone_base, ctx.conv_id, path)
            target = self._core._resolve_chroot_path(ctx.zone_root, path)

            if not editzone_path.exists():
                raise StorageError("NOT_IN_EDIT_MODE", f"File not open for editing: {path}",
                                   hint="Use shed_lockedit_open() first. Note: shed_lockedit_save() CLOSES edit mode!")

            # Check quota
            size_diff = editzone_path.stat().st_size - (target.stat().st_size if target.exists() else 0)
            if size_diff > 0:
                if ctx.group_id:
                    self._core._check_group_quota(ctx.group_id, size_diff)
                else:
                    self._core._check_quota(__user__, size_diff)

            # Copy back to zone
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(editzone_path, target)

            # Git commit if needed
            if ctx.git_commit:
                self._core._git_run(["add", "-A"], ctx.zone_root)
                commit_msg = message or f"Edit {path}"
                self._core._git_commit_as_user(ctx.zone_root, commit_msg, user_id)

            # Cleanup
            self._core._rm_with_empty_parents(editzone_path, ctx.editzone_base / "editzone")
            lock_path.unlink(missing_ok=True)

            return self._core._format_response(True, data={
                "zone": ctx.zone_name,
                "path": path,
                "size": target.stat().st_size,
                "committed": ctx.git_commit,
            }, message=f"Saved and unlocked: {path}")

        except StorageError as e:
            return self._core._format_error(e, "shed_lockedit_save")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_lockedit_cancel(
        self,
        zone: str,
        path: str,
        group: str = None,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Cancel editing and release lock (discards changes).

        :param zone: Target zone ("storage", "documents", or "group")
        :param path: File path (relative to zone, don't include zone name!)
        :param group: Group name/ID (required if zone="group")
        :param allow_zone_in_path: Allow path starting with zone name (default: False)
        :return: Cancel result as JSON

        Examples:
            shed_lockedit_cancel(zone="storage", path="config.json")
        """
        try:
            ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True)

            path = self._core._validate_relative_path(path, ctx.zone_name, allow_zone_in_path)
            user_id = __user__.get("id", "")

            # Verify lock ownership
            lock_path = self._core._get_lock_path(ctx.editzone_base, path)
            self._core._check_lock_owner(lock_path, user_id)

            # Get editzone path
            editzone_path = self._core._get_editzone_path(ctx.editzone_base, ctx.conv_id, path)

            # Cleanup
            if editzone_path.exists():
                self._core._rm_with_empty_parents(editzone_path, ctx.editzone_base / "editzone")
            lock_path.unlink(missing_ok=True)

            return self._core._format_response(True, data={
                "zone": ctx.zone_name,
                "path": path,
                "changes_discarded": True,
            }, message=f"Edit cancelled, changes discarded: {path}")

        except StorageError as e:
            return self._core._format_error(e, "shed_lockedit_cancel")
        except Exception as e:
            return self._core._format_response(False, message=str(e))
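
    # ------------------------------------------------------------------
    # Illustrative lockedit lifecycle (hypothetical zone/path/content,
    # shown as comments in the style of the header examples):
    #
    #   shed_lockedit_open(zone="documents", path="report.md")
    #   shed_lockedit_exec(zone="documents", path="report.md",
    #                      cmd="sed", args=["-i", "s/draft/final/g", "."])
    #   shed_lockedit_overwrite(zone="documents", path="report.md",
    #                           content="# Final version\n")
    #   shed_lockedit_save(zone="documents", path="report.md",
    #                      message="Finalize report")
    #   # ...or discard instead of saving:
    #   # shed_lockedit_cancel(zone="documents", path="report.md")
    #
    # save and cancel both release the lock; to edit again, re-open.
    # ------------------------------------------------------------------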

    async def shed_move_uploads_to_storage(
        self,
        src: str,
        dest: str,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Moves file from Uploads to Storage.
        IMPORTANT: Call shed_import() first to import uploaded files!

        :param src: Source path in Uploads (don't include zone name!)
        :param dest: Destination path in Storage (don't include zone name!)
        :param allow_zone_in_path: Allow paths starting with zone name (default: False)
        :return: Confirmation as JSON
        """
        try:
            user_root = self._core._get_user_root(__user__)
            conv_id = self._core._get_conv_id(__metadata__)

            # Validate paths with zone name check
            src = self._core._validate_relative_path(src, "Uploads", allow_zone_in_path)
            dest = self._core._validate_relative_path(dest, "Storage", allow_zone_in_path)

            src_chroot = user_root / "Uploads" / conv_id
            dest_chroot = user_root / "Storage" / "data"

            source = self._core._resolve_chroot_path(src_chroot, src)
            target = self._core._resolve_chroot_path(dest_chroot, dest)

            if not source.exists():
                raise StorageError(
                    "FILE_NOT_FOUND",
                    f"File not found: {src}",
                    {"path": src, "uploads_dir": str(src_chroot)},
                    "Did you call shed_import(import_all=True) first? Files must be imported before moving."
                )

            # No quota check needed: move within user space doesn't change total usage

            self._core._ensure_dir(dest_chroot)
            self._core._ensure_dir(target.parent)

            shutil.move(str(source), str(target))

            return self._core._format_response(True, message=f"Moved: Uploads/{src} -> Storage/{dest}")

        except StorageError as e:
            return self._core._format_error(e, "shed_move_uploads_to_storage")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_move_uploads_to_documents(
        self,
        src: str,
        dest: str,
        message: str = None,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Moves file from Uploads to Documents with Git commit.
        IMPORTANT: Call shed_import() first to import uploaded files!

        :param src: Source path in Uploads (don't include zone name!)
        :param dest: Destination path in Documents (don't include zone name!)
        :param message: Commit message
        :param allow_zone_in_path: Allow paths starting with zone name (default: False)
        :return: Confirmation as JSON
        """
        try:
            user_root = self._core._get_user_root(__user__)
            conv_id = self._core._get_conv_id(__metadata__)

            # Validate paths with zone name check
            src = self._core._validate_relative_path(src, "Uploads", allow_zone_in_path)
            dest = self._core._validate_relative_path(dest, "Documents", allow_zone_in_path)

            src_chroot = user_root / "Uploads" / conv_id
            dest_chroot = user_root / "Documents" / "data"

            source = self._core._resolve_chroot_path(src_chroot, src)
            target = self._core._resolve_chroot_path(dest_chroot, dest)

            if not source.exists():
                raise StorageError(
                    "FILE_NOT_FOUND",
                    f"File not found: {src}",
                    {"path": src, "uploads_dir": str(src_chroot)},
                    "Did you call shed_import(import_all=True) first? Files must be imported before moving."
                )

            # No quota check needed: move within user space doesn't change total usage

            # Init Git
            self._core._init_git_repo(dest_chroot)

            self._core._ensure_dir(target.parent)

            shutil.move(str(source), str(target))

            # Commit
            if not message:
                message = f"Import {src}"
            self._core._git_commit(dest_chroot, message)

            return self._core._format_response(True, message=f"Moved and committed: Uploads/{src} -> Documents/{dest}")

        except StorageError as e:
            return self._core._format_error(e, "shed_move_uploads_to_documents")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_copy_storage_to_documents(
        self,
        src: str,
        dest: str,
        message: str = None,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Copies from Storage to Documents with Git commit.

        :param src: Source path in Storage (don't include zone name!)
        :param dest: Destination path in Documents (don't include zone name!)
        :param message: Commit message
        :param allow_zone_in_path: Allow paths starting with zone name (default: False)
        :return: Confirmation as JSON
        """
        try:
            user_root = self._core._get_user_root(__user__)

            # Validate paths with zone name check
            src = self._core._validate_relative_path(src, "Storage", allow_zone_in_path)
            dest = self._core._validate_relative_path(dest, "Documents", allow_zone_in_path)

            src_chroot = user_root / "Storage" / "data"
            dest_chroot = user_root / "Documents" / "data"

            source = self._core._resolve_chroot_path(src_chroot, src)
            target = self._core._resolve_chroot_path(dest_chroot, dest)

            if not source.exists():
                raise StorageError("FILE_NOT_FOUND", f"File not found: {src}")

            # Check quota before copy
            self._core._check_quota(__user__, self._core._get_path_size(source))

            # Init Git
            self._core._init_git_repo(dest_chroot)

            self._core._ensure_dir(target.parent)

            if source.is_dir():
                shutil.copytree(source, target)
            else:
                shutil.copy2(source, target)

            # Commit
            if not message:
                message = f"Import from Storage: {src}"
            self._core._git_commit(dest_chroot, message)

            return self._core._format_response(True, message=f"Copied and committed: Storage/{src} -> Documents/{dest}")

        except StorageError as e:
            return self._core._format_error(e, "shed_copy_storage_to_documents")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_move_documents_to_storage(
        self,
        src: str,
        dest: str,
        message: str = None,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Moves from Documents to Storage with git rm + commit.

        :param src: Source path in Documents (don't include zone name!)
        :param dest: Destination path in Storage (don't include zone name!)
        :param message: Commit message
        :param allow_zone_in_path: Allow paths starting with zone name (default: False)
        :return: Confirmation as JSON
        """
        try:
            user_root = self._core._get_user_root(__user__)

            # Validate paths with zone name check
            src = self._core._validate_relative_path(src, "Documents", allow_zone_in_path)
            dest = self._core._validate_relative_path(dest, "Storage", allow_zone_in_path)

            src_chroot = user_root / "Documents" / "data"
            dest_chroot = user_root / "Storage" / "data"

            source = self._core._resolve_chroot_path(src_chroot, src)
            target = self._core._resolve_chroot_path(dest_chroot, dest)

            if not source.exists():
                raise StorageError("FILE_NOT_FOUND", f"File not found: {src}")

            # Check quota (move requires temporary duplication)
            self._core._check_quota(__user__, self._core._get_path_size(source))

            self._core._ensure_dir(dest_chroot)
            self._core._ensure_dir(target.parent)

            # Copy to Storage
            if source.is_dir():
                shutil.copytree(source, target)
            else:
                shutil.copy2(source, target)

            # git rm in Documents via Layer 2
            self._core._git_run(["rm", "-rf", src], src_chroot)

            # Commit
            if not message:
                message = f"Move to Storage: {src}"
            self._core._git_commit(src_chroot, message)

            return self._core._format_response(True, message=f"Moved: Documents/{src} -> Storage/{dest}")

        except StorageError as e:
            return self._core._format_error(e, "shed_move_documents_to_storage")
        except Exception as e:
            return self._core._format_response(False, message=str(e))
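
    # ------------------------------------------------------------------
    # Hypothetical end-to-end flow for the movement helpers above
    # (file names are illustrative): chat uploads typically go
    # Uploads -> Storage (scratch space) or Uploads -> Documents
    # (Git-versioned), and can later be promoted or demoted:
    #
    #   shed_import(import_all=True)
    #   shed_move_uploads_to_storage(src="data.csv", dest="raw/data.csv")
    #   shed_copy_storage_to_documents(src="raw/data.csv",
    #                                  dest="datasets/data.csv",
    #                                  message="Snapshot raw data")
    # ------------------------------------------------------------------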

    # =========================================================================
    # UTILITIES (5 functions)
    # =========================================================================

    async def shed_import(
        self,
        filename: str = "",
        import_all: bool = False,
        dest_subdir: str = "",
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
        __files__: list = None,
        __event_emitter__=None,
    ) -> str:
        """
        STEP 1: Imports files from chat to Uploads/.

        ALWAYS call this function first when user uploads a file!

        :param filename: Import only this specific file
        :param import_all: True to import ALL attached files
        :param dest_subdir: Optional subdirectory in Uploads/
        :return: List of imported files

        Examples:
            shed_import(import_all=True) -> import all attached files
            shed_import(filename="report.pdf") -> import only report.pdf
        """
        try:
            user_root = self._core._get_user_root(__user__)
            conv_id = self._core._get_conv_id(__metadata__)
            uploads_dir = user_root / "Uploads" / conv_id

            if dest_subdir:
                # Validate dest_subdir
                dest_subdir = self._core._validate_relative_path(dest_subdir, "Uploads", allow_zone_in_path)
                if dest_subdir:
                    uploads_dir = uploads_dir / dest_subdir

            self._core._ensure_dir(uploads_dir)

            # Get files (try multiple sources)
            files = __files__ or []

            if not files:
                files = __metadata__.get("files", [])

            if not files:
                return self._core._format_response(
                    False,
                    message="No files attached to conversation"
                )

            imported = []
            errors = []

            # Possible paths for Open WebUI files
            owui_upload_paths = [
                Path("/app/backend/data/uploads"),
                Path("/app/backend/data/files"),
                Path("/app/backend/data/cache/files"),
                Path("/app/backend/data/cache/uploads"),
            ]

            for file_info in files:
                try:
                    file_path = None
                    file_name = None
                    file_id = None
                    user_id_from_file = None

                    if isinstance(file_info, dict):
                        # Open WebUI structure detected
                        file_name = file_info.get("name") or file_info.get("filename")
                        file_id = file_info.get("id")

                        # Search in nested "file" dict
                        nested_file = file_info.get("file")
                        if isinstance(nested_file, dict):
                            file_path = nested_file.get("path") or nested_file.get("file_path")
                            user_id_from_file = nested_file.get("user_id")
                            if not file_name:
                                file_name = nested_file.get("filename") or nested_file.get("name")
                            if not file_id:
                                file_id = nested_file.get("id")

                        # Try direct keys if not found
                        if not file_path:
                            file_path = file_info.get("path") or file_info.get("file_path")

                        # If no direct path, search file by ID
                        if not file_path and file_id:
                            # Search in different possible paths
                            for base_path in owui_upload_paths:
                                if not base_path.exists():
                                    continue

                                # Format Open WebUI: {id}_{name}
                                if file_name:
                                    candidate = base_path / f"{file_id}_{file_name}"
                                    if candidate.exists():
                                        file_path = str(candidate)
                                        break

                                # Try: /base/file_id
                                candidate = base_path / file_id
                                if candidate.exists():
                                    file_path = str(candidate)
                                    break

                                # Try: /base/user_id/file_id
                                if user_id_from_file:
                                    candidate = base_path / user_id_from_file / file_id
                                    if candidate.exists():
                                        file_path = str(candidate)
                                        break

                                # Search by pattern {id}_*
                                for f in base_path.glob(f"{file_id}_*"):
                                    file_path = str(f)
                                    if not file_name:
                                        file_name = f.name.split("_", 1)[1] if "_" in f.name else f.name
                                    break
                                if file_path:
                                    break

                    elif isinstance(file_info, str):
                        file_path = file_info
                        file_name = Path(file_info).name

                    if not file_name:
                        file_name = file_id or "unknown"

                    # Security: clean filename (prevent traversal)
                    file_name = Path(file_name).name  # Keep only the name, not the path
                    if not file_name or file_name in (".", ".."):
                        file_name = file_id or "unknown"

                    # Filter if filename specified
                    if filename and file_name != filename:
                        continue

                    if not import_all and not filename:
                        continue

                    # Copy the file
                    if file_path and isinstance(file_path, str):
                        source = Path(file_path)
                        if source.exists():
                            # Security: reject symlinks
                            if source.is_symlink():
                                errors.append(f"{file_name}: symlinks not allowed")
                                continue

                            # Security: only regular files
                            if not source.is_file():
                                errors.append(f"{file_name}: not a regular file")
                                continue

                            # Check file size
                            file_size = source.stat().st_size
                            max_size = self.valves.max_file_size_mb * 1024 * 1024
                            if file_size > max_size:
                                errors.append(f"{file_name}: too large ({file_size / 1024 / 1024:.1f} MB > {self.valves.max_file_size_mb} MB)")
                                continue

                            # Check quota
                            try:
                                self._core._check_quota(__user__, file_size)
                            except StorageError as quota_error:
                                errors.append(f"{file_name}: {quota_error.message}")
                                continue

                            dest = uploads_dir / file_name
                            shutil.copy2(source, dest)
                            imported.append(file_name)
                        else:
                            errors.append(f"{file_name}: file not found")
                    else:
                        errors.append(f"{file_name}: source file not found")

                except Exception as e:
                    errors.append(f"Error: {str(e)}")

            if not imported:
                return self._core._format_response(
                    False,
                    message="No matching files found",
                    data={"errors": errors} if errors else None
                )

            result_data = {"imported": imported, "count": len(imported)}
            if errors:
                result_data["errors"] = errors

            return self._core._format_response(
                True,
                data=result_data,
                message=f"Imported {len(imported)} file(s) to Uploads. Use shed_delete(zone='uploads', path='...') to remove."
            )

        except Exception as e:
            return self._core._format_response(False, message=str(e))
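
    # Illustrative (not exhaustive) shapes of file entries that the
    # resolution loop above tolerates; the field names reflect what this
    # code expects from Open WebUI, not a guaranteed schema:
    #
    #   {"id": "abc123", "name": "report.pdf"}
    #   {"file": {"id": "abc123", "filename": "report.pdf",
    #             "path": "/app/backend/data/uploads/abc123_report.pdf"}}
    #   "/app/backend/data/uploads/abc123_report.pdf"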

    # =========================================================================
    # BUILTIN ZIP/UNZIP (Python zipfile - no external dependency)
    # =========================================================================

    async def shed_unzip(
        self,
        zone: str,
        src: str,
        dest: str = "",
        src_zone: str = "",
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Extracts a ZIP archive using Python zipfile (builtin, no external dependency).
        Works in Storage or Documents zones.

        :param zone: Destination zone for extraction (Storage or Documents)
        :param src: Path to ZIP file (relative to src_zone, or zone if src_zone is empty)
        :param dest: Destination folder (relative to zone). Empty = same folder as ZIP
        :param src_zone: Source zone where ZIP is located (Uploads, Storage, or Documents). Empty = same as zone
        :return: List of extracted files as JSON

        Example:
            shed_unzip(zone="storage", src="downloads/repo.zip", dest="projects/repo")
            shed_unzip(zone="storage", src="archive.zip", dest="extracted", src_zone="uploads")
        """
        # Canonical zone names (with capital)
        ZONE_NAMES = {"uploads": "Uploads", "storage": "Storage", "documents": "Documents"}

        try:
            user_root = self._core._get_user_root(__user__)
            zone_lower = zone.lower()

            # Validate destination zone (must be writable)
            if zone_lower not in ("storage", "documents"):
                raise StorageError(
                    "ZONE_FORBIDDEN",
                    f"Zone '{zone}' not allowed for unzip destination",
                    {"zone": zone},
                    "Use Storage or Documents"
                )
            zone_name = ZONE_NAMES[zone_lower]

            # Get destination zone path
            if zone_lower == "storage":
                zone_root = user_root / "Storage" / "data"
            else:
                zone_root = user_root / "Documents" / "data"

            # Determine source zone (defaults to destination zone if not specified)
            src_zone_lower = src_zone.lower() if src_zone else zone_lower

            # Validate source zone
            if src_zone_lower not in ("uploads", "storage", "documents"):
                raise StorageError(
                    "ZONE_FORBIDDEN",
                    f"Source zone '{src_zone}' not allowed",
                    {"src_zone": src_zone},
                    "Use Uploads, Storage, or Documents"
                )
            src_zone_name = ZONE_NAMES[src_zone_lower]

            # Get source zone path
            if src_zone_lower == "uploads":
                conv_id = self._core._get_conv_id(__metadata__)
                src_zone_root = user_root / "Uploads" / conv_id
            elif src_zone_lower == "storage":
                src_zone_root = user_root / "Storage" / "data"
            else:
                src_zone_root = user_root / "Documents" / "data"

            # Validate and resolve paths
            src = self._core._validate_relative_path(src, src_zone_name, allow_zone_in_path)
            src_path = self._core._resolve_chroot_path(src_zone_root, src)

            if not src_path.exists():
                raise StorageError("FILE_NOT_FOUND", f"ZIP file not found: {src}")

            if src_path.suffix.lower() != ".zip":
                raise StorageError(
                    "INVALID_FORMAT",
                    "File is not a ZIP archive",
                    {"file": src},
                    "Only .zip files are supported"
                )

            # Determine destination
            if dest:
                dest = self._core._validate_relative_path(dest, zone_name, allow_zone_in_path)
                dest_path = self._core._resolve_chroot_path(zone_root, dest)
            elif src_zone_lower == zone_lower:
                dest_path = src_path.parent
            else:
                # The ZIP lives in another zone: "same folder as ZIP" would land
                # outside the destination zone, so fall back to the zone root.
                dest_path = zone_root

            # Check quota before extraction (estimate: 3x zip size)
            zip_size = src_path.stat().st_size
            self._core._check_quota(__user__, zip_size * 3)

            # Extract
            self._core._ensure_dir(dest_path)
            extracted_files = []

            with zipfile.ZipFile(src_path, 'r') as zf:
                # Security: check for path traversal in zip entries (ZIP Slip prevention)
                dest_resolved = dest_path.resolve()
                for member in zf.namelist():
                    # Block absolute paths
                    if member.startswith('/'):
                        raise StorageError(
                            "PATH_ESCAPE",
                            f"ZIP contains absolute path: {member}",
                            {"member": member},
                            "ZIP file may be malicious (path traversal attempt)"
                        )

                    # Resolve the target path and verify it stays within dest
                    # This catches cases like "foo/../../../etc/passwd"
                    member_path = (dest_path / member).resolve()
                    try:
                        member_path.relative_to(dest_resolved)
                    except ValueError:
                        raise StorageError(
                            "PATH_ESCAPE",
                            f"ZIP contains path traversal: {member}",
                            {"member": member, "resolved": str(member_path)},
                            "ZIP file may be malicious (escapes destination directory)"
                        )

                # Extract all files (safe after validation)
                zf.extractall(dest_path)
                extracted_files = zf.namelist()

            # Git commit if Documents
            if zone_lower == "documents":
                docs_data = user_root / "Documents" / "data"
                self._core._git_run(["add", "-A"], cwd=docs_data)
                src_info = f"{src_zone_name}:{src}" if src_zone_lower != zone_lower else src
                self._core._git_run(
                    ["commit", "-m", f"Extracted {src_info} to {dest or 'same folder'}", "--allow-empty"],
                    cwd=docs_data
                )

            return self._core._format_response(
                True,
                data={
                    "source": src,
                    "source_zone": src_zone_name,
                    "destination": str(dest_path.relative_to(zone_root)),
                    "destination_zone": zone_name,
                    "files_count": len(extracted_files),
                    "files": extracted_files[:50],  # Limit to first 50
                    "truncated": len(extracted_files) > 50,
                },
                message=f"Extracted {len(extracted_files)} files from {src_zone_name} to {zone_name}"
            )

        except StorageError as e:
            return self._core._format_error(e, "shed_unzip")
        except zipfile.BadZipFile:
            return self._core._format_response(False, message="Invalid or corrupted ZIP file")
        except Exception as e:
            return self._core._format_response(False, message=str(e))
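
    # Hypothetical cross-zone extraction (paths illustrative): a ZIP that
    # arrived in the chat is imported, then unpacked into Storage:
    #
    #   shed_import(filename="repo.zip")
    #   shed_unzip(zone="storage", src="repo.zip", dest="projects/repo",
    #              src_zone="uploads")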

    async def shed_zip(
        self,
        zone: str,
        src: str,
        dest: str = "",
        include_empty_dirs: bool = False,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Creates a ZIP archive using Python zipfile (builtin, no external dependency).
        Works in Storage or Documents zones.

        :param zone: Source zone ("storage" or "documents")
        :param src: File or folder to compress (relative to zone)
        :param dest: Destination ZIP path (relative to zone). Empty = src + ".zip"
        :param include_empty_dirs: Include empty directories in archive (default: False, like standard zip)
        :return: Path to created ZIP as JSON

        Example:
            shed_zip(zone="storage", src="projects/myapp", dest="archives/myapp.zip")
            shed_zip(zone="storage", src="projects", dest="backup.zip", include_empty_dirs=True)
        """
        try:
            user_root = self._core._get_user_root(__user__)
            zone_lower = zone.lower()

            # Validate zone
            if zone_lower not in ("storage", "documents"):
                raise StorageError(
                    "ZONE_FORBIDDEN",
                    f"Zone '{zone}' not allowed for zip",
                    {"zone": zone},
                    "Use 'storage' or 'documents'"
                )

            # Get zone path
            if zone_lower == "storage":
                zone_root = user_root / "Storage" / "data"
                zone_name = "Storage"
            else:
                zone_root = user_root / "Documents" / "data"
                zone_name = "Documents"

            # Validate and resolve source path
            src = self._core._validate_relative_path(src, zone_name, allow_zone_in_path)
            src_path = self._core._resolve_chroot_path(zone_root, src)

            if not src_path.exists():
                raise StorageError("FILE_NOT_FOUND", f"Source not found: {src}")

            # Determine destination
            if dest:
                dest = self._core._validate_relative_path(dest, zone_name, allow_zone_in_path)
                if not dest.endswith('.zip'):
                    dest += '.zip'
                dest_path = self._core._resolve_chroot_path(zone_root, dest)
            else:
                dest_path = src_path.parent / (src_path.name + ".zip")

            # Check quota (estimate: same size as source)
            src_size = self._core._get_path_size(src_path)
            self._core._check_quota(__user__, src_size)

            # Create ZIP
            self._core._ensure_dir(dest_path.parent)
            files_added = 0
            dirs_added = 0

            with zipfile.ZipFile(dest_path, 'w', zipfile.ZIP_DEFLATED) as zf:
                if src_path.is_file():
                    zf.write(src_path, src_path.name)
                    files_added = 1
                else:
                    # Add directory recursively
                    for item_path in src_path.rglob('*'):
                        arcname = item_path.relative_to(src_path.parent)
                        if item_path.is_file():
                            zf.write(item_path, arcname)
                            files_added += 1
                        elif item_path.is_dir() and include_empty_dirs:
                            # Check if directory is empty (no files, only subdirs or nothing)
                            has_files = any(p.is_file() for p in item_path.rglob('*'))
                            if not has_files:
                                # Add empty directory entry (path must end with /)
                                zf.writestr(str(arcname) + '/', '')
                                dirs_added += 1

            # Git commit if Documents
            if zone_lower == "documents":
                docs_data = user_root / "Documents" / "data"
                self._core._git_run(["add", "-A"], cwd=docs_data)
                self._core._git_run(
                    ["commit", "-m", f"Created ZIP: {dest_path.name}", "--allow-empty"],
                    cwd=docs_data
                )

            zip_size = dest_path.stat().st_size

            response_data = {
                "source": src,
                "zip_path": str(dest_path.relative_to(zone_root)),
                "files_count": files_added,
                "size_bytes": zip_size,
                "size_human": f"{zip_size / 1024 / 1024:.2f} MB" if zip_size > 1024*1024 else f"{zip_size / 1024:.1f} KB",
            }

            if dirs_added > 0:
                response_data["empty_dirs_count"] = dirs_added

            message = f"Created ZIP with {files_added} files"
            if dirs_added > 0:
                message += f" and {dirs_added} empty directories"

            return self._core._format_response(True, data=response_data, message=message)

        except StorageError as e:
            return self._core._format_error(e, "shed_zip")
        except Exception as e:
            return self._core._format_response(False, message=str(e))
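
    # Hypothetical round trip (paths illustrative): zip a folder, inspect
    # the archive, then re-extract it elsewhere in the same zone:
    #
    #   shed_zip(zone="storage", src="projects/myapp", dest="archives/myapp.zip")
    #   shed_zipinfo(zone="storage", path="archives/myapp.zip")
    #   shed_unzip(zone="storage", src="archives/myapp.zip", dest="restore")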

    # =========================================================================
    # BUILTIN UTILITIES - Replace missing system commands (5 functions)
    # =========================================================================

    async def shed_tree(
        self,
        zone: str,
        path: str = ".",
        depth: int = 3,
        group: str = None,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Displays directory tree structure (replaces missing 'tree' command).

        :param zone: Target zone ("uploads", "storage", "documents", or "group")
        :param path: Starting path (default: root of zone)
        :param depth: Maximum depth to display (default: 3, max: 10)
        :param group: Group name (required if zone="group")
        :return: Tree structure as text

        Example:
            shed_tree(zone="storage", path="projects", depth=2)
            shed_tree(zone="group", group="MyTeam", path="docs")
        """
        try:
            user_root = self._core._get_user_root(__user__)
            conv_id = self._core._get_conv_id(__metadata__)
            zone_lower = zone.lower()

            # Validate zone
            if zone_lower == "uploads":
                zone_root = user_root / "Uploads" / conv_id
                zone_name = "Uploads"
            elif zone_lower == "storage":
                zone_root = user_root / "Storage" / "data"
                zone_name = "Storage"
            elif zone_lower == "documents":
                zone_root = user_root / "Documents" / "data"
                zone_name = "Documents"
            elif zone_lower == "group":
                if not group:
                    raise StorageError(
                        "MISSING_PARAMETER",
                        "Group name is required for zone='group'",
                        hint="Use: shed_tree(zone='group', group='GroupName', path='...')"
                    )
                # Resolve group and check membership
                group_id = self._core._validate_group_id(group)
                self._core._check_group_access(__user__, group_id)
                zone_root = self._core._get_group_data_path(group_id)
                zone_name = f"Group:{group_id}"
            else:
                raise StorageError(
                    "ZONE_FORBIDDEN",
                    f"Invalid zone: {zone}",
                    hint="Use 'uploads', 'storage', 'documents', or 'group'"
                )

            if not zone_root.exists():
                return self._core._format_response(True, data={"tree": "(empty)"}, message="Zone is empty")

            # Validate and resolve path
            path = self._core._validate_relative_path(path, zone_name, allow_zone_in_path) if path and path != "." else ""
            start_path = self._core._resolve_chroot_path(zone_root, path) if path else zone_root

            if not start_path.exists():
                raise StorageError("FILE_NOT_FOUND", f"Path not found: {path}")

            # Clamp depth
            depth = max(1, min(depth, 10))

            # Build tree
            def build_tree(current: Path, prefix: str = "", current_depth: int = 0) -> list:
                if current_depth >= depth:
                    return []

                lines = []
                try:
                    items = sorted(current.iterdir(), key=lambda x: (not x.is_dir(), x.name.lower()))
                except PermissionError:
                    return [f"{prefix}[permission denied]"]

                # Filter out hidden files and limit items
                items = [i for i in items if not i.name.startswith('.')]
                total = len(items)

                for idx, item in enumerate(items[:100]):  # Limit to 100 items per dir
                    is_last = (idx == len(items[:100]) - 1) or (idx == 99 and total > 100)
                    connector = "└── " if is_last else "├── "

                    if item.is_dir():
                        lines.append(f"{prefix}{connector}{item.name}/")
                        if current_depth + 1 < depth:
                            extension = "    " if is_last else "│   "
                            lines.extend(build_tree(item, prefix + extension, current_depth + 1))
                    else:
                        try:
                            size = item.stat().st_size
                            size_str = f"{size / 1024 / 1024:.1f}M" if size > 1024*1024 else f"{size / 1024:.1f}K" if size > 1024 else f"{size}B"
                        except (OSError, FileNotFoundError):
                            size_str = "?"
                        lines.append(f"{prefix}{connector}{item.name} ({size_str})")

                if total > 100:
                    lines.append(f"{prefix}... and {total - 100} more items")

                return lines

            # Generate tree
            root_name = start_path.name if path else (group if zone_lower == "group" else zone_lower.capitalize())
            tree_lines = [f"{root_name}/"]
            tree_lines.extend(build_tree(start_path))
            tree_output = "\n".join(tree_lines)

            zone_display = f"Group:{group}" if zone_lower == "group" else zone_lower.capitalize()
            return self._core._format_response(
                True,
                data={"tree": tree_output, "depth": depth, "path": path or ".", "zone": zone_display},
                message=f"Tree of {zone_display}/{path or '.'} (depth={depth})"
            )

        except StorageError as e:
            return self._core._format_error(e, "shed_tree")
        except Exception as e:
            return self._core._format_response(False, message=str(e))
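
    # Sketch of the output format produced by build_tree() above
    # (names and sizes are made up): directories sort first, entries are
    # capped at 100 per directory, and hidden files are skipped.
    #
    #   projects/
    #   ├── myapp/
    #   │   ├── main.py (4.2K)
    #   │   └── README.md (1.1K)
    #   └── notes.txt (300B)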

    async def shed_zipinfo(
        self,
        zone: str,
        path: str,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Shows ZIP archive contents and metadata (replaces missing 'zipinfo' command).

        :param zone: Target zone ("uploads", "storage", or "documents")
        :param path: Path to ZIP file
        :return: ZIP contents and metadata as JSON

        Example:
            shed_zipinfo(zone="storage", path="backup.zip")
        """
        try:
            user_root = self._core._get_user_root(__user__)
            zone_lower = zone.lower()

            # Validate zone
            if zone_lower == "storage":
                zone_root = user_root / "Storage" / "data"
                zone_name = "Storage"
            elif zone_lower == "documents":
                zone_root = user_root / "Documents" / "data"
                zone_name = "Documents"
            elif zone_lower == "uploads":
                conv_id = self._core._get_conv_id(__metadata__)
                zone_root = user_root / "Uploads" / conv_id
                zone_name = "Uploads"
            else:
                raise StorageError(
                    "ZONE_FORBIDDEN",
                    f"Invalid zone: {zone}",
                    hint="Use 'uploads', 'storage', or 'documents'"
                )

            # Validate and resolve path
            path = self._core._validate_relative_path(path, zone_name, allow_zone_in_path)
            zip_path = self._core._resolve_chroot_path(zone_root, path)

            if not zip_path.exists():
                raise StorageError("FILE_NOT_FOUND", f"File not found: {path}")

            if zip_path.suffix.lower() != ".zip":
                raise StorageError(
                    "INVALID_FORMAT",
                    "File is not a ZIP archive",
                    hint="Only .zip files are supported"
                )

            # Read ZIP info
            with zipfile.ZipFile(zip_path, 'r') as zf:
                files = []
                total_size = 0
                total_compressed = 0

                for info in zf.infolist():
                    total_size += info.file_size
                    total_compressed += info.compress_size

                    files.append({
                        "name": info.filename,
                        "size": info.file_size,
                        "compressed": info.compress_size,
                        "ratio": f"{(1 - info.compress_size / info.file_size) * 100:.1f}%" if info.file_size > 0 else "0%",
                        "date": f"{info.date_time[0]:04d}-{info.date_time[1]:02d}-{info.date_time[2]:02d} {info.date_time[3]:02d}:{info.date_time[4]:02d}",
                        "is_dir": info.filename.endswith('/'),
                    })

            # Summary
            ratio = (1 - total_compressed / total_size) * 100 if total_size > 0 else 0

            return self._core._format_response(
                True,
                data={
                    "path": path,
                    "files_count": len(files),
                    "total_size": total_size,
                    "total_size_human": f"{total_size / 1024 / 1024:.2f} MB" if total_size > 1024*1024 else f"{total_size / 1024:.1f} KB",
                    "compressed_size": total_compressed,
                    "compression_ratio": f"{ratio:.1f}%",
                    "files": files[:100],  # Limit to 100
                    "truncated": len(files) > 100,
                },
                message=f"ZIP contains {len(files)} files ({ratio:.1f}% compression)"
            )

        except StorageError as e:
            return self._core._format_error(e, "shed_zipinfo")
        except zipfile.BadZipFile:
            return self._core._format_response(False, message="Invalid or corrupted ZIP file")
        except Exception as e:
            return self._core._format_response(False, message=str(e))
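
    # Note on the "ratio" fields above: they report space SAVED
    # (1 - compressed/original), so 75.0% means the entry shrank to a
    # quarter of its original size; a stored (uncompressed) entry is 0%.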

    async def shed_file_type(
        self,
        zone: str,
        path: str,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Identifies file MIME type (replaces missing 'file' command).

        :param zone: Target zone ("uploads", "storage", or "documents")
        :param path: Path to file
        :return: File type information as JSON

        Example:
            shed_file_type(zone="storage", path="document.pdf")
        """
        try:
            user_root = self._core._get_user_root(__user__)
            conv_id = self._core._get_conv_id(__metadata__)
            zone_lower = zone.lower()

            # Validate zone
            if zone_lower == "uploads":
                zone_root = user_root / "Uploads" / conv_id
                zone_name = "Uploads"
            elif zone_lower == "storage":
                zone_root = user_root / "Storage" / "data"
                zone_name = "Storage"
            elif zone_lower == "documents":
                zone_root = user_root / "Documents" / "data"
                zone_name = "Documents"
            else:
                raise StorageError(
                    "ZONE_FORBIDDEN",
                    f"Invalid zone: {zone}",
                    hint="Use 'uploads', 'storage', or 'documents'"
                )

            # Validate and resolve path
            path = self._core._validate_relative_path(path, zone_name, allow_zone_in_path)
            file_path = self._core._resolve_chroot_path(zone_root, path)

            if not file_path.exists():
                raise StorageError("FILE_NOT_FOUND", f"File not found: {path}")

            if file_path.is_dir():
                return self._core._format_response(
                    True,
                    data={"path": path, "type": "directory", "mime": "inode/directory"},
                    message="Directory"
                )

            # Get MIME type from extension
            mime_type, encoding = mimetypes.guess_type(str(file_path))

            # Read magic bytes for common formats
            magic_info = None
            try:
                with open(file_path, 'rb') as f:
                    header = f.read(16)

                # Common magic bytes
                magic_signatures = {
                    b'\x89PNG\r\n\x1a\n': 'PNG image',
                    b'\xff\xd8\xff': 'JPEG image',
                    b'GIF87a': 'GIF image (87a)',
                    b'GIF89a': 'GIF image (89a)',
                    b'PK\x03\x04': 'ZIP archive (or DOCX/XLSX/PPTX/JAR)',
                    b'PK\x05\x06': 'ZIP archive (empty)',
                    b'%PDF': 'PDF document',
                    b'\x7fELF': 'ELF executable',
                    b'#!': 'Script (shebang)',
                    b'\x1f\x8b': 'Gzip compressed',
                    b'BZ': 'Bzip2 compressed',
                    b'\xfd7zXZ': 'XZ compressed',
                    b'Rar!': 'RAR archive',
                    b'7z\xbc\xaf': '7-Zip archive',
                    b'\x00\x00\x00\x1c\x66\x74\x79\x70': 'MP4/MOV video',
                    b'\x00\x00\x00\x20\x66\x74\x79\x70': 'MP4 video',
                    b'ID3': 'MP3 audio (ID3)',
                    b'\xff\xfb': 'MP3 audio',
                    b'OggS': 'Ogg container',
                    b'RIFF': 'RIFF container (WAV/AVI)',
                    b'SQLite format 3': 'SQLite database',
                }

                for sig, desc in magic_signatures.items():
                    if header.startswith(sig):
                        magic_info = desc
                        break

                # Check for text
                if not magic_info:
                    try:
                        # Try to decode as UTF-8
                        with open(file_path, 'r', encoding='utf-8') as tf:
                            tf.read(1024)
                        magic_info = "Text file (UTF-8)"
                    except UnicodeDecodeError:
                        magic_info = "Binary file"
            except Exception:
                pass

            # Get file stats
            stat = file_path.stat()

            return self._core._format_response(
                True,
                data={
                    "path": path,
                    "mime": mime_type or "application/octet-stream",
                    "encoding": encoding,
                    "magic": magic_info,
                    "extension": file_path.suffix,
                    "size": stat.st_size,
                    "size_human": f"{stat.st_size / 1024 / 1024:.2f} MB" if stat.st_size > 1024*1024 else f"{stat.st_size / 1024:.1f} KB" if stat.st_size > 1024 else f"{stat.st_size} B",
                },
                message=f"{magic_info or mime_type or 'Unknown type'}"
            )

        except StorageError as e:
            return self._core._format_error(e, "shed_file_type")
        except Exception as e:
            return self._core._format_response(False, message=str(e))
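
    # Hypothetical result sketch for shed_file_type (values illustrative):
    #
    #   shed_file_type(zone="storage", path="backup.zip")
    #   # -> data: {"mime": "application/zip",
    #   #           "magic": "ZIP archive (or DOCX/XLSX/PPTX/JAR)",
    #   #           "extension": ".zip", "size": 10485760,
    #   #           "size_human": "10.00 MB"}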

    async def shed_convert_eol(
        self,
        zone: str,
        path: str,
        to: str = "unix",
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Converts line endings (replaces missing 'dos2unix'/'unix2dos' commands).

        :param zone: Target zone ("storage" or "documents")
        :param path: Path to text file
        :param to: Target format: "unix" (LF) or "dos" (CRLF)
        :return: Conversion result as JSON

        Example:
            shed_convert_eol(zone="storage", path="script.sh", to="unix")
            shed_convert_eol(zone="storage", path="readme.txt", to="dos")
        """
        try:
            user_root = self._core._get_user_root(__user__)
            zone_lower = zone.lower()

            # Validate zone (not uploads - read-only)
            if zone_lower == "storage":
                zone_root = user_root / "Storage" / "data"
                zone_name = "Storage"
            elif zone_lower == "documents":
                zone_root = user_root / "Documents" / "data"
                zone_name = "Documents"
            else:
                raise StorageError(
                    "ZONE_FORBIDDEN",
                    f"Invalid zone for writing: {zone}",
                    hint="Use 'storage' or 'documents'"
                )

            # Validate target format
            to_lower = to.lower()
            if to_lower not in ("unix", "dos", "lf", "crlf"):
                raise StorageError(
                    "INVALID_MODE",
                    f"Invalid EOL format: {to}",
                    hint="Use 'unix' (LF) or 'dos' (CRLF)"
                )

            # Normalize format name
            to_unix = to_lower in ("unix", "lf")

            # Validate and resolve path
            path = self._core._validate_relative_path(path, zone_name, allow_zone_in_path)
            file_path = self._core._resolve_chroot_path(zone_root, path)

            if not file_path.exists():
                raise StorageError("FILE_NOT_FOUND", f"File not found: {path}")

            if file_path.is_dir():
                raise StorageError("INVALID_FORMAT", "Cannot convert directory")

            # Read file
            try:
                content = file_path.read_bytes()
            except Exception as e:
                raise StorageError("EXEC_ERROR", f"Cannot read file: {e}")

            # Count existing line endings
            crlf_count = content.count(b'\r\n')
            lf_only_count = content.count(b'\n') - crlf_count
            cr_only_count = content.count(b'\r') - crlf_count

            original_format = "mixed"
            if crlf_count > 0 and lf_only_count == 0:
                original_format = "dos"
            elif lf_only_count > 0 and crlf_count == 0:
                original_format = "unix"
            elif cr_only_count > 0 and crlf_count == 0 and lf_only_count == 0:
                original_format = "mac (old)"

            # Convert
            if to_unix:
                # To Unix: CRLF -> LF, CR -> LF
                new_content = content.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
                target_format = "unix"
            else:
                # To DOS: First normalize to LF, then convert to CRLF
                normalized = content.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
                new_content = normalized.replace(b'\n', b'\r\n')
                target_format = "dos"

            # Check if changed
            if new_content == content:
                return self._core._format_response(
                    True,
                    data={"path": path, "format": original_format, "changed": False},
                    message=f"File already in {target_format} format"
                )

            # Write back
            file_path.write_bytes(new_content)

            # Git commit if Documents
            if zone_lower == "documents":
                self._core._git_commit(zone_root, f"Convert EOL to {target_format}: {path}")

            return self._core._format_response(
                True,
                data={
                    "path": path,
                    "original_format": original_format,
                    "new_format": target_format,
                    "changed": True,
                    # Count every line ending that actually changed, including
                    # old-Mac CR-only endings which are converted either way
                    "lines_converted": (crlf_count + cr_only_count) if to_unix else (lf_only_count + cr_only_count),
                },
                message=f"Converted {path} from {original_format} to {target_format}"
            )

        except StorageError as e:
            return self._core._format_error(e, "shed_convert_eol")
        except Exception as e:
            return self._core._format_response(False, message=str(e))
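
    # Hypothetical EOL conversion (paths illustrative). The operation is
    # idempotent: converting a file already in the target format reports
    # changed=False and writes nothing back.
    #
    #   shed_convert_eol(zone="storage", path="script.sh", to="unix")
    #   shed_convert_eol(zone="storage", path="script.sh", to="unix")  # no-op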

    async def shed_hexdump(
        self,
        zone: str,
        path: str,
        offset: int = 0,
        length: int = 256,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Shows hexadecimal dump of file (replaces missing 'xxd'/'hexdump' commands).

        :param zone: Target zone ("uploads", "storage", or "documents")
        :param path: Path to file
        :param offset: Starting offset in bytes (default: 0)
        :param length: Number of bytes to display (default: 256, max: 4096)
        :return: Hex dump as text

        Example:
            shed_hexdump(zone="storage", path="binary.dat", offset=0, length=128)
        """
        try:
            user_root = self._core._get_user_root(__user__)
            conv_id = self._core._get_conv_id(__metadata__)
            zone_lower = zone.lower()

            # Validate zone
            if zone_lower == "uploads":
                zone_root = user_root / "Uploads" / conv_id
                zone_name = "Uploads"
            elif zone_lower == "storage":
                zone_root = user_root / "Storage" / "data"
                zone_name = "Storage"
            elif zone_lower == "documents":
                zone_root = user_root / "Documents" / "data"
                zone_name = "Documents"
            else:
                raise StorageError(
                    "ZONE_FORBIDDEN",
                    f"Invalid zone: {zone}",
                    hint="Use 'uploads', 'storage', or 'documents'"
                )

            # Validate and resolve path
            path = self._core._validate_relative_path(path, zone_name, allow_zone_in_path)
            file_path = self._core._resolve_chroot_path(zone_root, path)

            if not file_path.exists():
                raise StorageError("FILE_NOT_FOUND", f"File not found: {path}")

            if file_path.is_dir():
                raise StorageError("INVALID_FORMAT", "Cannot hexdump directory")

            # Clamp values
            offset = max(0, offset)
            length = max(1, min(length, 4096))

            # Read file portion
            file_size = file_path.stat().st_size

            if offset >= file_size:
                return self._core._format_response(
                    True,
                    data={"path": path, "offset": offset, "size": file_size, "hexdump": "(offset beyond file)"},
                    message="Offset is beyond end of file"
                )

            with open(file_path, 'rb') as f:
                f.seek(offset)
                data = f.read(length)

            # Format hex dump (xxd style)
            lines = []
            for i in range(0, len(data), 16):
                chunk = data[i:i+16]
                addr = f"{offset + i:08x}"

                # Hex part: each group is two bytes (4 hex chars), padded
                # so the ASCII column stays aligned on short final lines
                hex_parts = []
                for j in range(0, 16, 2):
                    if j < len(chunk):
                        if j + 1 < len(chunk):
                            hex_parts.append(f"{chunk[j]:02x}{chunk[j+1]:02x}")
                        else:
                            hex_parts.append(f"{chunk[j]:02x}  ")
                    else:
                        hex_parts.append("    ")
                hex_str = " ".join(hex_parts)

                # ASCII part
                ascii_str = ""
                for b in chunk:
                    if 32 <= b < 127:
                        ascii_str += chr(b)
                    else:
                        ascii_str += "."

                lines.append(f"{addr}: {hex_str}  {ascii_str}")

            hexdump_output = "\n".join(lines)

            return self._core._format_response(
                True,
                data={
                    "path": path,
                    "offset": offset,
                    "length": len(data),
                    "file_size": file_size,
                    "hexdump": hexdump_output,
                },
                message=f"Hexdump of {path} ({len(data)} bytes from offset {offset})"
            )

        except StorageError as e:
            return self._core._format_error(e, "shed_hexdump")
        except Exception as e:
            return self._core._format_response(False, message=str(e))
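
    # Sketch of the xxd-style layout emitted above (bytes from a real PNG
    # header, file name hypothetical): 8-digit hex address, eight 2-byte
    # groups, then the printable-ASCII column.
    #
    #   shed_hexdump(zone="storage", path="logo.png", length=16)
    #   # 00000000: 8950 4e47 0d0a 1a0a 0000 000d 4948 4452  .PNG........IHDR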
|
|
|
|
async def shed_sqlite(
|
|
self,
|
|
zone: str,
|
|
path: str,
|
|
query: str = None,
|
|
params: list = None,
|
|
limit: int = None,
|
|
output_csv: str = None,
|
|
import_csv: str = None,
|
|
table: str = None,
|
|
if_exists: str = "fail",
|
|
delimiter: str = None,
|
|
encoding: str = None,
|
|
date_columns: list = None,
|
|
date_format: str = None,
|
|
decimal: str = None,
|
|
skip_rows: int = 0,
|
|
has_header: bool = True,
|
|
group: str = None,
|
|
allow_zone_in_path: bool = False,
|
|
__user__: dict = {},
|
|
__metadata__: dict = {},
|
|
) -> str:
|
|
"""
|
|
Executes SQL query on a SQLite database file OR imports a CSV file.
|
|
Python builtin using sqlite3 module, no external dependency.
|
|
|
|
⚠️ CONTEXT PROTECTION: SELECT queries without LIMIT return only 10 rows by default!
|
|
Use limit=N for more rows, or output_csv="file.csv" to export all results to file.
|
|
|
|
:param zone: Target zone ("uploads", "storage", "documents", or "group")
|
|
:param path: Path to .db file (created if not exists for write queries)
|
|
:param query: SQL query to execute (optional if import_csv is provided)
|
|
:param params: Optional list of parameters for parameterized queries (prevents SQL injection)
|
|
:param limit: Max rows to return for SELECT (default: 10 if no LIMIT in query). Use limit=0 for no limit (dangerous!).
|
|
:param output_csv: Export ALL results to this CSV file instead of returning rows (prevents context pollution)
|
|
:param import_csv: Path to CSV file to import (in same zone). If provided, imports CSV instead of running query.
|
|
:param table: Table name for CSV import (required if import_csv is provided)
|
|
:param if_exists: What to do if table exists: "fail" (error), "replace" (drop+recreate), "append" (add rows)
|
|
:param delimiter: CSV delimiter. None=auto-detect, or specify: ",", ";", "\\t", "|"
|
|
:param encoding: File encoding. None=auto-detect (tries utf-8, latin-1, cp1252), or specify: "utf-8", "latin-1", "cp1252", etc.
|
|
:param date_columns: List of column names to parse as dates, e.g. ["created_at", "updated_at"]
|
|
:param date_format: Date format: None=auto, "dayfirst" (DD/MM/YYYY), "monthfirst" (MM/DD/YYYY), or strptime format like "%d/%m/%Y"
|
|
:param decimal: Decimal separator for numbers. None="." (default), or "," for European format (1.234,56)
|
|
:param skip_rows: Number of rows to skip at the beginning (before header). Default: 0
|
|
:param has_header: True if first row (after skip_rows) contains column names, False if data only. Default: True. ⚠️ If False, columns are named col_1, col_2, etc.
|
|
:param group: Group ID or name (required if zone="group")
|
|
:return: Query results or import stats as JSON
|
|
|
|
Examples:
|
|
# === SQL QUERIES ===
|
|
|
|
# Basic SELECT (returns max 10 rows by default)
|
|
shed_sqlite(zone="storage", path="data.db", query="SELECT * FROM users")
|
|
# → Returns 10 rows + warning if more exist
|
|
|
|
# Request more rows explicitly
|
|
shed_sqlite(zone="storage", path="data.db", query="SELECT * FROM users", limit=100)
|
|
# → Returns up to 100 rows
|
|
|
|
# Use LIMIT in SQL (respected as-is)
|
|
shed_sqlite(zone="storage", path="data.db", query="SELECT * FROM users LIMIT 50")
|
|
# → Returns 50 rows, no warning
|
|
|
|
# Export ALL results to CSV (no context pollution!)
|
|
shed_sqlite(zone="storage", path="data.db",
|
|
query="SELECT * FROM users", output_csv="users_export.csv")
|
|
# → Writes all rows to file, returns stats only
|
|
|
|
# Create a table
|
|
shed_sqlite(zone="storage", path="data.db",
|
|
query="CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT, email TEXT)")
|
|
|
|
# Insert with parameters (safe from SQL injection)
|
|
shed_sqlite(zone="storage", path="data.db",
|
|
query="INSERT INTO users (name, email) VALUES (?, ?)",
|
|
params=["Alice", "alice@example.com"])
|
|
|
|
# === CSV IMPORT (FAST, NO CONTEXT POLLUTION) ===
|
|
|
|
# Basic import (auto-detects delimiter and encoding)
|
|
shed_sqlite(zone="storage", path="data.db",
|
|
import_csv="users.csv", table="users")
|
|
|
|
# French/European CSV (semicolon, comma decimal, latin-1)
|
|
shed_sqlite(zone="storage", path="data.db",
|
|
import_csv="french_data.csv", table="sales",
|
|
delimiter=";", decimal=",", encoding="latin-1")
|
|
|
|
# With date parsing (European format DD/MM/YYYY)
|
|
shed_sqlite(zone="storage", path="data.db",
|
|
import_csv="orders.csv", table="orders",
|
|
date_columns=["order_date", "ship_date"], date_format="dayfirst")
|
|
|
|
# TSV file (tab-separated)
|
|
shed_sqlite(zone="storage", path="data.db",
|
|
import_csv="data.tsv", table="data", delimiter="\\t")
|
|
|
|
# Skip header rows (e.g., file has title + empty row before headers)
|
|
shed_sqlite(zone="storage", path="data.db",
|
|
import_csv="report.csv", table="report", skip_rows=2)
|
|
|
|
# CSV without header row (data only, columns named col_1, col_2, ...)
|
|
shed_sqlite(zone="storage", path="data.db",
|
|
import_csv="raw_data.csv", table="raw", has_header=False)
|
|
|
|
# Replace existing table
|
|
shed_sqlite(zone="storage", path="data.db",
|
|
import_csv="updated.csv", table="users", if_exists="replace")
|
|
|
|
Note: Uses pandas if available (best auto-detection), falls back to csv module.
|
|
CSV import keeps data on disk - no context pollution!
|
|
"""
|
|
        try:
            # Use centralized zone resolution
            ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=False)
            zone_root = ctx.zone_root
            zone_name = ctx.zone_name
            readonly = ctx.readonly

            # Validate and resolve path
            path = self._core._validate_relative_path(path, zone_name, allow_zone_in_path)
            db_path = self._core._resolve_chroot_path(zone_root, path)

            # Ensure parent directory exists
            db_path.parent.mkdir(parents=True, exist_ok=True)

            # =====================================================
            # CSV IMPORT MODE
            # =====================================================
            if import_csv:
                # Validate parameters
                if not table:
                    raise StorageError(
                        "MISSING_PARAMETER",
                        "table parameter required for CSV import",
                        hint="Add table='tablename' parameter"
                    )

                if if_exists not in ("fail", "replace", "append"):
                    raise StorageError(
                        "INVALID_PARAMETER",
                        f"Invalid if_exists value: {if_exists}",
                        hint="Use 'fail', 'replace', or 'append'"
                    )

                # Block CSV import in readonly zones
                if readonly:
                    raise StorageError(
                        "ZONE_FORBIDDEN",
                        "CSV import not allowed in Uploads zone",
                        hint="Move the CSV to Storage or Documents first, then import"
                    )

                # Validate table name (prevent SQL injection)
                if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', table):
                    raise StorageError(
                        "INVALID_PARAMETER",
                        "Invalid table name",
                        {"table": table},
                        hint="Table name must be alphanumeric with underscores, starting with letter or underscore"
                    )

                # Resolve CSV path (in same zone)
                import_csv_path = self._core._validate_relative_path(import_csv, zone_name, allow_zone_in_path)
                csv_path = self._core._resolve_chroot_path(zone_root, import_csv_path)

                if not csv_path.exists():
                    raise StorageError(
                        "FILE_NOT_FOUND",
                        f"CSV file not found: {import_csv}",
                        hint="Check the path is correct and relative to the zone"
                    )

                # Try to use pandas (best auto-detection), fallback to csv module
                use_pandas = False
                try:
                    import pandas as pd
                    use_pandas = True
                except ImportError:
                    pass

                conn = sqlite3.connect(str(db_path), timeout=30.0)

                try:
                    cursor = conn.cursor()

                    # Check if table exists
                    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", (table,))
                    table_exists = cursor.fetchone() is not None

                    if table_exists:
                        if if_exists == "fail":
                            raise StorageError(
                                "TABLE_EXISTS",
                                f"Table '{table}' already exists",
                                hint="Use if_exists='replace' or if_exists='append'"
                            )
                        elif if_exists == "replace":
                            cursor.execute(f"DROP TABLE IF EXISTS {table}")
                            table_exists = False

                    import_info = {"method": "unknown"}

                    if use_pandas:
                        # =====================================================
                        # PANDAS IMPORT (best auto-detection)
                        # =====================================================
                        import_info["method"] = "pandas"

                        # Build pandas read_csv arguments
                        pd_kwargs = {}

                        # Delimiter: auto-detect or specified
                        if delimiter:
                            pd_kwargs['sep'] = delimiter.replace('\\t', '\t')
                        else:
                            pd_kwargs['sep'] = None  # Auto-detect
                            pd_kwargs['engine'] = 'python'  # Required for sep=None

                        # Encoding: auto-detect or specified
                        if encoding:
                            pd_kwargs['encoding'] = encoding
                        else:
                            # Try common encodings
                            detected_encoding = None
                            for enc in ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']:
                                try:
                                    with open(csv_path, 'r', encoding=enc) as test_f:
                                        test_f.read(8192)
                                    detected_encoding = enc
                                    break
                                except (UnicodeDecodeError, UnicodeError):
                                    continue
                            pd_kwargs['encoding'] = detected_encoding or 'utf-8'
                            import_info['detected_encoding'] = pd_kwargs['encoding']

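                        # Note (hedged): 'latin-1' maps every possible byte value, so the
                        # probe loop above can never get past it; the result is a best-effort
                        # guess that the bytes decode, not proof the text is meaningful.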
                        # Skip rows
                        if skip_rows > 0:
                            pd_kwargs['skiprows'] = skip_rows

                        # Decimal separator
                        if decimal:
                            pd_kwargs['decimal'] = decimal

                        # Date parsing
                        if date_columns:
                            pd_kwargs['parse_dates'] = date_columns
                            if date_format:
                                if date_format == 'dayfirst':
                                    pd_kwargs['dayfirst'] = True
                                elif date_format == 'monthfirst':
                                    pd_kwargs['dayfirst'] = False
                                elif date_format != 'auto':
                                    pd_kwargs['date_format'] = date_format

                        # Handle NA values
                        pd_kwargs['na_values'] = ['', 'NA', 'N/A', 'NULL', 'null', 'None', 'none', '#N/A', '#NA']
                        pd_kwargs['keep_default_na'] = True

                        # Handle header
                        if not has_header:
                            pd_kwargs['header'] = None  # No header row in file

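                        # Illustrative sketch (comment only, not executed): for a call like
                        #   shed_sqlite(..., import_csv="sales.csv", table="sales",
                        #               delimiter=";", decimal=",", encoding="latin-1")
                        # the kwargs assembled above come out roughly as:
                        #   {'sep': ';', 'encoding': 'latin-1', 'decimal': ',',
                        #    'na_values': [...], 'keep_default_na': True}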
                        # Read CSV
                        try:
                            df = pd.read_csv(str(csv_path), **pd_kwargs)
                        except Exception:
                            raise StorageError(
                                "CSV_PARSE_ERROR",
                                "Failed to parse CSV with pandas",
                                {"csv": import_csv},
                                hint="Try specifying delimiter, encoding, or skip_rows explicitly"
                            )

                        # Get detected delimiter if auto-detected
                        if 'sep' in pd_kwargs and pd_kwargs['sep'] is None:
                            # pandas doesn't expose detected delimiter easily, so we sniff it
                            import csv as csv_module
                            try:
                                with open(csv_path, 'r', encoding=pd_kwargs.get('encoding', 'utf-8')) as sniff_f:
                                    sample = sniff_f.read(8192)
                                dialect = csv_module.Sniffer().sniff(sample)
                                import_info['detected_delimiter'] = repr(dialect.delimiter)
                            except Exception:
                                pass

                        # Generate or sanitize column names
                        if not has_header:
                            # No header: generate col_1, col_2, ...
                            df.columns = [f"col_{i+1}" for i in range(len(df.columns))]
                            import_info['generated_columns'] = True
                        else:
                            # Sanitize column names from header
                            clean_columns = []
                            for col in df.columns:
                                clean = re.sub(r'[^\w]', '_', str(col).strip())
                                if not clean or clean[0].isdigit():
                                    clean = '_' + clean
                                clean_columns.append(clean)
                            df.columns = clean_columns

                        # Import to SQLite
                        pandas_if_exists = 'append' if if_exists == 'append' and table_exists else 'replace'
                        df.to_sql(table, conn, if_exists=pandas_if_exists, index=False)

                        total_rows = len(df)
                        clean_headers = list(df.columns)

                        # Get column types
                        import_info['column_types'] = {col: str(df[col].dtype) for col in df.columns}

                    else:
                        # =====================================================
                        # CSV MODULE FALLBACK (always available)
                        # =====================================================
                        import csv as csv_module
                        import_info["method"] = "csv_module"

                        # Detect encoding if not specified
                        file_encoding = encoding
                        if not file_encoding:
                            for enc in ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']:
                                try:
                                    with open(csv_path, 'r', encoding=enc) as test_f:
                                        test_f.read(8192)
                                    file_encoding = enc
                                    break
                                except (UnicodeDecodeError, UnicodeError):
                                    continue
                        file_encoding = file_encoding or 'utf-8'
                        import_info['detected_encoding'] = file_encoding

                        # Detect delimiter using Sniffer if not specified
                        csv_delimiter = delimiter.replace('\\t', '\t') if delimiter else None
                        if not csv_delimiter:
                            try:
                                with open(csv_path, 'r', encoding=file_encoding) as sniff_f:
                                    sample = sniff_f.read(8192)
                                dialect = csv_module.Sniffer().sniff(sample, delimiters=',;\t|')
                                csv_delimiter = dialect.delimiter
                                import_info['detected_delimiter'] = repr(csv_delimiter)
                            except csv_module.Error:
                                csv_delimiter = ','  # Default to comma

                        # Read and import CSV
                        with open(csv_path, 'r', newline='', encoding=file_encoding) as f:
                            # Skip rows if needed
                            for _ in range(skip_rows):
                                next(f, None)

                            reader = csv_module.reader(f, delimiter=csv_delimiter)

                            # Handle header row
                            if has_header:
                                headers = next(reader)  # First row = column names
                                # Sanitize column names
                                clean_headers = []
                                for h in headers:
                                    clean = re.sub(r'[^\w]', '_', h.strip())
                                    if not clean or clean[0].isdigit():
                                        clean = '_' + clean
                                    clean_headers.append(clean)
                                first_data_row = None
                            else:
                                # No header: first row is data, generate column names
                                first_data_row = next(reader, None)
                                if first_data_row is None:
                                    raise StorageError(
                                        "CSV_EMPTY",
                                        "CSV file is empty (no data rows)",
                                        {"csv": import_csv}
                                    )
                                clean_headers = [f"col_{i+1}" for i in range(len(first_data_row))]
                                import_info['generated_columns'] = True

                            # Create table if needed
                            if not table_exists or if_exists == "replace":
                                columns_def = ", ".join(f"{col} TEXT" for col in clean_headers)
                                cursor.execute(f"CREATE TABLE {table} ({columns_def})")

                            # Prepare INSERT statement
                            placeholders = ", ".join("?" * len(clean_headers))
                            insert_sql = f"INSERT INTO {table} VALUES ({placeholders})"

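                            # e.g. three columns yield "INSERT INTO <table> VALUES (?, ?, ?)".
                            # Row values travel as bound parameters; only identifiers
                            # (validated by the regex above) are interpolated into SQL text.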
                            # Date parsing setup
                            date_col_indices = []
                            if date_columns:
                                for dc in date_columns:
                                    # Find column index (case-insensitive, sanitized)
                                    dc_clean = re.sub(r'[^\w]', '_', dc.strip())
                                    if not dc_clean or dc_clean[0].isdigit():
                                        dc_clean = '_' + dc_clean
                                    try:
                                        idx = clean_headers.index(dc_clean)
                                        date_col_indices.append(idx)
                                    except ValueError:
                                        # Try original name
                                        for i, h in enumerate(clean_headers):
                                            if h.lower() == dc.lower() or h.lower() == dc_clean.lower():
                                                date_col_indices.append(i)
                                                break

                            # Batch insert for performance
                            batch_size = 1000
                            batch = []
                            total_rows = 0

                            # If has_header=False, we already read the first data row.
                            # Create an iterator that includes it.
                            if first_data_row is not None:
                                import itertools
                                all_rows = itertools.chain([first_data_row], reader)
                            else:
                                all_rows = reader

                            for row in all_rows:
                                # Pad or truncate row to match headers
                                if len(row) < len(clean_headers):
                                    row = list(row) + [''] * (len(clean_headers) - len(row))
                                elif len(row) > len(clean_headers):
                                    row = list(row[:len(clean_headers)])
                                else:
                                    row = list(row)

                                # Process decimal separator if specified
                                if decimal and decimal != '.':
                                    for i, val in enumerate(row):
                                        if val and i not in date_col_indices:
                                            # Try to convert European decimal format
                                            try:
                                                # Remove thousand separators (spaces or dots) and replace decimal
                                                cleaned = val.replace(' ', '').replace('.', '')
                                                cleaned = cleaned.replace(decimal, '.')
                                                float(cleaned)  # Test if it's a number
                                                row[i] = cleaned
                                            except (ValueError, AttributeError):
                                                pass  # Not a number, keep original

                                # Process date columns
                                if date_col_indices and date_format:
                                    from datetime import datetime
                                    for idx in date_col_indices:
                                        if idx < len(row) and row[idx]:
                                            try:
                                                if date_format == 'dayfirst':
                                                    # Try common European formats
                                                    for fmt in ['%d/%m/%Y', '%d-%m-%Y', '%d.%m.%Y', '%d/%m/%y', '%d-%m-%y']:
                                                        try:
                                                            dt = datetime.strptime(row[idx], fmt)
                                                            row[idx] = dt.strftime('%Y-%m-%d')
                                                            break
                                                        except ValueError:
                                                            continue
                                                elif date_format == 'monthfirst':
                                                    # Try common US formats
                                                    for fmt in ['%m/%d/%Y', '%m-%d-%Y', '%m/%d/%y', '%m-%d-%y']:
                                                        try:
                                                            dt = datetime.strptime(row[idx], fmt)
                                                            row[idx] = dt.strftime('%Y-%m-%d')
                                                            break
                                                        except ValueError:
                                                            continue
                                                elif date_format != 'auto':
                                                    dt = datetime.strptime(row[idx], date_format)
                                                    row[idx] = dt.strftime('%Y-%m-%d')
                                            except (ValueError, TypeError):
                                                pass  # Keep original value

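                                # Dates that matched a format were rewritten above to ISO
                                # YYYY-MM-DD, which sorts correctly as TEXT in SQLite;
                                # unparseable values pass through unchanged.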
                                batch.append(tuple(row))

                                if len(batch) >= batch_size:
                                    cursor.executemany(insert_sql, batch)
                                    total_rows += len(batch)
                                    batch = []

                            # Insert remaining rows
                            if batch:
                                cursor.executemany(insert_sql, batch)
                                total_rows += len(batch)

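                    # One commit for the whole import: the csv-module path above
                    # accumulates all INSERTs in a single implicit transaction, so an
                    # import that fails mid-way is rolled back when the connection
                    # closes uncommitted.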
                    conn.commit()

                    response_data = {
                        "db_path": path,
                        "csv_path": import_csv,
                        "table": table,
                        "columns": clean_headers,
                        "rows_imported": total_rows,
                        "if_exists": if_exists,
                        "import_info": import_info,
                    }

                    if delimiter:
                        response_data["delimiter"] = delimiter
                    if encoding:
                        response_data["encoding"] = encoding
                    if date_columns:
                        response_data["date_columns"] = date_columns
                    if decimal:
                        response_data["decimal"] = decimal

                    return self._core._format_response(
                        True,
                        data=response_data,
                        message=f"Imported {total_rows} rows from '{import_csv}' into table '{table}' (using {import_info['method']})"
                    )

                except StorageError:
                    raise
                except sqlite3.Error:
                    raise StorageError(
                        "EXEC_ERROR",
                        "SQLite error during CSV import",
                        {"csv": import_csv, "table": table}
                    )
                except Exception:
                    raise StorageError(
                        "EXEC_ERROR",
                        "CSV import failed",
                        {"csv": import_csv, "table": table},
                        hint="Try specifying delimiter, encoding, or check CSV format"
                    )
                finally:
                    conn.close()

            # =====================================================
            # SQL QUERY MODE
            # =====================================================
            if not query:
                raise StorageError(
                    "MISSING_PARAMETER",
                    "Either 'query' or 'import_csv' parameter is required",
                    hint="Provide a SQL query or a CSV file to import"
                )

            # Check if this is a read or write query
            query_stripped = query.strip().upper()
            is_read_query = query_stripped.startswith(("SELECT", "PRAGMA", "EXPLAIN"))

            # Block write operations if sqlite_readonly valve is enabled
            if self.valves.sqlite_readonly and not is_read_query:
                raise StorageError(
                    "COMMAND_FORBIDDEN",
                    "Write operations are disabled (sqlite_readonly=True)",
                    {"query_type": query_stripped.split()[0] if query_stripped else "unknown"},
                    hint="Only SELECT, PRAGMA, and EXPLAIN queries are allowed. Ask admin to disable sqlite_readonly."
                )

            # Block write operations in readonly zones
            if readonly and not is_read_query:
                raise StorageError(
                    "ZONE_FORBIDDEN",
                    "Write operations not allowed in Uploads zone",
                    hint="Move the database to Storage or Documents first"
                )

            # Block dangerous operations
            dangerous_patterns = [
                "ATTACH", "DETACH",  # Could access other databases
                "LOAD_EXTENSION",  # Could load malicious code
            ]
            for pattern in dangerous_patterns:
                if pattern in query_stripped:
                    raise StorageError(
                        "COMMAND_FORBIDDEN",
                        f"SQL operation '{pattern}' is not allowed for security reasons"
                    )

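            # The substring check above is deliberately blunt: it may also reject
            # harmless queries that merely contain these words (e.g. a column named
            # ATTACHMENT), trading a few false positives for not having to parse SQL.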
            # Execute the query
            params = params or []
            conn = sqlite3.connect(str(db_path), timeout=10.0)
            conn.row_factory = sqlite3.Row

            try:
                cursor = conn.cursor()
                cursor.execute(query, params)

                if is_read_query:
                    # Get column names first
                    columns = [description[0] for description in cursor.description] if cursor.description else []

                    # Check if user wants CSV export (all results, no context pollution)
                    if output_csv:
                        # Block CSV export in readonly zones
                        if readonly:
                            raise StorageError(
                                "ZONE_READONLY",
                                "Cannot export CSV to read-only zone",
                                {"zone": zone_name, "output_csv": output_csv},
                                hint="Use 'storage' or 'documents' zone for CSV export"
                            )
                        # Export all results to CSV file
                        import csv as csv_module

                        output_csv_path = self._core._validate_relative_path(output_csv, zone_name, allow_zone_in_path)
                        csv_path = self._core._resolve_chroot_path(zone_root, output_csv_path)
                        self._core._ensure_dir(csv_path.parent)

                        row_count = 0
                        with open(csv_path, 'w', newline='', encoding='utf-8') as f:
                            writer = csv_module.writer(f)
                            writer.writerow(columns)  # Header

                            # Fetch and write in batches to handle large results
                            while True:
                                batch = cursor.fetchmany(1000)
                                if not batch:
                                    break
                                for row in batch:
                                    writer.writerow(list(row))
                                    row_count += 1

                        return self._core._format_response(
                            True,
                            data={
                                "path": path,
                                "query": query,
                                "output_csv": output_csv,
                                "rows_exported": row_count,
                                "columns": columns,
                            },
                            message=f"Exported {row_count} row(s) to {output_csv}"
                        )

                    # Check if query already has LIMIT
                    has_limit = bool(re.search(r'\bLIMIT\s+\d+', query_stripped))

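                    # Heuristic: the regex only matches a literal numeric LIMIT, so
                    # "LIMIT ?" placeholders fall through to the default cap below,
                    # while a numeric LIMIT inside a subquery is treated as if the
                    # outer query were limited.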
                    # Determine effective limit
                    DEFAULT_LIMIT = 10
                    if has_limit:
                        # User specified LIMIT in SQL - respect it
                        rows = cursor.fetchall()
                        total_rows = len(rows)
                        results = [dict(zip(columns, row)) for row in rows] if rows else []
                        truncated = False
                        effective_limit = None
                    else:
                        # No LIMIT in query - apply protection
                        if limit is None:
                            effective_limit = DEFAULT_LIMIT
                        elif limit == 0:
                            effective_limit = None  # No limit (dangerous but explicit)
                        else:
                            effective_limit = limit

                        if effective_limit:
                            # First count total rows (for user info)
                            rows = cursor.fetchall()
                            total_rows = len(rows)

                            # Truncate if needed
                            if total_rows > effective_limit:
                                results = [dict(zip(columns, row)) for row in rows[:effective_limit]]
                                truncated = True
                            else:
                                results = [dict(zip(columns, row)) for row in rows]
                                truncated = False
                        else:
                            # limit=0: no limit (user explicitly requested all)
                            rows = cursor.fetchall()
                            total_rows = len(rows)
                            results = [dict(zip(columns, row)) for row in rows] if rows else []
                            truncated = False

                    # Build response
                    response_data = {
                        "path": path,
                        "query": query,
                        "row_count": len(results),
                        "columns": columns,
                        "rows": results,
                    }

                    if truncated:
                        response_data["total_rows"] = total_rows
                        response_data["truncated"] = True
                        response_data["limit_applied"] = effective_limit
                        message = (
                            f"⚠️ No LIMIT in query. Showing {len(results)}/{total_rows} rows to protect context. "
                            f"Options: (1) Add LIMIT to SQL, (2) Use limit=N parameter for more rows, "
                            f"(3) Use output_csv='results.csv' to export ALL {total_rows} rows to file."
                        )
                    else:
                        message = f"Query returned {len(results)} row(s)"

                    return self._core._format_response(True, data=response_data, message=message)
                else:
                    # For INSERT/UPDATE/DELETE/CREATE/etc.
                    conn.commit()
                    rowcount = cursor.rowcount
                    lastrowid = cursor.lastrowid

                    return self._core._format_response(
                        True,
                        data={
                            "path": path,
                            "query": query,
                            "rows_affected": rowcount,
                            "last_row_id": lastrowid,
                        },
                        message=f"Query executed successfully ({rowcount} row(s) affected)"
                    )

            except sqlite3.Error:
                raise StorageError(
                    "EXEC_ERROR",
                    "SQLite query failed",
                    {"query": query},
                    hint="Check your SQL syntax"
                )
            finally:
                conn.close()

        except StorageError as e:
            return self._core._format_error(e, "shed_sqlite")
        except Exception:
            return self._core._format_response(False, message="An unexpected error occurred in shed_sqlite")

    # =========================================================================
    # DOWNLOAD LINKS (3 functions)
    # =========================================================================

    async def shed_link_create(
        self,
        zone: str,
        path: str,
        group: str = None,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Create a download link for a file.

        Uploads the file to Open WebUI's file system and returns a download URL.
        The link works while the user is logged in (uses session cookie).
        Works with any file type: PDF, images, ZIP, CSV, etc.

        :param zone: Zone to read from ("uploads", "storage", "documents", "group")
        :param path: Relative path to the file
        :param group: Group name (required if zone="group")
        :return: JSON with download_url and file_id

        Related functions:
            shed_link_list() - List all download links
            shed_link_delete() - Remove a download link

        Examples:
            shed_link_create(zone="storage", path="exports/report.pdf")
            shed_link_create(zone="storage", path="archives/data.zip")
            shed_link_create(zone="group", group="team", path="shared/presentation.pptx")
        """
        try:
            # Resolve zone and path
            zone_lower = zone.lower()
            user_root = self._core._get_user_root(__user__)

            if zone_lower == "uploads":
                conv_id = self._core._get_conv_id(__metadata__)
                chroot = user_root / "Uploads" / conv_id
            elif zone_lower == "storage":
                chroot = user_root / "Storage" / "data"
            elif zone_lower == "documents":
                chroot = user_root / "Documents" / "data"
            elif zone_lower == "group":
                if not group:
                    raise StorageError(
                        "MISSING_GROUP",
                        "Group name required for group zone",
                        {"zone": zone},
                        "Provide group parameter: shed_link_create(zone='group', group='team', path='...')"
                    )
                group_id = self._core._validate_group_id(group)
                self._core._check_group_access(__user__, group_id)
                chroot = Path(self.valves.storage_base_path) / "groups" / group_id / "data"
            else:
                raise StorageError(
                    "INVALID_ZONE",
                    f"Invalid zone: {zone}",
                    {"zone": zone, "valid_zones": ["uploads", "storage", "documents", "group"]},
                    "Use one of: uploads, storage, documents, group"
                )

            # Resolve and validate path
            filepath = self._core._resolve_chroot_path(chroot, path)

            if not filepath.exists():
                raise StorageError(
                    "FILE_NOT_FOUND",
                    f"File not found: {path}",
                    {"zone": zone, "path": path}
                )

            if not filepath.is_file():
                raise StorageError(
                    "NOT_A_FILE",
                    f"Path is not a file: {path}",
                    {"zone": zone, "path": path}
                )

            # Use Open WebUI internal Python API via Bridge (no HTTP request = no deadlock)

            # Get user ID
            user_id = __user__.get("id")
            if not user_id:
                raise StorageError(
                    "NO_USER_ID",
                    "User ID not available",
                    {},
                    "This should not happen - contact administrator"
                )

            # Generate unique file ID
            file_id = str(uuid.uuid4())
            filename = filepath.name
            file_size = filepath.stat().st_size

            # Detect content type
            content_type, _ = mimetypes.guess_type(filename)
            if not content_type:
                content_type = "application/octet-stream"

            # Copy file to Open WebUI uploads directory
            uploads_dir = Path("/app/backend/data/uploads")
            uploads_dir.mkdir(parents=True, exist_ok=True)
            dest_path = uploads_dir / f"{file_id}_{filename}"
            shutil.copy2(filepath, dest_path)

            try:
                # Create database entry using Bridge (isolates Open WebUI API changes)
                bridge = _OpenWebUIBridge()
                file_item = bridge.insert_file(
                    user_id=user_id,
                    file_id=file_id,
                    filename=filename,
                    file_path=str(dest_path),
                    content_type=content_type,
                    file_size=file_size,
                    metadata={"fileshed_link": True, "source_zone": zone, "source_path": path}
                )

                if not file_item:
                    # Clean up copied file on failure
                    dest_path.unlink(missing_ok=True)
                    raise StorageError(
                        "DB_ERROR",
                        "Failed to create file entry in database",
                        {"file_id": file_id}
                    )

                # Build download URL (full URL with base from valve)
                base_url = self.valves.openwebui_api_url.rstrip('/')
                download_url = f"{base_url}/api/v1/files/{file_id}/content"

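                # Illustrative (hypothetical host and id): with
                # openwebui_api_url = "http://localhost:8080" this looks like
                #   http://localhost:8080/api/v1/files/<uuid>/content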
                # Markdown clickable link for easy copy-paste by LLM
                clickable_link = f"[📥 Download {filename}]({download_url})"

                return self._core._format_response(True, {
                    "file_id": file_id,
                    "download_url": download_url,
                    "clickable_link": clickable_link,
                    "filename": filename,
                    "size_bytes": file_size,
                    "zone": zone,
                    "path": path,
                }, message=f"Link ready: {clickable_link}")

            except StorageError:
                # Clean up on failure
                dest_path.unlink(missing_ok=True)
                raise
            except Exception as e:
                dest_path.unlink(missing_ok=True)
                raise StorageError(
                    "INTERNAL_API_ERROR",
                    f"Error calling Open WebUI API: {e}",
                    {},
                    "Check Open WebUI version compatibility"
                )

        except StorageError as e:
            return self._core._format_error(e, "shed_link_create")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_link_list(
        self,
        __user__: dict = {},
    ) -> str:
        """
        List all download links created by the current user.

        :return: JSON with list of links (file_id, filename, download_url, size)

        Related functions:
            shed_link_create() - Create a new download link
            shed_link_delete() - Remove a download link

        Examples:
            shed_link_list()
        """
        try:
            user_id = __user__.get("id")
            if not user_id:
                raise StorageError(
                    "NO_USER_ID",
                    "User ID not available",
                    {},
                    "This should not happen - contact administrator"
                )

            # Use Bridge to get user's files
            bridge = _OpenWebUIBridge()
            bridge._ensure_initialized()

            # Get files using the internal API
            all_files = bridge._files_class.get_files_by_user_id(user_id)

            # Filter only files created by Fileshed (have fileshed_link marker)
            files = []
            if all_files:
                for f in all_files:
                    if hasattr(f, 'meta') and f.meta and isinstance(f.meta, dict):
                        meta_data = f.meta.get('data', {})
                        if isinstance(meta_data, dict) and meta_data.get('fileshed_link') == True:
                            files.append(f)

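            # Only files carrying the fileshed_link marker (written by
            # shed_link_create) survive the filter, so regular chat uploads are
            # never listed here.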
            if not files:
                return self._core._format_response(True, {
                    "files": [],
                    "count": 0
                }, message="No download links found")

            # Format file list
            base_url = self.valves.openwebui_api_url.rstrip('/')
            file_list = []
            for f in files:
                download_url = f"{base_url}/api/v1/files/{f.id}/content"
                file_info = {
                    "file_id": f.id,
                    "filename": f.filename,
                    "download_url": download_url,
                    "clickable_link": f"[📥 {f.filename}]({download_url})",
                    "created_at": f.created_at,
                }
                # Add size and source info if available in meta
                if hasattr(f, 'meta') and f.meta and isinstance(f.meta, dict):
                    file_info["size_bytes"] = f.meta.get("size")
                    file_info["content_type"] = f.meta.get("content_type")
                    meta_data = f.meta.get('data', {})
                    if isinstance(meta_data, dict):
                        file_info["source_zone"] = meta_data.get("source_zone")
                        file_info["source_path"] = meta_data.get("source_path")
                file_list.append(file_info)

            return self._core._format_response(True, {
                "files": file_list,
                "count": len(file_list)
            }, message=f"Found {len(file_list)} download link(s)")

        except StorageError as e:
            return self._core._format_error(e, "shed_link_list")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_link_delete(
        self,
        file_id: str,
        __user__: dict = {},
    ) -> str:
        """
        Remove a download link from Open WebUI.

        This deletes both the database entry and the physical file.

        :param file_id: The file ID returned by shed_link_create() or shed_link_list()
        :return: JSON with success status

        Related functions:
            shed_link_create() - Create a new download link
            shed_link_list() - List all download links

        Examples:
            shed_link_delete(file_id="317ef925-c87a-44fd-8d29-acdccb8e6070")
        """
        try:
            user_id = __user__.get("id")
            if not user_id:
                raise StorageError(
                    "NO_USER_ID",
                    "User ID not available",
                    {},
                    "This should not happen - contact administrator"
                )

            if not file_id:
                raise StorageError(
                    "MISSING_FILE_ID",
                    "file_id parameter is required",
                    {},
                    "Use shed_link_list() to get file IDs"
                )

            # Use Bridge to get and verify file ownership
            bridge = _OpenWebUIBridge()
            file_item = bridge.get_file_by_id(file_id)

            if not file_item:
                raise StorageError(
                    "FILE_NOT_FOUND",
                    f"File not found: {file_id}",
                    {"file_id": file_id},
                    "Use shed_link_list() to see your download links"
                )

            # Verify ownership
            if file_item.user_id != user_id:
                raise StorageError(
                    "ACCESS_DENIED",
                    "You can only delete your own download links",
                    {"file_id": file_id}
                )

            # Verify this is a Fileshed-created link (not a user upload)
            is_fileshed_link = False
            if hasattr(file_item, 'meta') and file_item.meta and isinstance(file_item.meta, dict):
                meta_data = file_item.meta.get('data', {})
                if isinstance(meta_data, dict) and meta_data.get('fileshed_link') == True:
                    is_fileshed_link = True

            if not is_fileshed_link:
                raise StorageError(
                    "NOT_A_FILESHED_LINK",
                    f"This file was not created by Fileshed: {file_id}",
                    {"file_id": file_id, "filename": file_item.filename},
                    "shed_link_delete only removes links created by shed_link_create. Use Open WebUI UI to manage other files."
                )

            # Get file path before deletion
            file_path = file_item.path if hasattr(file_item, 'path') else None
            filename = file_item.filename

            # Delete from database
            bridge.delete_file_by_id(file_id)

            # Delete physical file if it exists
            if file_path:
                try:
                    Path(file_path).unlink(missing_ok=True)
                except Exception:
                    pass  # File might already be gone

            return self._core._format_response(True, {
                "file_id": file_id,
                "filename": filename,
                "deleted": True
            }, message=f"Link deleted: {filename}")

        except StorageError as e:
            return self._core._format_error(e, "shed_link_delete")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    # =========================================================================
    # HOWTO GUIDES (targeted help to avoid context pollution)
    # =========================================================================

    async def shed_help(
        self,
        howto: str = None,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Get help for Fileshed. Call without arguments for a quick reference,
        or with a howto topic for detailed guides.

        :param howto: Optional topic. Available: download, csv_to_sqlite, upload, share, edit, commands, network, paths, large_files, full
        :return: Help text

        Examples:
            shed_help()                        # Quick reference + list of howtos
            shed_help(howto="download")        # How to download files (use curl, not fetch_url)
            shed_help(howto="csv_to_sqlite")   # How to import CSV into SQLite (fast)
            shed_help(howto="large_files")     # Process large files without context pollution
            shed_help(howto="commands")        # Available commands + workarounds
            shed_help(howto="full")            # Complete documentation
        """

        # List of available howtos
        available_howtos = list(self._core.HOWTO_GUIDES.keys()) + ["full"]

        # No argument (or empty string): return quick help + howto list
        if not howto:
            help_text = """# 🛖 Fileshed - Quick Reference

## 🔥 WORKFLOWS (choose one!)

### Workflow 1: Direct Write (simple, no locking)
```
shed_patch_text(zone="storage", path="file.txt", content="Hello", overwrite=True)
```
One function, done. Use `overwrite=True` to replace the entire file.

### Workflow 2: Locked Edit (with locking, for concurrent access)
```
1. shed_lockedit_open(zone="storage", path="file.txt")       # Lock + copy to editzone
2. shed_lockedit_overwrite(zone="storage", path="file.txt", content="New content")  # Edit copy
3. shed_lockedit_save(zone="storage", path="file.txt")       # Save + unlock (CLOSES edit mode!)
```
⚠️ After shed_lockedit_save, the file is CLOSED. To edit again, start from step 1.

### Workflow 3: Shell Commands
```
shed_exec(zone="storage", cmd="ls", args=["-la"])                    # List files
shed_exec(zone="storage", cmd="cat", args=["file.txt"])              # Read file
shed_exec(zone="storage", cmd="grep", args=["pattern", "file.txt"])  # Search
```

## 📁 ZONES

| Zone | Purpose | Git versioned? |
|------|---------|----------------|
| `storage` | General files, scripts, data | No |
| `documents` | Important docs, versioned | Yes (auto-commit) |
| `uploads` | User uploaded files (read-only) | No |
| `group` | Shared with team (requires group=) | Yes |

## ⚡ QUICK OPERATIONS

| Task | Command |
|------|---------|
| List files | `shed_exec(zone="storage", cmd="ls", args=["-la"])` |
| Read file | `shed_exec(zone="storage", cmd="cat", args=["file.txt"])` |
| Create folder | `shed_exec(zone="storage", cmd="mkdir", args=["-p", "folder"])` |
| Create/overwrite file | `shed_patch_text(zone="storage", path="f.txt", content="...", overwrite=True)` |
| Append to file | `shed_patch_text(zone="storage", path="f.txt", content="...", position="end")` |
| Delete file | `shed_delete(zone="storage", path="file.txt")` |
| Copy file | `shed_exec(zone="storage", cmd="cp", args=["src.txt", "dst.txt"])` |
| Move/rename | `shed_rename(zone="storage", old_path="a.txt", new_path="b.txt")` |

## 🔗 SHARE FILES

```
shed_link_create(zone="storage", path="report.pdf")  # Get download link
shed_link_list()                                     # List your links
shed_link_delete(file_id="...")                      # Remove link
```

## 📊 STATS & INFO

```
shed_stats()               # Storage usage
shed_parameters()          # Configuration limits
shed_allowed_commands()    # Available shell commands
shed_tree(zone="storage")  # Directory tree
```

## 📚 HOWTO GUIDES (for complex tasks)

""" + "\n".join(f"- `shed_help(howto=\"{h}\")` — {self._core._get_howto_description(h)}" for h in available_howtos) + """

## ⚠️ COMMON MISTAKES

```
❌ position="overwrite"          → Use overwrite=True instead!
❌ position="at" in patch_text   → "at" is for patch_bytes. Use "before"/"after" with line=N
❌ shed_lockedit_save twice      → save CLOSES edit mode, reopen with shed_lockedit_open
❌ shed_patch_text for folders   → Use shed_exec(cmd="mkdir", args=["-p", "folder"])
❌ paths with zone name          → Paths are RELATIVE: "file.txt" not "Storage/file.txt"
```
"""
            # Build howtos dict for structured access
            howtos_dict = {h: self._core._get_howto_description(h) for h in available_howtos}

            return self._core._format_response(
                True,
                data={
                    "help": help_text,
                    "howtos": howtos_dict
                },
                message="Help in data.help, full list of howtos in data.howtos"
            )

        # Specific howto requested
        howto_lower = howto.lower().strip()

        if howto_lower == "full":
            return self._core._format_response(True, data={"help": self._core._get_full_help()}, message="Full documentation in data.help")

        if howto_lower in self._core.HOWTO_GUIDES:
            return self._core._format_response(True, data={"help": self._core.HOWTO_GUIDES[howto_lower], "topic": howto_lower}, message=f"Guide '{howto_lower}' in data.help")

        # Unknown howto
        howtos_dict = {h: self._core._get_howto_description(h) for h in available_howtos}
        return self._core._format_response(False, data={"howtos": howtos_dict}, message=f"Unknown howto '{howto}'. Available howtos in data.howtos")

    async def shed_stats(
        self,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Returns usage statistics.

        :return: Statistics as JSON
        """
        try:
            user_root = self._core._get_user_root(__user__)

            def get_dir_size(path: Path) -> int:
                if not path.exists():
                    return 0
                total = 0
                for f in path.rglob("*"):
                    if f.is_file():
                        total += f.stat().st_size
                return total

            def count_files(path: Path) -> int:
                if not path.exists():
                    return 0
                return sum(1 for f in path.rglob("*") if f.is_file())

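            # Both helpers walk the whole tree via rglob, so cost grows with the
            # number of entries. The quota math below is simply, for example:
            #   usage_percent = total_size / (quota_per_user_mb * 1024 * 1024) * 100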
            uploads_size = get_dir_size(user_root / "Uploads")
            storage_size = get_dir_size(user_root / "Storage" / "data")
            documents_size = get_dir_size(user_root / "Documents" / "data")

            total_size = uploads_size + storage_size + documents_size
            quota = self.valves.quota_per_user_mb * 1024 * 1024

            stats = {
                "uploads": {
                    "size_bytes": uploads_size,
                    "size_human": f"{uploads_size / 1024 / 1024:.2f} MB",
                    "files": count_files(user_root / "Uploads"),
                },
                "storage": {
                    "size_bytes": storage_size,
                    "size_human": f"{storage_size / 1024 / 1024:.2f} MB",
                    "files": count_files(user_root / "Storage" / "data"),
                },
                "documents": {
                    "size_bytes": documents_size,
                    "size_human": f"{documents_size / 1024 / 1024:.2f} MB",
                    "files": count_files(user_root / "Documents" / "data"),
                },
                "total": {
                    "size_bytes": total_size,
                    "size_human": f"{total_size / 1024 / 1024:.2f} MB",
                    "quota_mb": self.valves.quota_per_user_mb,
                    "usage_percent": f"{(total_size / quota) * 100:.1f}%",
                },
            }

            return self._core._format_response(True, data=stats)

        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_parameters(
        self,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Returns current valve configuration (read-only).

        Call this function to answer user questions about:
        - "Is network access enabled?" -> check network_mode
        - "Can I use curl/wget?" -> check network_mode != "disabled"
        - "Can I git push?" -> check network_mode == "all"
        - "What's my storage quota?" -> check quota_per_user_mb
        - "What's the max file size?" -> check max_file_size_mb
        - "What's the default timeout?" -> check exec_timeout_default

        Values can only be changed by admins in Open WebUI (Workspace > Tools > Valves).

        :return: All valve values as JSON (read-only)

        Example:
            shed_parameters()

        Key fields:
        - network_mode: "disabled" | "safe" | "all"
          - "disabled": No network (curl/wget/git clone blocked)
          - "safe": Downloads only (curl GET, git clone/pull, no push)
          - "all": Full network access (uploads/push allowed)
        - quota_per_user_mb: Storage limit per user
        - quota_per_group_mb: Storage limit per group
        - max_file_size_mb: Maximum single file size
        - exec_timeout_default: Default command timeout (seconds)
        """
        try:
            params = {
                "storage_base_path": self.valves.storage_base_path,
                "quota_per_user_mb": self.valves.quota_per_user_mb,
                "quota_per_group_mb": self.valves.quota_per_group_mb,
                "max_file_size_mb": self.valves.max_file_size_mb,
                "lock_max_age_hours": self.valves.lock_max_age_hours,
                "exec_timeout_default": self.valves.exec_timeout_default,
                "exec_timeout_max": self.valves.exec_timeout_max,
                "group_default_mode": self.valves.group_default_mode,
                "network_mode": self.valves.network_mode,
                "openwebui_api_url": self.valves.openwebui_api_url,
            }

            # Add helpful derived info
            params["_info"] = {
                "network_enabled": self.valves.network_mode != "disabled",
                "network_upload_allowed": self.valves.network_mode == "all",
                "groups_available": GROUPS_AVAILABLE,
            }

            return self._core._format_response(True, data=params, message="Current valve configuration")

        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_allowed_commands(
        self,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Tests which whitelisted commands are actually available in the container.

        :return: List of available/missing commands by zone
        """
        try:
            # Cache the result
            if self._core._commands_cache is not None:
                return self._core._format_response(True, data=self._core._commands_cache)

            # Check all commands via Layer 2
            all_commands = WHITELIST_READWRITE
            available = []
            missing = []

            for cmd in sorted(all_commands):
                if self._core._check_command_available(cmd):
                    available.append(cmd)
                else:
                    missing.append(cmd)

            # Check network commands status (curl/wget + git network ops + media tools)
            network_status = {
                "mode": self.valves.network_mode,
                "curl": {
                    "installed": self._core._check_command_available("curl"),
                    "enabled": self.valves.network_mode in ("safe", "all"),
                },
                "wget": {
                    "installed": self._core._check_command_available("wget"),
                    "enabled": self.valves.network_mode in ("safe", "all"),
                },
                "git_clone": self.valves.network_mode in ("safe", "all"),
                "git_fetch": self.valves.network_mode in ("safe", "all"),
                "git_pull": self.valves.network_mode in ("safe", "all"),
                "git_push": self.valves.network_mode == "all",
                "ffmpeg": {
                    "installed": self._core._check_command_available("ffmpeg"),
                    "enabled": self.valves.network_mode in ("safe", "all"),
                    "output_protocols_blocked": self.valves.network_mode == "safe",
                },
                "pandoc": {
                    "installed": self._core._check_command_available("pandoc"),
                    "urls_enabled": self.valves.network_mode in ("safe", "all"),
                },
            }

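            # The enabled flags above mirror the network_mode valve documented in
            # shed_parameters(): "safe" permits downloads (curl/wget GET, git
            # clone/pull), while pushes and uploads additionally require "all".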
            result = {
                "uploads": {
                    "available": [c for c in available if c in WHITELIST_READONLY],
                    "missing": [c for c in missing if c in WHITELIST_READONLY],
                },
                "storage": {
                    "available": [c for c in available if c in WHITELIST_READWRITE],
                    "missing": [c for c in missing if c in WHITELIST_READWRITE],
                },
                "documents": {
                    "available": [c for c in available if c in WHITELIST_READWRITE],
                    "missing": [c for c in missing if c in WHITELIST_READWRITE],
                },
                "network": network_status,
                "summary": {
                    "total_whitelist": len(all_commands),
                    "available": len(available),
                    "missing": len(missing),
                    "coverage": f"{(len(available) / len(all_commands)) * 100:.1f}%",
                },
            }

            self._core._commands_cache = result
            return self._core._format_response(True, data=result)

        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_force_unlock(
        self,
        zone: str = "",
        path: str = "",
        group: str = "",
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Forces file unlock (crash recovery).

        Use this if a file is stuck in edit mode after a crash.

        :param zone: "storage" or "documents" (for personal zones)
        :param path: File path relative to zone
        :param group: Group ID (for group zones - use instead of zone)
        :return: Confirmation as JSON

        Examples:
            shed_force_unlock(zone="storage", path="stuck_file.txt")
            shed_force_unlock(group="team", path="locked_doc.md")
        """
        try:
            # Validate path is provided
            if not path:
                raise StorageError("MISSING_PARAMETER", "path is required")

            # Determine if group or personal zone
            if group:
                # Group mode
                group = self._core._validate_group_id(group)
                self._core._check_group_access(__user__, group)
                zone_name = f"Group:{group}"

                # Validate path with zone_name
                path = self._core._validate_relative_path(path, zone_name, allow_zone_in_path)

                group_path = self._core._get_groups_root() / group
                lock_path = group_path / "locks" / (path + ".lock")
                editzone_base = group_path / "editzone"
                zone_display = zone_name
            else:
                # Personal zone mode
                if not zone:
                    raise StorageError(
                        "MISSING_PARAMETER",
                        "Must specify either 'zone' or 'group'",
                        hint="Use zone='storage' or zone='documents', or group='group_id'"
                    )

                if zone.lower() not in ("storage", "documents"):
                    raise StorageError(
                        "ZONE_FORBIDDEN",
                        f"Invalid zone: {zone}",
                        {},
                        "Use 'storage' or 'documents'"
                    )

                user_root = self._core._get_user_root(__user__)
                zone_name = "Storage" if zone.lower() == "storage" else "Documents"

                # Validate path with zone_name
                path = self._core._validate_relative_path(path, zone_name, allow_zone_in_path)

                zone_root = user_root / zone_name
                lock_path = self._core._get_lock_path(zone_root, path)
                editzone_base = zone_root / "editzone"
                zone_display = zone_name

            # Find and delete all editzones for this path
            if editzone_base.exists():
                for conv_dir in editzone_base.iterdir():
                    if conv_dir.is_dir():
                        edit_path = conv_dir / path
                        if edit_path.exists():
                            self._core._rm_with_empty_parents(edit_path, editzone_base)

            # Delete lock
            if lock_path.exists():
                self._core._rm_with_empty_parents(lock_path, lock_path.parent.parent / "locks")

            return self._core._format_response(True, message=f"Unlocked: {path} in {zone_display}")

        except StorageError as e:
            return self._core._format_error(e, "shed_force_unlock")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_maintenance(
        self,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Cleans expired locks and orphan editzones (personal and group spaces).

        :return: Cleanup report as JSON
        """
        try:
            user_root = self._core._get_user_root(__user__)
            max_age_hours = self.valves.lock_max_age_hours
            now = datetime.now(timezone.utc)

            cleaned = {
                "expired_locks": [],
                "corrupted_locks": [],
                "orphan_editzones": [],
            }

            def clean_zone(zone_root: Path, zone_name: str):
                """Helper to clean locks and editzones in a zone."""
                locks_dir = zone_root / "locks"
                editzone_dir = zone_root / "editzone"

                # 1. Clean expired and corrupted locks
                if locks_dir.exists():
                    for lock_file in locks_dir.rglob("*.lock"):
                        try:
                            lock_data = json.loads(lock_file.read_text())
                            locked_at_str = lock_data.get("locked_at", "")

                            if locked_at_str:
                                locked_at = datetime.fromisoformat(
                                    locked_at_str.replace("Z", "+00:00")
                                )
                                age_hours = (now - locked_at).total_seconds() / 3600

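                                # The "Z" suffix is normalised above because
                                # datetime.fromisoformat() only accepts it natively
                                # from Python 3.11 on; locks older than
                                # lock_max_age_hours are reclaimed below.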
                                if age_hours > max_age_hours:
                                    # Expired lock
                                    rel_path = lock_file.relative_to(locks_dir)
                                    path_str = str(rel_path)[:-5]  # Remove .lock

                                    # Delete associated editzone
                                    conv_id = lock_data.get("conv_id", "")
                                    if conv_id:
                                        edit_path = editzone_dir / conv_id / path_str
                                        if edit_path.exists():
                                            self._core._rm_with_empty_parents(edit_path, editzone_dir)

                                    # Delete lock
                                    self._core._rm_with_empty_parents(lock_file, locks_dir)
                                    cleaned["expired_locks"].append(f"{zone_name}/{path_str}")

                        except json.JSONDecodeError:
                            # Corrupted lock
                            rel_path = lock_file.relative_to(locks_dir)
                            self._core._rm_with_empty_parents(lock_file, locks_dir)
                            cleaned["corrupted_locks"].append(f"{zone_name}/{rel_path}")
                        except (ValueError, TypeError):
                            pass  # Invalid date, ignore

                # 2. Clean orphan editzones (without corresponding lock)
                if editzone_dir.exists():
                    for conv_dir in editzone_dir.iterdir():
                        if conv_dir.is_dir():
                            for item in conv_dir.rglob("*"):
                                if item.is_file():
                                    rel_path = item.relative_to(conv_dir)
                                    lock_path = locks_dir / (str(rel_path) + ".lock")

                                    if not lock_path.exists():
                                        # Orphan editzone
                                        self._core._rm_with_empty_parents(item, editzone_dir)
                                        cleaned["orphan_editzones"].append(
                                            f"{zone_name}/editzone/{conv_dir.name}/{rel_path}"
                                        )

            # Clean personal zones
            for zone_name in ("Storage", "Documents"):
                zone_root = user_root / zone_name
                clean_zone(zone_root, zone_name)

            # Clean group zones (for groups the user belongs to)
            user_id = __user__.get("id", "")
            user_groups = self._core._get_user_groups(user_id)
            groups_root = self._core._get_groups_root()

            for group in user_groups:
                group_path = groups_root / group.id
                if group_path.exists():
                    clean_zone(group_path, f"Group:{group.id}")

            total = (len(cleaned["expired_locks"]) +
                     len(cleaned["corrupted_locks"]) +
                     len(cleaned["orphan_editzones"]))

            return self._core._format_response(
                True,
                data=cleaned,
                message=f"Maintenance complete: {total} element(s) cleaned"
            )

        except Exception as e:
            return self._core._format_response(False, message=str(e))

    # =========================================================================
    # GROUP FUNCTIONS (14 functions)
    # =========================================================================

    # --- Discovery (2) ---

    async def shed_group_list(
        self,
        __user__: dict = {},
    ) -> str:
        """
        Lists groups the user belongs to.

        :return: List of groups with id, name, and member count
        """
        try:
            if not GROUPS_AVAILABLE:
                return self._core._format_response(
                    False,
                    message="Group features are not available (Open WebUI Groups API not found)"
                )

            user_id = __user__.get("id", "")
            groups = self._core._get_user_groups(user_id)

            result = []
            for g in groups:
                # Use dedicated API method to get member count
                member_count = 0
                try:
                    member_count = Groups.get_group_member_count_by_id(g.id) or 0
                except Exception:
                    pass

                result.append({
                    "id": g.id,
                    "name": g.name,
                    "description": g.description or "",
                    "member_count": member_count,
                })

            return self._core._format_response(
                True,
                data={"groups": result, "count": len(result)},
                message=f"Found {len(result)} group(s)"
            )

        except StorageError as e:
            return self._core._format_error(e, "shed_group_list")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_group_info(
        self,
        group: str,
        __user__: dict = {},
    ) -> str:
        """
        Shows group files, ownership information, and statistics.

        :param group: Group ID or group name
        :return: Group information including files and their ownership
        """
        try:
            # Validate group_id
            group = self._core._validate_group_id(group)
            self._core._check_group_access(__user__, group)

            # Get group info and member list using dedicated API methods
            group_obj = Groups.get_group_by_id(group)
            try:
                member_ids = Groups.get_group_user_ids_by_id(group) or []
            except Exception:
                member_ids = []

            data_path = self._core._get_group_data_path(group)

            if not data_path.exists():
                return self._core._format_response(
                    True,
                    data={
                        "group_id": group,
                        "name": group_obj.name if group_obj else group,
                        "members": member_ids,
                        "member_count": len(member_ids),
                        "files": [],
                        "total_files": 0,
                        "total_size": 0
                    },
                    message="Group space is empty"
                )

            # Get all ownership records for this group in a single query
            all_ownership, _ = self._core._db_execute(
                "SELECT file_path, owner_id, write_access FROM file_ownership WHERE group_id = ?",
                (group,)
            )
            ownership_map = {row["file_path"]: {"owner_id": row["owner_id"], "write_access": row["write_access"]} for row in all_ownership}

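            # One SELECT plus dict lookups avoids the N+1 query pattern the
            # per-file loop below would otherwise cause.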
            # List files with ownership
            files = []
            total_size = 0

            for item in data_path.rglob("*"):
                if item.is_file() and ".git" not in item.parts:
                    rel_path = str(item.relative_to(data_path))
                    try:
                        size = item.stat().st_size
                    except (OSError, FileNotFoundError):
                        continue  # Skip files that disappeared
                    total_size += size

                    ownership = ownership_map.get(rel_path)
                    files.append({
                        "path": rel_path,
                        "size": size,
                        "owner": ownership["owner_id"] if ownership else "unknown",
                        "mode": ownership["write_access"] if ownership else "unknown",
                    })

            return self._core._format_response(
                True,
                data={
                    "group_id": group,
                    "name": group_obj.name if group_obj else group,
                    "members": member_ids,
                    "member_count": len(member_ids),
                    "files": files,
                    "total_files": len(files),
                    "total_size": total_size,
                    "total_size_human": f"{total_size / 1024 / 1024:.2f} MB",
                },
                message=f"Group has {len(files)} file(s) and {len(member_ids)} member(s)"
            )

        except StorageError as e:
            return self._core._format_error(e, "shed_group_info")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    # --- Operations (4) ---

    async def shed_group_set_mode(
        self,
        group: str,
        path: str,
        mode: str,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
    ) -> str:
        """
        Changes the write mode of a file (owner only).

        :param group: Group ID or group name
        :param path: File path
        :param mode: New mode: 'owner', 'group', or 'owner_ro'
        :return: Operation result as JSON
        """
        try:
            # Validate group_id
            group = self._core._validate_group_id(group)
            self._core._check_group_access(__user__, group)
            user_id = __user__.get("id", "")
            zone_name = f"Group:{group}"

            # Validate path
            path = self._core._validate_relative_path(path, zone_name, allow_zone_in_path)

            # Validate mode
            if mode not in ("owner", "group", "owner_ro"):
                raise StorageError(
                    "INVALID_MODE",
                    f"Invalid mode: {mode}",
                    hint="Use 'owner', 'group', or 'owner_ro'"
                )

            # Check ownership
            ownership = self._core._get_file_ownership(group, path)
            if ownership is None:
                raise StorageError("FILE_NOT_FOUND", f"No ownership record for: {path}")

            if ownership["owner_id"] != user_id:
                raise StorageError(
                    "NOT_FILE_OWNER",
                    "Only the file owner can change the write mode",
                    {"owner": ownership["owner_id"], "your_id": user_id}
                )

            # Update mode
            old_mode = ownership["write_access"]
            self._core._set_file_ownership(group, path, user_id, mode)

            return self._core._format_response(
                True,
                data={"path": path, "old_mode": old_mode, "new_mode": mode},
                message=f"File mode changed from '{old_mode}' to '{mode}'"
            )

        except StorageError as e:
            return self._core._format_error(e, "shed_group_set_mode")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    async def shed_group_chown(
        self,
        group: str,
        path: str,
        new_owner: str,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
    ) -> str:
        """
        Transfers file ownership to another user (owner only).

        :param group: Group ID or group name
        :param path: File path
        :param new_owner: User ID of new owner
        :return: Operation result as JSON
        """
        try:
            # Validate group_id
            group = self._core._validate_group_id(group)
            self._core._check_group_access(__user__, group)
            user_id = __user__.get("id", "")
            zone_name = f"Group:{group}"

            # Validate path
            path = self._core._validate_relative_path(path, zone_name, allow_zone_in_path)

            # Validate new_owner (sanitize)
            if not new_owner or not isinstance(new_owner, str):
                raise StorageError("INVALID_OWNER", "new_owner is required")
            new_owner = new_owner.strip()
            if not new_owner or len(new_owner) > 255:
                raise StorageError("INVALID_OWNER", "Invalid new_owner format")
            # Block dangerous characters for SQL and filesystem
            if any(c in new_owner for c in [";", "'", '"', "\\", "/", "\n", "\r", "\0"]):
                raise StorageError("INVALID_OWNER", "new_owner contains invalid characters")

            # Check ownership
            ownership = self._core._get_file_ownership(group, path)
            if ownership is None:
                raise StorageError("FILE_NOT_FOUND", f"No ownership record for: {path}")

            if ownership["owner_id"] != user_id:
                raise StorageError(
                    "NOT_FILE_OWNER",
                    "Only the file owner can transfer ownership",
                    {"owner": ownership["owner_id"], "your_id": user_id}
                )

            # Check that the new owner is a group member
            if not self._core._is_group_member(new_owner, group):
                raise StorageError(
                    "INVALID_OWNER",
                    f"User '{new_owner}' is not a member of this group"
                )

            # Update ownership
            self._core._db_execute(
                "UPDATE file_ownership SET owner_id = ?, updated_at = CURRENT_TIMESTAMP WHERE group_id = ? AND file_path = ?",
                (new_owner, group, path)
            )

            return self._core._format_response(
                True,
                data={"path": path, "old_owner": user_id, "new_owner": new_owner},
                message=f"Ownership transferred to '{new_owner}'"
            )

        except StorageError as e:
            return self._core._format_error(e, "shed_group_chown")
        except Exception as e:
            return self._core._format_response(False, message=str(e))

    # --- Bridge (1) ---

    async def shed_copy_to_group(
        self,
        src_zone: str,
        src_path: str,
        group: str,
        dest_path: str,
        message: str = "Add file to group",
        mode: str = None,
        allow_zone_in_path: bool = False,
        __user__: dict = {},
        __metadata__: dict = {},
    ) -> str:
        """
        Copies a file from personal space to a group.

        :param src_zone: Source zone ('uploads', 'storage', or 'documents')
        :param src_path: Source file path (relative to src_zone, don't include zone name!)
        :param group: Target group ID
        :param dest_path: Destination path in group (don't include zone name!)
        :param message: Git commit message
        :param mode: Write mode: 'owner', 'group', or 'owner_ro' (default from config)
        :param allow_zone_in_path: Allow paths starting with zone name (default: False)
        :return: Operation result as JSON
        """
        try:
            # Validate group_id
            group = self._core._validate_group_id(group)
            self._core._check_group_access(__user__, group)
            user_id = __user__.get("id", "")
            conv_id = self._core._get_conv_id(__metadata__)

            # Resolve source zone first to get zone_name for validation
            user_root = self._core._get_user_root(__user__)
            src_zone_lower = src_zone.lower()

            if src_zone_lower == "uploads":
                src_base = user_root / "Uploads" / conv_id
                src_zone_name = "Uploads"
            elif src_zone_lower == "storage":
                src_base = user_root / "Storage" / "data"
                src_zone_name = "Storage"
            elif src_zone_lower == "documents":
                src_base = user_root / "Documents" / "data"
                src_zone_name = "Documents"
            else:
                raise StorageError(
                    "ZONE_FORBIDDEN",
                    f"Invalid source zone: {src_zone}",
                    hint="Use 'uploads', 'storage', or 'documents'"
                )

            # Validate paths with zone name check
            src_path = self._core._validate_relative_path(src_path, src_zone_name, allow_zone_in_path)
            dest_path = self._core._validate_relative_path(dest_path, f"Group:{group}", allow_zone_in_path)

            source = self._core._resolve_chroot_path(src_base, src_path)

            if not source.exists():
                raise StorageError("FILE_NOT_FOUND", f"File not found: {src_path}")

            # Check file size
            if source.is_file():
                file_size = source.stat().st_size
                max_size = self.valves.max_file_size_mb * 1024 * 1024
                if file_size > max_size:
                    raise StorageError(
                        "FILE_TOO_LARGE",
                        f"File exceeds max size ({self.valves.max_file_size_mb} MB)",
                        {"size_mb": round(file_size / 1024 / 1024, 2), "max_mb": self.valves.max_file_size_mb}
                    )

            # Check group quota
            self._core._check_group_quota(group, self._core._get_path_size(source))

            # Resolve destination
            data_path = self._core._ensure_group_space(group)
            dest = self._core._resolve_chroot_path(data_path, dest_path)

            # Check if destination exists
            existing = self._core._get_file_ownership(group, dest_path)
            if existing:
                can_write, error = self._core._can_write_group_file(group, dest_path, user_id)
                if not can_write:
                    raise StorageError(error, f"Cannot overwrite file: {error}")

            # Copy (handle both files and directories)
            dest.parent.mkdir(parents=True, exist_ok=True)
            if source.is_dir():
                shutil.copytree(source, dest)
            else:
                shutil.copy2(source, dest)

            # Set ownership
            effective_mode = mode or self.valves.group_default_mode
            if effective_mode not in ("owner", "group", "owner_ro"):
                effective_mode = "group"
            self._core._set_file_ownership(group, dest_path, user_id, effective_mode)

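            # Note: an unrecognised mode quietly degrades to 'group' (writable by
            # all members) rather than erroring; the default itself comes from the
            # group_default_mode valve (see shed_parameters()).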
            # Git commit
            self._core._git_commit_as_user(data_path, message, user_id)

            return self._core._format_response(
                True,
                data={
                    "src_zone": src_zone,
                    "src_path": src_path,
                    "group": group,
                    "dest_path": dest_path,
                    "mode": effective_mode,
                },
                message=f"File copied to group '{group}' with mode '{effective_mode}'"
            )

        except StorageError as e:
            return self._core._format_error(e, "shed_copy_to_group")
        except Exception as e:
            return self._core._format_response(False, message=str(e))