""" title: Fileshed description: Persistent file storage with group collaboration. FIRST: Run shed_help() for quick reference or shed_help(howto="...") for guides: download, csv_to_sqlite, upload, share, edit, commands, network, paths, full. Config: shed_parameters(). author: Fade78 (with Claude Opus 4.5) version: 1.0.0 license: MIT required_open_webui_version: 0.4.0 SETUP INSTRUCTIONS: ================== For this tool to work properly, you must enable Native Function Calling: Option 1 - Per Model (recommended): Admin Panel > Settings > Models > [Select Model] > Advanced Parameters > Function Calling > "Native" Option 2 - Per Chat: Chat Controls (gear icon) > Advanced Params > Function Calling > "Native" """ # ============================================================================= # ⚠️ LLM WARNING - READ THIS FIRST ⚠️ # ============================================================================= # # YOU CAN ONLY CALL FUNCTIONS STARTING WITH "shed_" ! # # ✅ CORRECT (public API): # shed_exec(zone="storage", cmd="ls", args=["-la"]) # shed_exec(zone="storage", cmd="mkdir", args=["-p", "projects"]) # shed_exec(zone="documents", cmd="git", args=["log"]) # shed_patch_text(zone="storage", path="notes.txt", content="Hello") # shed_import(import_all=True) # shed_sqlite(zone="storage", path="db.sqlite", query="SELECT * FROM t") # # ❌ WRONG (internal methods - will NOT work): # _exec_command(...) <- INTERNAL, don't call! # _git_run(...) <- INTERNAL, don't call! # _validate_path(...) <- INTERNAL, don't call! # # ⚠️ SHELL COMMANDS FIRST: # Use shed_exec() for ALL shell-doable operations! # ✓ mkdir: shed_exec(zone="storage", cmd="mkdir", args=["-p", "dir"]) # ✗ WRONG: shed_patch_text(path="dir/.keep", content="") # # Methods starting with "_" are INTERNAL IMPLEMENTATION DETAILS. # They are NOT callable by the LLM. Only "shed_*" functions are available. # # Run shed_help() for the list of available functions! # # ============================================================================= # ============================================================================= # 🔧 DEV NOTES # ============================================================================= # # 1. ARCHITECTURE: All public tools are in `class Tools`. Internal methods # MUST be in `class _FileshedCore` to hide them from Open WebUI/LLM. # # 2. PATHS: Internally we use {Zone}/data/ but externally it's just {Zone}/. # NEVER expose "data/" in help, messages, or errors. The LLM sees: # Storage/myfile.txt (not Storage/data/myfile.txt) # # ============================================================================= import json import mimetypes import os import re import shutil import sqlite3 import subprocess import unicodedata import uuid import zipfile from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path from typing import Any, Callable, Optional from pydantic import BaseModel, Field # Try to import Open WebUI Groups API GROUPS_AVAILABLE = False try: from open_webui.models.groups import Groups GROUPS_AVAILABLE = True except ImportError: pass # ============================================================================= # ZONE CONTEXT # ============================================================================= @dataclass class ZoneContext: """Result of zone resolution - contains all zone-specific info.""" zone_root: Path # Data directory path zone_name: str # Canonical name (Storage, Documents, Uploads, group:xxx) zone_lower: str # Lowercase (storage, documents, uploads, group) editzone_base: Path # Base for editzones (None for uploads) conv_id: str # Conversation ID group_id: Optional[str] # Group ID if zone=group, else None git_commit: bool # Auto-commit after modifications readonly: bool # True for uploads whitelist: set # Allowed commands for this zone # ============================================================================= # CONFIGURATION # ============================================================================= # Valves are defined inside Tools class (required by Open WebUI) # See: Tools.Valves (inner class) # ============================================================================= # WHITELISTS # ============================================================================= # Read-only commands (Uploads) WHITELIST_READONLY = { # Reading "cat", "head", "tail", "less", "more", "nl", "wc", "stat", "file", "du", "tac", # Navigation "ls", "tree", "find", # Text search "grep", "egrep", "fgrep", "rg", "awk", "sed", # Text transformation "sort", "uniq", "cut", "paste", "tr", "fold", "fmt", "column", "rev", "shuf", "expand", "unexpand", "pr", # Join "join", # Comparison "diff", "diff3", "cmp", "comm", # Archives (list) "tar", "unzip", "zipinfo", "7z", # Compression (stdout) "zcat", "bzcat", "xzcat", # Checksums "md5sum", "sha1sum", "sha256sum", "sha512sum", "b2sum", "cksum", # Encoding "base32", "base64", "basenc", # Binary/Hex "strings", "od", "hexdump", "xxd", # JSON/XML/YAML "jq", "xmllint", "yq", # Encoding conversion (stdout) "iconv", # Calculation "bc", "dc", "expr", "factor", "numfmt", # Paths "basename", "dirname", "realpath", # Misc "echo", "printf", # Media (info reading) "ffprobe", "identify", "exiftool", # Database "sqlite3", # REMOVED for security: # - xargs : can execute arbitrary commands # - timeout : can execute arbitrary commands (we have internal timeout) # - env : can execute commands and expose secrets } # Read/write commands (Storage, Documents) WHITELIST_READWRITE = WHITELIST_READONLY | { # Additional reading "df", "locate", "which", "whereis", # Split "split", "csplit", # Additional comparison "sdiff", "patch", "colordiff", # Archives (extraction/creation) "zip", "7za", # Compression "gzip", "gunzip", "bzip2", "bunzip2", "xz", "unxz", "lz4", "zstd", # Additional checksums "sum", # Additional encoding "uuencode", "uudecode", # File modification "touch", "mkdir", "rm", "rmdir", "mv", "cp", "ln", "truncate", "mktemp", "install", "shred", "rename", # Permissions "chmod", # Document conversion "pandoc", # Encoding conversion "dos2unix", "unix2dos", "recode", # Additional calculation "seq", # Date/Time "date", "cal", # Additional paths "readlink", "pathchk", "pwd", # System (info only - env removed, can execute commands) "uname", "nproc", "printenv", # Control (timeout removed - can execute commands, we have internal timeout) "sleep", # Misc (xargs removed - can execute arbitrary commands) "yes", "tee", "envsubst", "gettext", "tsort", "true", "false", # Media "ffmpeg", "magick", "convert", # Versioning "git", } # Allowed Git subcommands GIT_WHITELIST_READ = { "status", "log", "show", "diff", "branch", "tag", "blame", "ls-files", "ls-tree", "shortlog", "reflog", "describe", "rev-parse", "rev-list", "cat-file", } GIT_WHITELIST_WRITE = { "add", "commit", "reset", "restore", "checkout", "rm", "mv", "revert", "cherry-pick", "stash", "clean", } GIT_BLACKLIST = { "gc", "prune", "filter-branch", # Always forbidden (dangerous local ops) } # Git network commands (controlled by curl valves) GIT_NETWORK_GET = { "clone", "fetch", "pull", "submodule", "remote", # Download operations } GIT_NETWORK_PUSH = { "push", # Upload/exfiltration } # Curl/wget commands (controlled by valves) CURL_COMMANDS = {"curl", "wget"} # Curl/wget options forbidden in GET mode (allow data exfiltration) CURL_FORBIDDEN_GET_OPTS = { # HTTP methods "-X", "--request", # Data upload "-d", "--data", "--data-raw", "--data-binary", "--data-urlencode", "--data-ascii", "-F", "--form", "--form-string", "-T", "--upload-file", # POST behavior "--post301", "--post302", "--post303", # wget upload "--post-data", "--post-file", "--body-data", "--body-file", "--method", } # ffmpeg output protocols that allow data exfiltration # These protocols can SEND data to remote servers FFMPEG_OUTPUT_PROTOCOLS = { "rtmp://", "rtmps://", "rtmpe://", "rtmpt://", # Streaming upload "rtsp://", "rtsps://", # Real-time streaming "srt://", # Secure Reliable Transport "udp://", "tcp://", # Raw sockets "rtp://", "srtp://", # Real-time protocol "ftp://", "sftp://", # File transfer "http://", "https://", # Can POST/PUT with -method "icecast://", # Streaming server "tee:", # Can duplicate to multiple outputs including network } # Commands that can SEND data over network (exfiltration risk) # These are blocked unless network_mode is "all" NETWORK_OUTPUT_COMMANDS = {"ffmpeg"} # Commands that can RECEIVE data from network (read-only risk) # These require network_mode "safe" or "all", with URL blocking if "disabled" NETWORK_INPUT_COMMANDS = {"ffprobe", "pandoc", "magick", "convert", "identify"} # All network-capable commands (union of above) NETWORK_CAPABLE_COMMANDS = NETWORK_OUTPUT_COMMANDS | NETWORK_INPUT_COMMANDS # Forbidden commands BLACKLIST_COMMANDS = { # Interpreters/Shells "bash", "sh", "zsh", "fish", "dash", "csh", "tcsh", "ksh", "python", "python3", "perl", "ruby", "node", "php", "lua", "exec", "eval", "source", # Background / Fork "nohup", "disown", "setsid", "screen", "tmux", "at", "batch", "crontab", # System privileges "sudo", "su", "doas", "chown", "chgrp", # Network (curl/wget controlled separately via valves) "fetch", "ssh", "scp", "sftp", "rsync", "nc", "netcat", "ncat", "telnet", "ftp", "ping", "traceroute", # System / Dangerous "dd", "mount", "umount", "kill", "killall", "pkill", "reboot", "shutdown", "halt", "poweroff", "systemctl", "service", "mkfs", "fdisk", "parted", "iptables", "firewall-cmd", } # Pattern to detect dangerous arguments (shell metacharacters) # Blocks: ; & | ` $ \n \r && || >> << > < $( ${ DANGEROUS_ARGS_PATTERN = re.compile(r'[;&|`$\n\r]|&&|\|\||>>|<<|>|<|\$\(|\$\{') # Same pattern but allows | (for commands that use | in their internal syntax) # Used for: jq (pipe operator), awk (print | "cmd" - but we block system() separately) DANGEROUS_ARGS_PATTERN_ALLOW_PIPE = re.compile(r'[;&`$\n\r]|&&|>>|<<|>|<|\$\(|\$\{') # Commands that use | in their internal syntax (not shell pipes) COMMANDS_ALLOWING_PIPE = {"jq", "awk", "gawk", "mawk", "nawk"} # Pattern to detect URLs (network access via ffmpeg, pandoc, imagemagick, etc.) # Blocks: http://, https://, ftp://, rtmp://, rtsp://, smb://, file://, etc. URL_PATTERN = re.compile(r'^[a-zA-Z][a-zA-Z0-9+.-]*://', re.IGNORECASE) # find options that can execute commands (security risk) FIND_EXEC_OPTIONS = {"-exec", "-execdir", "-ok", "-okdir"} # awk patterns that can execute commands (security risk) # system() executes shell commands, getline can pipe from commands AWK_DANGEROUS_PATTERNS = re.compile(r'\bsystem\s*\(|\|\s*getline|\bgetline\s*<') # ============================================================================= # ERRORS # ============================================================================= class StorageError(Exception): """Base storage error with contextual help.""" def __init__(self, code: str, message: str, details: dict = None, hint: str = None, func: str = None): self.code = code self.message = message self.details = details or {} self.hint = hint self.func = func # Function name for contextual help super().__init__(message) def to_dict(self, function_help: str = None) -> dict: result = { "success": False, "error": self.code, "message": self.message, "details": self.details, "hint": self.hint, } if function_help: result["help"] = function_help return result # ============================================================================= # MAIN CLASS # ============================================================================= # ============================================================================= # OPEN WEBUI BRIDGE (isolates internal API calls) # ============================================================================= class _OpenWebUIBridge: """ Bridge to Open WebUI internal Python API. This class isolates all direct interactions with Open WebUI's internal modules. If Open WebUI's internal API changes between versions, only this class needs updating. Supported Open WebUI versions: 0.6.x (tested with 0.6.40+) """ _instance = None _initialized = False _files_module = None _files_class = None _file_form_class = None def __new__(cls): """Singleton pattern to avoid repeated imports.""" if cls._instance is None: cls._instance = super().__new__(cls) return cls._instance def _ensure_initialized(self): """Lazy initialization of Open WebUI imports.""" if self._initialized: return True try: # Open WebUI 0.6.x API from open_webui.models.files import Files, FileForm self._files_class = Files self._file_form_class = FileForm self._initialized = True return True except ImportError as e: # Try alternative import paths for different versions try: # Hypothetical future API path from open_webui.core.models.files import Files, FileForm self._files_class = Files self._file_form_class = FileForm self._initialized = True return True except ImportError: pass raise StorageError( "OPENWEBUI_API_UNAVAILABLE", f"Cannot import Open WebUI internal API: {e}", {"import_error": str(e)}, "Open WebUI internal modules not available. This feature requires running inside Open WebUI." ) def insert_file( self, user_id: str, file_id: str, filename: str, file_path: str, content_type: str, file_size: int, metadata: dict = None ): """ Insert a new file into Open WebUI's file system. Args: user_id: Open WebUI user ID file_id: Unique file ID (UUID) filename: Display name of the file file_path: Absolute path to the file on disk content_type: MIME type file_size: Size in bytes metadata: Optional additional metadata Returns: File model object or None on failure """ self._ensure_initialized() try: file_item = self._files_class.insert_new_file( user_id, self._file_form_class( **{ "id": file_id, "filename": filename, "path": file_path, "data": {}, # No RAG processing needed for download-only files "meta": { "name": filename, "content_type": content_type, "size": file_size, "data": metadata or {}, }, } ), ) return file_item except Exception as e: raise StorageError( "OPENWEBUI_INSERT_ERROR", f"Failed to insert file into Open WebUI: {e}", {"file_id": file_id, "error": str(e)} ) def get_file_by_id(self, file_id: str): """Get file metadata by ID.""" self._ensure_initialized() try: return self._files_class.get_file_by_id(file_id) except Exception as e: raise StorageError( "OPENWEBUI_GET_ERROR", f"Failed to get file from Open WebUI: {e}", {"file_id": file_id, "error": str(e)} ) def delete_file_by_id(self, file_id: str): """Delete a file by ID.""" self._ensure_initialized() try: return self._files_class.delete_file_by_id(file_id) except Exception as e: raise StorageError( "OPENWEBUI_DELETE_ERROR", f"Failed to delete file from Open WebUI: {e}", {"file_id": file_id, "error": str(e)} ) @classmethod def is_available(cls) -> bool: """Check if Open WebUI internal API is available.""" try: instance = cls() instance._ensure_initialized() return True except: return False @classmethod def get_api_version(cls) -> str: """Return the detected Open WebUI API version.""" try: from open_webui import __version__ return __version__ except: return "unknown" # ============================================================================= # INTERNAL CORE (not exposed to LLM) # ============================================================================= class _FileshedCore: """ Internal implementation class containing all private methods. This class is NOT exposed to the LLM - only Tools.shed_* methods are visible. """ def __init__(self, tools): self._tools = tools # Reference to parent Tools instance self._commands_cache = None self._db_initialized = False @property def valves(self): """Access valves from parent Tools (ensures sync with Open WebUI updates).""" return self._tools.valves # ========================================================================= # FUNCTION HELP INDEX (for contextual error messages) # ========================================================================= FUNCTION_HELP = { # === DIRECT WRITE FUNCTIONS === "shed_patch_text": { "usage": "shed_patch_text(zone, path, content, position='end', overwrite=False, ...)", "desc": "THE standard function to write/create text files. Use this for all file writing!", "workflows": ["Direct Write"], "howtos": ["edit"], "not_for": ["Locked Edit workflow (shed_lockedit_*)"], "tips": [ "Create new file: shed_patch_text(zone, path, content, overwrite=True)", "Append to file: shed_patch_text(zone, path, content) # position='end' by default", "To READ files: use shed_exec(cmd='cat', args=['file']) or head/tail/sed", "⚠️ CSV: quote fields with comma/newline/quotes. Escape quotes by doubling: \"\"", "position: 'start', 'end', 'before', 'after', 'replace' (NOT 'at' - that's for bytes!)", "For 'before'/'after'/'replace': use line=N (first line is 1) or pattern='...'", ], }, "shed_patch_bytes": { "usage": "shed_patch_bytes(zone, path, content, position='end', offset=None)", "desc": "Write binary data (hex string) to a file", "workflows": ["Direct Write"], "howtos": ["edit", "large_files"], "not_for": ["Locked Edit workflow (shed_lockedit_*)"], "tips": ["content must be a hex string like '48454C4C4F'"], }, # === SAFE EDIT WORKFLOW === "shed_lockedit_open": { "usage": "shed_lockedit_open(zone, path)", "desc": "Step 1/3: Lock file and create working copy", "workflows": ["Locked Edit"], "howtos": ["edit"], "not_for": ["Direct Write workflow (shed_patch_*)"], "tips": [ "WORKFLOW: shed_lockedit_open → shed_lockedit_overwrite → shed_lockedit_save", "After this, use shed_lockedit_overwrite (NOT shed_patch_text!)", ], }, "shed_lockedit_overwrite": { "usage": "shed_lockedit_overwrite(zone, path, content, append=False)", "desc": "Step 2/3: Write to locked file (working copy)", "workflows": ["Locked Edit"], "howtos": ["edit"], "not_for": ["Direct Write workflow (shed_patch_*)"], "tips": [ "REQUIRES shed_lockedit_open() first!", "NO position/pattern/line params - those are for shed_patch_text!", "Use append=True to append instead of overwrite", ], }, "shed_lockedit_save": { "usage": "shed_lockedit_save(zone, path, message=None)", "desc": "Step 3/3: Save changes and unlock file", "workflows": ["Locked Edit"], "howtos": ["edit"], "not_for": ["Direct Write workflow (shed_patch_*)"], "tips": [ "⚠️ THIS CLOSES EDIT MODE! File is unlocked after save.", "To edit again, call shed_lockedit_open() first!", ], }, "shed_lockedit_cancel": { "usage": "shed_lockedit_cancel(zone, path)", "desc": "Discard changes and unlock file", "workflows": ["Locked Edit"], "howtos": ["edit"], "not_for": ["Direct Write workflow (shed_patch_*)"], "tips": ["Use instead of shed_lockedit_save to discard changes"], }, "shed_lockedit_exec": { "usage": "shed_lockedit_exec(zone, path, cmd, args=[])", "desc": "Run command on locked file (in working copy)", "workflows": ["Locked Edit", "Shell Commands"], "howtos": ["edit", "commands"], "not_for": ["Direct Write workflow (shed_patch_*)"], "tips": ["REQUIRES shed_lockedit_open() first!"], }, # === SHELL COMMANDS === "shed_exec": { "usage": "shed_exec(zone, cmd, args=[], timeout=None, stdout_file=None, stderr_file=None, group=None)", "desc": "Execute shell command in zone. Use for reading files and shell operations!", "workflows": ["Shell Commands", "Download"], "howtos": ["commands", "large_files", "download"], "not_for": ["Locked Edit workflow (use shed_lockedit_exec instead if file is locked)"], "tips": [ "READ files: cmd='cat', args=['file.txt'] (or head/tail/sed for partial reads)", "DOWNLOAD files or call APIs: use curl (saves full content to disk for processing)", "Download: cmd='curl', args=['-L', '-o', 'data.csv', 'URL']", "REDIRECT output to file: stdout_file='output.txt' (like shell > redirection)", "Check available commands: shed_allowed_commands()", ], }, # === FILE OPERATIONS === "shed_delete": { "usage": "shed_delete(zone, path, group=None, message=None)", "desc": "Delete a file or empty directory", "workflows": ["File Operations"], "howtos": ["commands"], "not_for": ["Locked Edit workflow", "Direct Write workflow"], "tips": [ "Cannot delete non-empty directories", "message: Git commit message (for documents/group zones)", ], }, "shed_rename": { "usage": "shed_rename(zone, old_path, new_path)", "desc": "Rename or move a file within same zone", "workflows": ["File Operations"], "howtos": ["commands"], "not_for": ["Locked Edit workflow", "Direct Write workflow"], "tips": ["For cross-zone moves, use shed_copy_* or shed_move_* bridges"], }, "shed_tree": { "usage": "shed_tree(zone, path='.', depth=3)", "desc": "Show directory tree (builtin, always works)", "workflows": ["File Operations", "Shell Commands"], "howtos": ["commands"], "not_for": [], "tips": ["Use when 'tree' command is not available"], }, # === SQLITE === "shed_sqlite": { "usage": "shed_sqlite(zone, path, query=None, import_csv=None, table=None, ...)", "desc": "Execute SQL or import CSV into SQLite", "workflows": ["CSV Import", "Data Processing"], "howtos": ["csv_to_sqlite"], "not_for": ["Locked Edit workflow", "Direct Write workflow"], "tips": [ "For CSV import: shed_sqlite(..., import_csv='data.csv', table='mytable')", "Auto-detects delimiter and encoding!", "⚠️ Creating CSV: quote fields with comma/newline/quotes. Escape \" as \"\" (RFC 4180)", ], }, # === UPLOADS/BRIDGES === "shed_import": { "usage": "shed_import(filename=None, import_all=False, dest_subdir='')", "desc": "Import uploaded files to Uploads zone", "workflows": ["Upload Handling"], "howtos": ["upload"], "not_for": ["Locked Edit workflow", "Direct Write workflow"], "tips": [ "filename: specific file to import, or None to see available files", "import_all=True: import all files at once", "After import, use shed_move_uploads_to_storage() to move files", ], }, "shed_move_uploads_to_storage": { "usage": "shed_move_uploads_to_storage(src, dest)", "desc": "Move file from Uploads to Storage", "workflows": ["Upload Handling", "File Operations"], "howtos": ["upload"], "not_for": ["Locked Edit workflow"], "tips": ["Uploads zone is read-only, move files to Storage for editing"], }, "shed_move_uploads_to_documents": { "usage": "shed_move_uploads_to_documents(src, dest, message=None)", "desc": "Move file from Uploads to Documents (versioned)", "workflows": ["Upload Handling", "File Operations"], "howtos": ["upload"], "not_for": ["Locked Edit workflow"], "tips": ["Documents zone has Git versioning"], }, "shed_copy_storage_to_documents": { "usage": "shed_copy_storage_to_documents(src, dest, message=None)", "desc": "Copy file from Storage to Documents (versioned)", "workflows": ["File Operations"], "howtos": ["upload"], "not_for": ["Locked Edit workflow"], "tips": [], }, "shed_move_documents_to_storage": { "usage": "shed_move_documents_to_storage(src, dest, message=None)", "desc": "Move file from Documents to Storage (removes versioning)", "workflows": ["File Operations"], "howtos": ["upload"], "not_for": ["Locked Edit workflow"], "tips": ["message: Git commit message for the removal from Documents"], }, # === LINKS === "shed_link_create": { "usage": "shed_link_create(zone, path, group=None)", "desc": "Create download link for a file", "workflows": ["Share Files"], "howtos": ["share"], "not_for": ["Locked Edit workflow", "Direct Write workflow"], "tips": [ "Returns clickable_link in Markdown format - show it directly to user!", "Links require authentication - not public!", "Use group= for files in group zones", ], }, "shed_link_list": { "usage": "shed_link_list()", "desc": "List all your download links", "workflows": ["Share Files"], "howtos": ["share"], "not_for": [], "tips": [], }, "shed_link_delete": { "usage": "shed_link_delete(file_id)", "desc": "Delete a download link", "workflows": ["Share Files"], "howtos": ["share"], "not_for": [], "tips": ["Get file_id from shed_link_list()"], }, # === GROUPS === "shed_group_list": { "usage": "shed_group_list()", "desc": "List groups you belong to", "workflows": ["Collaboration"], "howtos": [], "not_for": [], "tips": ["Group names are case-sensitive!"], }, "shed_group_info": { "usage": "shed_group_info(group)", "desc": "Get group details and members", "workflows": ["Collaboration"], "howtos": [], "not_for": [], "tips": [], }, "shed_copy_to_group": { "usage": "shed_copy_to_group(src_zone, src_path, group, dest_path, message=None)", "desc": "Copy file to a group", "workflows": ["Collaboration", "File Operations"], "howtos": [], "not_for": ["Locked Edit workflow", "Direct Write workflow"], "tips": [], }, # === ZIP === "shed_zip": { "usage": "shed_zip(zone, src, dest='', include_empty_dirs=False)", "desc": "Create ZIP archive from file or folder", "workflows": ["File Operations"], "howtos": ["commands"], "not_for": ["Locked Edit workflow", "Direct Write workflow"], "tips": [ "src: file or folder to compress", "dest: output ZIP path (default: src + '.zip')", "include_empty_dirs=True: preserve empty directories in archive", ], }, "shed_unzip": { "usage": "shed_unzip(zone, src, dest='')", "desc": "Extract ZIP archive", "workflows": ["File Operations", "Download"], "howtos": ["commands", "download"], "not_for": ["Locked Edit workflow"], "tips": [ "src: path to ZIP file", "dest: extraction folder (default: same folder as ZIP)", "After curl download, use shed_unzip to extract", ], }, "shed_zipinfo": { "usage": "shed_zipinfo(zone, path)", "desc": "List ZIP contents without extracting", "workflows": ["File Operations"], "howtos": ["commands"], "not_for": [], "tips": [], }, # === INFO === "shed_stats": { "usage": "shed_stats()", "desc": "Show storage usage statistics", "workflows": ["Info"], "howtos": [], "not_for": [], "tips": [], }, "shed_parameters": { "usage": "shed_parameters()", "desc": "Show configuration limits (quota, file size, etc.)", "workflows": ["Info"], "howtos": ["network"], "not_for": [], "tips": [], }, "shed_allowed_commands": { "usage": "shed_allowed_commands()", "desc": "List available shell commands", "workflows": ["Info", "Shell Commands"], "howtos": ["commands"], "not_for": [], "tips": [], }, "shed_file_type": { "usage": "shed_file_type(zone, path)", "desc": "Detect file MIME type and extension", "workflows": ["Download", "File Operations"], "howtos": ["download"], "not_for": [], "tips": ["Useful after downloading files with unknown type"], }, "shed_convert_eol": { "usage": "shed_convert_eol(zone, path, to='unix')", "desc": "Convert line endings (CRLF ↔ LF)", "workflows": ["File Operations"], "howtos": ["commands"], "not_for": [], "tips": [ "to='unix': Convert to LF (\\n)", "to='windows': Convert to CRLF (\\r\\n)", ], }, "shed_hexdump": { "usage": "shed_hexdump(zone, path, offset=0, length=256)", "desc": "Show hex dump of file (for binary inspection)", "workflows": ["File Operations"], "howtos": ["large_files"], "not_for": [], "tips": ["Useful for inspecting binary files without loading them"], }, "shed_force_unlock": { "usage": "shed_force_unlock(zone, path, group=None)", "desc": "Force unlock a stuck file (crash recovery)", "workflows": ["Locked Edit"], "howtos": ["edit"], "not_for": [], "tips": [ "Use if a file is stuck in edit mode after a crash", "For group files, use group= instead of zone=", ], }, "shed_group_set_mode": { "usage": "shed_group_set_mode(group, path, mode)", "desc": "Change file permission mode in group", "workflows": ["Collaboration"], "howtos": [], "not_for": [], "tips": [ "mode: 'owner' (only owner), 'group' (all members), 'owner_ro' (read-only for others)", ], }, "shed_group_chown": { "usage": "shed_group_chown(group, path, new_owner)", "desc": "Transfer file ownership in group", "workflows": ["Collaboration"], "howtos": [], "not_for": [], "tips": ["new_owner: user ID of the new owner"], }, "shed_help": { "usage": "shed_help(howto=None)", "desc": "Get help (general or specific topic)", "workflows": ["Info"], "howtos": [], "not_for": [], "tips": [ "shed_help() for quick reference", "shed_help(howto='edit') for editing guide", "shed_help(howto='download') for download guide", ], }, "shed_maintenance": { "usage": "shed_maintenance()", "desc": "Cleanup expired locks and orphan editzones", "workflows": ["Info"], "howtos": [], "not_for": [], "tips": [ "Runs automatically, but can be called manually", "Cleans locks older than lock_max_age_hours", ], }, } def _get_function_help(self, func_name: str) -> str: """Generate contextual help for a function.""" if func_name not in self.FUNCTION_HELP: return "" info = self.FUNCTION_HELP[func_name] lines = [ f"\n📖 HELP for {func_name}:", f" Usage: {info['usage']}", f" → {info['desc']}", ] # Workflows this function belongs to (most important for LLM!) if info.get("workflows"): wf_list = ", ".join(info["workflows"]) lines.append(f" 🔧 Belongs to workflow(s): {wf_list}") # What this function is NOT for (avoid confusion) if info.get("not_for"): not_list = ", ".join(info["not_for"]) lines.append(f" ⛔ NOT for: {not_list}") # Tips if info.get("tips"): lines.append(" 💡 Tips:") for tip in info["tips"]: lines.append(f" • {tip}") # Related howto guides if info.get("howtos"): howto_list = ", ".join(f'shed_help(howto="{h}")' for h in info["howtos"]) lines.append(f" 📚 More info: {howto_list}") return "\n".join(lines) # HOWTO Guides HOWTO_GUIDES = { "download": """ # HOWTO: Download and work with files from the internet ## When to use curl **Use curl to download files you need to WORK with** (save, process, import, convert): - Datasets (CSV, JSON, XML) - API responses - Repositories - Any file to process locally curl saves the full content to disk for further processing. ## Quick Recipe ``` shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "data.csv", "URL"]) ``` ## Common data sources ### World/country data ``` # REST Countries API (JSON with population, area, etc.) shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "countries.json", "https://restcountries.com/v3.1/all?fields=name,area,population"]) ``` ### GitHub raw files ``` shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "data.csv", "https://raw.githubusercontent.com/user/repo/main/data.csv"]) ``` ## Download + import to SQLite ``` shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "data.csv", "URL"]) shed_sqlite(zone="storage", path="db.sqlite", import_csv="data.csv", table="mytable") ``` ## Important - `-o filename` saves to file (mandatory!) - `-L` follows redirects - Requires network_mode = "safe" or "all" """, "csv_to_sqlite": """ # HOWTO: Insert CSV data into SQLite ## ⚠️ CRITICAL: When CREATING CSV files (RFC 4180) If you generate a CSV file, follow these quoting rules: **Quote a field if it contains:** - The delimiter (`,` or `;`) - A newline - Double quotes **Inside quoted fields, escape quotes by doubling them:** ``` ✅ CORRECT: name,location,comment "Acme Corp","New York, NY","Contains ""quotes"" here" "Café du Monde","Paris, France","Great coffee" Simple Value,Boston,No quotes needed ❌ WRONG (extra columns / broken parsing): name,location,comment Acme Corp,New York, NY,No quotes Café,Said "hello",Unescaped quote ``` **Rules summary:** - Field with comma → `"New York, NY"` - Field with quote → `"Contains ""quotes"" inside"` - Field with newline → `"Line1\nLine2"` (whole field quoted) - Simple field → `Boston` (no quotes needed) ## Quick Recipe (RECOMMENDED) ``` # Basic import (auto-detects delimiter and encoding) shed_sqlite(zone="storage", path="database.db", import_csv="data.csv", table="users") ``` ## Why use shed_sqlite with import_csv? 1. **No context pollution**: CSV stays on disk, not in your context 2. **Fast**: Batch INSERT (1000 rows at a time), uses pandas if available 3. **Smart auto-detection**: Delimiter, encoding, separators detected automatically 4. **Auto-creates table**: Column names from CSV headers ## All CSV Parameters | Parameter | Values | Description | |-----------|--------|-------------| | `import_csv` | path | CSV file path (in same zone) | | `table` | name | Target table name (required) | | `if_exists` | `"fail"` / `"replace"` / `"append"` | What to do if table exists (default: "fail") | | `delimiter` | `","` `;` `"\\t"` `"|"` | CSV delimiter. `None` = auto-detect | | `encoding` | `"utf-8"` `"latin-1"` `"cp1252"` | File encoding. `None` = auto-detect | | `date_columns` | `["col1", "col2"]` | Columns to parse as dates | | `date_format` | `"dayfirst"` / `"monthfirst"` / `"%d/%m/%Y"` | Date format | | `decimal` | `","` | Decimal separator (European: `","`) | | `skip_rows` | `0`, `1`, `2`... | Skip N rows before header | | `has_header` | `True` / `False` | ⚠️ Does first row contain column names? Default: True. If False → columns named col_1, col_2... | ## Common Scenarios ### Standard CSV (comma-separated, UTF-8) ``` shed_sqlite(zone="storage", path="db.sqlite", import_csv="data.csv", table="data") ``` ### French/European CSV (semicolon, comma decimal, ISO-8859-1) ``` shed_sqlite(zone="storage", path="db.sqlite", import_csv="french.csv", table="ventes", delimiter=";", decimal=",", encoding="latin-1") ``` ### Excel export (often semicolon + Windows encoding) ``` shed_sqlite(zone="storage", path="db.sqlite", import_csv="export.csv", table="data", delimiter=";", encoding="cp1252") ``` ### TSV file (tab-separated) ``` shed_sqlite(zone="storage", path="db.sqlite", import_csv="data.tsv", table="data", delimiter="\\t") ``` ### With European dates (DD/MM/YYYY) ``` shed_sqlite(zone="storage", path="db.sqlite", import_csv="orders.csv", table="orders", date_columns=["order_date", "ship_date"], date_format="dayfirst") ``` ### With US dates (MM/DD/YYYY) ``` shed_sqlite(zone="storage", path="db.sqlite", import_csv="orders.csv", table="orders", date_columns=["order_date"], date_format="monthfirst") ``` ### Skip title rows (file has title + blank line before headers) ``` shed_sqlite(zone="storage", path="db.sqlite", import_csv="report.csv", table="report", skip_rows=2) ``` ### CSV without header row (data only) ⚠️ If your CSV has NO column names on first line, use `has_header=False`. Otherwise the first data row will be interpreted as column names! ``` shed_sqlite(zone="storage", path="db.sqlite", import_csv="raw_data.csv", table="raw", has_header=False) # → Creates columns: col_1, col_2, col_3, ... ``` ### Replace existing table ``` shed_sqlite(zone="storage", path="db.sqlite", import_csv="updated.csv", table="users", if_exists="replace") ``` ## Step-by-step Example ### 1. Check your CSV structure first ``` shed_exec(zone="storage", cmd="head", args=["-5", "data.csv"]) shed_exec(zone="storage", cmd="file", args=["data.csv"]) # Check encoding ``` ### 2. Import (auto-detection usually works) ``` shed_sqlite(zone="storage", path="mydb.db", import_csv="data.csv", table="users") ``` ### 3. Check the response for detection info ```json { "import_info": { "method": "pandas", "detected_encoding": "latin-1", "detected_delimiter": "';'" } } ``` ### 4. Query the data ``` shed_sqlite(zone="storage", path="mydb.db", query="SELECT COUNT(*) FROM users") shed_sqlite(zone="storage", path="mydb.db", query="SELECT * FROM users LIMIT 5") ``` ## ⚠️ SELECT Query Limits (Context Protection) **By default, SELECT without LIMIT returns only 10 rows** to protect your context. ### Getting more rows ``` # Request more rows explicitly shed_sqlite(..., query="SELECT * FROM users", limit=100) # Or use LIMIT in SQL (respected as-is) shed_sqlite(..., query="SELECT * FROM users LIMIT 50") ``` ### Export ALL results to CSV (no context pollution!) ``` # Writes all rows to file, returns only stats shed_sqlite(zone="storage", path="db.sqlite", query="SELECT * FROM users", output_csv="users_export.csv") # → {"rows_exported": 5432, "output_csv": "users_export.csv"} # Then use the CSV file shed_exec(zone="storage", cmd="head", args=["-20", "users_export.csv"]) ``` ### Disable limit (use with caution!) ``` shed_sqlite(..., query="SELECT * FROM users", limit=0) # → Returns ALL rows - be careful with large tables! ``` ## Troubleshooting ### "UnicodeDecodeError" → Try `encoding="latin-1"` or `encoding="cp1252"` ### Wrong columns / data in wrong fields → Check delimiter: `delimiter=";"` or `delimiter="\\t"` ### Numbers with commas (1.234,56) → Add `decimal=","` ### Dates not parsed correctly → Add `date_columns=["col"]` and `date_format="dayfirst"` ### Headers on line 3 (title rows above) → Add `skip_rows=2` ## NEVER do this (pollutes context, very slow): ``` # BAD: Reading file content then inserting row by row content = shed_exec(zone="storage", cmd="cat", args=["huge.csv"]) # Pollutes context! for row in content: # Slow! shed_sqlite(query="INSERT INTO...") # One call per row! ``` """, "upload": """ # HOWTO: Handle user-uploaded files ## Quick Recipe ``` # Step 1: Import the file (MANDATORY) shed_import(import_all=True) # Step 2: Move to Storage for processing shed_move_uploads_to_storage(src="filename.csv", dest="filename.csv") # Step 3: Now you can work with it shed_exec(zone="storage", cmd="head", args=["-10", "filename.csv"]) ``` ## Why is shed_import() mandatory? Files uploaded by users are NOT automatically accessible. They must be imported first. ## Workflow for multiple files ``` # Import all at once shed_import(import_all=True) # Move each file shed_move_uploads_to_storage(src="data.csv", dest="data.csv") shed_move_uploads_to_storage(src="config.json", dest="config.json") ``` ## Workflow for specific file ``` shed_import(filename="report.pdf") shed_move_uploads_to_storage(src="report.pdf", dest="reports/report.pdf") ``` ## Check what was uploaded ``` shed_exec(zone="uploads", cmd="ls", args=["-la"]) ``` ## Move to Documents (version-controlled) ``` shed_import(import_all=True) shed_move_uploads_to_documents(src="important.docx", dest="important.docx") ``` """, "share": """ # HOWTO: Share files with the user ## shed_link - Create download links Three functions to manage download links: | Function | Description | |----------|-------------| | `shed_link_create()` | Create download link | | `shed_link_list()` | List your download links | | `shed_link_delete()` | Remove a download link | ## shed_link_create - Create a link ``` shed_link_create(zone="storage", path="report.pdf") shed_link_create(zone="storage", path="archive.zip") shed_link_create(zone="documents", path="presentation.pptx") shed_link_create(zone="group", group="team", path="shared/data.csv") ``` - Works for ANY file type (PDF, ZIP, images, CSV, etc.) - Returns `clickable_link` in Markdown format → **show it directly to the user!** - Example response: `"clickable_link": "[📥 Download report.pdf](https://...)"` ## shed_link_list - List links ``` shed_link_list() ``` - Returns list of all download links you've created - Each link has a `clickable_link` field ready to display ## shed_link_delete - Remove a download link ``` shed_link_delete(file_id="abc123-...") ``` - Removes file from Open WebUI - Use file_id from shed_link_create() or shed_link_list() """, "edit": """ # HOWTO: Edit files ## 🔥 CHOOSE YOUR WORKFLOW ### WORKFLOW 1: Direct Write (simple, no locking) For quick edits when you don't need locking: ``` # Overwrite entire file shed_patch_text(zone="storage", path="file.txt", content="New content", overwrite=True) # Append to file shed_patch_text(zone="storage", path="file.txt", content="\\nNew line", position="end") # Replace pattern shed_patch_text(zone="storage", path="config.py", content="DEBUG=False", pattern="DEBUG=True", position="replace") ``` ⚠️ `overwrite` is a PARAMETER (True/False), NOT a position value! ``` ✅ CORRECT: shed_patch_text(..., overwrite=True) ❌ WRONG: shed_patch_text(..., position="overwrite") ``` ### WORKFLOW 2: Locked Edit (with locking) For concurrent access or when you need rollback: ``` # Step 1: Open (locks file) shed_lockedit_open(zone="storage", path="config.json") # Step 2: Modify (use shed_lockedit_overwrite, NOT shed_patch_text!) shed_lockedit_overwrite(zone="storage", path="config.json", content="new content") # Step 3: Save OR Cancel shed_lockedit_save(zone="storage", path="config.json") # ⚠️ CLOSES edit mode! # OR shed_lockedit_cancel(zone="storage", path="config.json") # Discard changes ``` ⚠️ **CRITICAL**: `shed_lockedit_save()` CLOSES edit mode! To edit again, you MUST call `shed_lockedit_open()` first. ``` ❌ WRONG (will fail): shed_lockedit_save(...) shed_lockedit_save(...) # ERROR: NOT_IN_EDIT_MODE ✅ CORRECT: shed_lockedit_save(...) shed_lockedit_open(...) # Reopen first! shed_lockedit_overwrite(...) shed_lockedit_save(...) ``` ## Quick Reference | Task | Command | |------|---------| | Overwrite file | `shed_patch_text(..., overwrite=True)` | | Append to file | `shed_patch_text(..., position="end")` | | Prepend to file | `shed_patch_text(..., position="start")` | | Insert before line N | `shed_patch_text(..., position="before", line=N)` | | Insert after line N | `shed_patch_text(..., position="after", line=N)` | | Replace pattern | `shed_patch_text(..., pattern="...", position="replace")` | | Replace line N | `shed_patch_text(..., position="replace", line=N)` | | Safe edit (lock) | `shed_lockedit_open` → `shed_lockedit_overwrite` → `shed_lockedit_save` | 📌 **Line numbers start at 1** (first line = line=1, NOT line=0) ⚠️ **position="at" is for shed_patch_bytes (binary), NOT for text!** ## ⚠️ Creating CSV files (RFC 4180) **Quote a field if it contains:** delimiter (`,`), newline, or double quotes. **Escape quotes by doubling them:** `""` inside quoted fields. ``` ✅ CORRECT: name,location,comment "Acme Corp","New York, NY","Has ""quotes"" inside" Simple,Boston,No quotes needed ❌ WRONG: name,location,comment Acme Corp,New York, NY,Unquoted comma ``` Unquoted special characters break CSV parsing! """, "commands": """ # HOWTO: Available commands by zone ## Check what's available ``` shed_allowed_commands() ``` ## Uploads zone (READ-ONLY) cat, head, tail, less, wc, stat, file, du, ls, find, grep, awk, sed (read), sort, uniq, cut, diff, tar (list), unzip (list), md5sum, sha256sum, jq, etc. ## Storage zone (READ-WRITE) All read-only commands PLUS: cp, mv, rm, mkdir, rmdir, touch, chmod, ln, tar (create/extract), zip, gzip, gunzip, patch, split, csplit, truncate, etc. Network commands (if enabled): curl, wget, git, rsync, scp, ssh ## Documents zone (READ-WRITE + VERSIONED) Same as Storage, with automatic Git commits. ## Group zone Same as Documents, with ownership/permission checks. ## Builtins (ALWAYS available, no system dependency) These work even if system commands are missing: | Builtin | Replaces | Use when... | |---------|----------|-------------| | `shed_tree(zone, path, depth)` | `tree` | tree command missing | | `shed_zip(zone, src, dest, include_empty_dirs=False)` | `zip` | zip command missing | | `shed_unzip(zone, src, dest)` | `unzip` | unzip command missing | | `shed_zipinfo(zone, path)` | `zipinfo` | zipinfo command missing | | `shed_file_type(zone, path)` | `file` | file command missing | | `shed_convert_eol(zone, path, to)` | `dos2unix` | dos2unix/unix2dos missing | | `shed_hexdump(zone, path, offset, length)` | `xxd`/`hexdump` | hex tools missing | | `shed_sqlite(zone, path, query, params)` | `sqlite3` | sqlite3 CLI missing | | `shed_sqlite(zone, path, import_csv, table)` | `sqlite3 .import` | Import CSV into SQLite (fast!) | ## Workarounds for missing commands ### No `jq`? Use grep/sed for simple JSON ``` shed_exec(zone="storage", cmd="grep", args=['"key":', "file.json"]) ``` ### No `tree`? Use builtin ``` shed_tree(zone="storage", path=".", depth=3) ``` ### No `sqlite3` CLI? Use builtin for queries ``` shed_sqlite(zone="storage", path="db.sqlite", query="SELECT * FROM users") ``` ### Import CSV to SQLite (RECOMMENDED - no context pollution!) ``` shed_sqlite(zone="storage", path="db.sqlite", import_csv="data.csv", table="mytable") ``` ### No `unzip`? Use builtin ``` shed_unzip(zone="storage", src="archive.zip", dest="extracted/") ``` ### No `file`? Use builtin ``` shed_file_type(zone="storage", path="unknown.bin") ``` ### Large file processing without loading into context ``` # Count lines shed_exec(zone="storage", cmd="wc", args=["-l", "huge.csv"]) # Get first/last lines shed_exec(zone="storage", cmd="head", args=["-100", "huge.csv"]) shed_exec(zone="storage", cmd="tail", args=["-100", "huge.csv"]) # Search without reading whole file shed_exec(zone="storage", cmd="grep", args=["pattern", "huge.csv"]) # Extract specific columns (CSV) shed_exec(zone="storage", cmd="cut", args=["-d,", "-f1,3", "data.csv"]) # Process with awk (no context pollution) shed_exec(zone="storage", cmd="awk", args=["-F,", "{sum+=$2} END {print sum}", "data.csv"]) ``` """, "network": """ # HOWTO: Network access ## Check if network is enabled ``` shed_parameters() # Look for: "network_mode": "disabled" | "safe" | "all" ``` ## Network modes ### disabled (default) - No network access - curl, wget, git clone all blocked ### safe (downloads only) - curl/wget GET requests allowed - git clone/fetch/pull allowed - git push BLOCKED - No data exfiltration possible ### all (full access - ⚠️ risky) - Everything allowed including uploads - git push allowed - Use with caution ## Download with curl (requires "safe" or "all") ``` shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "file.zip", "https://..."]) ``` ## Clone git repo (requires "safe" or "all") ``` shed_exec(zone="storage", cmd="git", args=["clone", "https://github.com/user/repo.git"]) ``` """, "paths": """ # HOWTO: Path rules ## Golden rule Paths are RELATIVE to the zone root. Never include the zone name! ## Correct vs Wrong ``` CORRECT: shed_exec(zone="storage", cmd="cat", args=["projects/file.txt"]) WRONG: shed_exec(zone="storage", cmd="cat", args=["Storage/projects/file.txt"]) CORRECT: shed_exec(zone="documents", cmd="ls", args=["reports"]) WRONG: shed_exec(zone="documents", cmd="ls", args=["Documents/reports"]) ``` ## Zone roots Paths are always relative to the zone root: - Uploads: per-conversation (auto-managed) - Storage: your personal workspace - Documents: your versioned documents - Group: shared group space ## Case sensitivity - **Zone parameter**: case-insensitive ("Storage" = "storage" = "STORAGE") - **Group name**: ⚠️ **CASE-SENSITIVE** ("MyTeam" ≠ "myteam" ≠ "MYTEAM") - **File paths**: depends on filesystem (usually case-sensitive on Linux) ## Creating folders ``` shed_exec(zone="storage", cmd="mkdir", args=["-p", "projects/webapp/src"]) ``` ## Listing contents ``` shed_exec(zone="storage", cmd="ls", args=["-la"]) # Root of Storage shed_exec(zone="storage", cmd="ls", args=["-la", "projects"]) # Subfolder shed_tree(zone="storage", path=".", depth=3) # Tree view ``` """, "large_files": """ # HOWTO: Process large files without context pollution ## Golden Rule NEVER read a large file into your context. Use tools that process files on disk. ## ❌ WRONG (pollutes context) ``` content = shed_exec(zone="storage", cmd="cat", args=["huge.csv"]) # 100MB in context! # Then trying to process it... ``` ## ✅ CORRECT (data stays on disk) ### Get file info without reading ``` shed_exec(zone="storage", cmd="wc", args=["-l", "huge.csv"]) # Line count shed_exec(zone="storage", cmd="du", args=["-h", "huge.csv"]) # File size shed_exec(zone="storage", cmd="head", args=["-5", "huge.csv"]) # First 5 lines (structure) ``` ### Search without loading ``` shed_exec(zone="storage", cmd="grep", args=["error", "huge.log"]) # Find lines shed_exec(zone="storage", cmd="grep", args=["-c", "error", "huge.log"]) # Count matches shed_exec(zone="storage", cmd="grep", args=["-n", "pattern", "file.txt"]) # With line numbers ``` ### Extract specific data (CSV) ``` # Get specific columns shed_exec(zone="storage", cmd="cut", args=["-d,", "-f1,3,5", "data.csv"]) # Filter rows shed_exec(zone="storage", cmd="awk", args=["-F,", "$3 > 100", "data.csv"]) # Sum a column shed_exec(zone="storage", cmd="awk", args=["-F,", "{sum+=$2} END {print sum}", "data.csv"]) # Count unique values shed_exec(zone="storage", cmd="cut", args=["-d,", "-f2", "data.csv"]) # then pipe conceptually via multiple commands or awk shed_exec(zone="storage", cmd="awk", args=["-F,", "{a[$2]++} END {for(k in a) print k,a[k]}", "data.csv"]) ``` ### Transform files on disk ``` # Sort a file (output to new file) shed_exec(zone="storage", cmd="sort", args=["input.csv", "-o", "sorted.csv"]) # Remove duplicates shed_exec(zone="storage", cmd="sort", args=["-u", "input.txt", "-o", "unique.txt"]) # Convert encoding shed_exec(zone="storage", cmd="iconv", args=["-f", "ISO-8859-1", "-t", "UTF-8", "old.txt", "-o", "new.txt"]) ``` ### Split large files ``` # Split by line count (1000 lines per file) shed_exec(zone="storage", cmd="split", args=["-l", "1000", "huge.csv", "part_"]) # Split by size (10MB per file) shed_exec(zone="storage", cmd="split", args=["-b", "10M", "huge.bin", "chunk_"]) ``` ### Process into database (best for structured data) ``` # Import CSV directly into SQLite (see howto="csv_to_sqlite") shed_exec(zone="storage", cmd="sqlite3", args=[ "data.db", "-cmd", ".mode csv", "-cmd", ".import huge.csv mytable" ]) # Then query without loading everything shed_sqlite(zone="storage", path="data.db", query="SELECT category, COUNT(*), AVG(value) FROM mytable GROUP BY category") ``` ### JSON processing ``` # Extract specific field (if jq available) shed_exec(zone="storage", cmd="jq", args=[".results[].name", "huge.json"]) # If jq missing, use grep for simple extraction shed_exec(zone="storage", cmd="grep", args=["-o", '"name": "[^"]*"', "data.json"]) ``` ### Redirect output to file (like shell > redirection) ``` # Transform JSON to CSV and save to file (instead of returning in context) shed_exec(zone="storage", cmd="jq", args=["-r", ".[] | [.name, .value] | @csv", "data.json"], stdout_file="output.csv") # Combine stderr with stdout shed_exec(zone="storage", cmd="some_cmd", args=["..."], stdout_file="all_output.log", redirect_stderr_to_stdout=True) ``` ## Summary: Tools for large files | Task | Tool | Example | |------|------|---------| | Line count | `wc -l` | `wc -l file.csv` | | File size | `du -h` | `du -h file.csv` | | Preview | `head`/`tail` | `head -20 file.csv` | | Search | `grep` | `grep pattern file` | | Columns | `cut` | `cut -d, -f1,3 file.csv` | | Aggregation | `awk` | `awk -F, '{sum+=$2} END {print sum}' file.csv` | | Sort | `sort` | `sort file -o sorted` | | Transform | `sed` | `sed 's/old/new/g' file` | | Database | `sqlite3` | `.import file.csv table` | | Save output | `stdout_file=` | `shed_exec(..., stdout_file="out.txt")` | """ } def _get_user_root(self, __user__: dict) -> Path: """Returns the user's root directory.""" user_id = __user__.get("id", "anonymous") return Path(self.valves.storage_base_path) / "users" / user_id def _get_groups_root(self) -> Path: """Returns the groups root directory.""" return Path(self.valves.storage_base_path) / "groups" def _get_db_path(self) -> Path: """Returns the SQLite database path.""" return Path(self.valves.storage_base_path) / "access_auth.sqlite" def _get_conv_id(self, __metadata__: dict) -> str: """Returns the conversation ID.""" return __metadata__.get("chat_id", "unknown") def _resolve_zone( self, zone: str, group: Optional[str], __user__: dict, __metadata__: dict, require_write: bool = False, ) -> ZoneContext: """ Resolves a zone string to a complete ZoneContext. :param zone: "uploads", "storage", "documents", or "group" :param group: Group name/ID (required if zone="group") :param __user__: Open WebUI user context :param __metadata__: Conversation metadata :param require_write: If True, rejects uploads zone :return: ZoneContext with all zone-specific info :raises StorageError: If zone invalid or access denied """ zone_lower = zone.lower() user_root = self._get_user_root(__user__) conv_id = self._get_conv_id(__metadata__) # UPLOADS: read-only, isolated per conversation if zone_lower == "uploads": if require_write: raise StorageError( "ZONE_READONLY", "Uploads zone is read-only", {"zone": zone}, "Use 'storage' or 'documents' for write operations" ) zone_path = user_root / "Uploads" / conv_id return ZoneContext( zone_root=zone_path, zone_name="Uploads", zone_lower="uploads", editzone_base=None, conv_id=conv_id, group_id=None, git_commit=False, readonly=True, whitelist=WHITELIST_READONLY, ) # STORAGE: read/write, no Git elif zone_lower == "storage": zone_path = user_root / "Storage" / "data" self._ensure_dir(zone_path) return ZoneContext( zone_root=zone_path, zone_name="Storage", zone_lower="storage", editzone_base=user_root / "Storage", conv_id=conv_id, group_id=None, git_commit=False, readonly=False, whitelist=WHITELIST_READWRITE, ) # DOCUMENTS: read/write, auto Git elif zone_lower == "documents": zone_path = user_root / "Documents" / "data" self._ensure_dir(zone_path) self._init_git_repo(zone_path) return ZoneContext( zone_root=zone_path, zone_name="Documents", zone_lower="documents", editzone_base=user_root / "Documents", conv_id=conv_id, group_id=None, git_commit=True, readonly=False, whitelist=WHITELIST_READWRITE, ) # GROUP: like Documents but with group validation elif zone_lower == "group": if not group: raise StorageError( "MISSING_PARAMETER", "Group parameter is required for zone='group'", {"zone": zone}, "Use: shed_xxx(zone='group', group='team-name', ...)" ) group_id = self._validate_group_id(group) self._check_group_access(__user__, group_id) zone_path = self._ensure_group_space(group_id) return ZoneContext( zone_root=zone_path, zone_name=f"Group:{group}", zone_lower="group", editzone_base=self._get_groups_root() / group_id, conv_id=conv_id, group_id=group_id, git_commit=True, readonly=False, whitelist=WHITELIST_READWRITE, ) else: raise StorageError( "INVALID_ZONE", f"Invalid zone: {zone}", {"zone": zone, "valid": ["uploads", "storage", "documents", "group"]}, "Use one of: uploads, storage, documents, group" ) def _resolve_chroot_path(self, base: Path, relative_path: str) -> Path: """ Resolves a relative path within a chroot and verifies it doesn't escape. Raises PATH_ESCAPE if escape attempt detected. """ # Clean the path relative_path = relative_path.lstrip("/") # Resolve target = (base / relative_path).resolve() base_resolved = base.resolve() # Verify we stay in chroot try: target.relative_to(base_resolved) except ValueError: raise StorageError( "PATH_ESCAPE", f"Chroot escape attempt detected", {"path": relative_path, "chroot": str(base)}, "Use only relative paths without ../" ) return target def _validate_relative_path(self, path: str) -> str: """ Validates that a relative path contains no traversal. Returns the cleaned and normalized path. """ # Normalize Unicode to NFC (prevents path confusion attacks) path = unicodedata.normalize("NFC", path) # Clean path = path.lstrip("/") # Block absolute paths if path.startswith("/"): raise StorageError( "PATH_ESCAPE", "Absolute paths forbidden", {"path": path}, "Use only relative paths" ) # Block .. that escapes current directory # Virtually resolve the path to check parts = [] for part in path.split("/"): if part == "..": if not parts: raise StorageError( "PATH_ESCAPE", "Directory escape attempt", {"path": path}, "Paths with .. going too high are forbidden" ) parts.pop() elif part and part != ".": parts.append(part) return "/".join(parts) if parts else "" def _validate_group_id(self, group_id: str) -> str: """ Validates and resolves a group identifier. Accepts either a group ID (UUID) or a group name (case-sensitive). Returns the validated group ID. """ if not group_id: raise StorageError( "INVALID_GROUP_ID", "Group ID cannot be empty" ) # Block path traversal characters if ".." in group_id or "/" in group_id or "\\" in group_id: raise StorageError( "INVALID_GROUP_ID", f"Invalid group ID: contains forbidden characters", {"group_id": group_id}, "Group ID cannot contain '..' or path separators" ) # Block null bytes and other control characters if any(ord(c) < 32 for c in group_id): raise StorageError( "INVALID_GROUP_ID", "Group ID contains invalid characters" ) # Check if it's a valid UUID (group ID) try: uuid.UUID(group_id) return group_id # Already a valid UUID except ValueError: pass # Not a UUID, try to resolve as group name # Try to resolve group name to ID (case-sensitive) if GROUPS_AVAILABLE: try: # Search for group by name groups = Groups.get_all_groups() case_insensitive_matches = [] for g in groups: if g.name == group_id: return g.id # Exact match found # Collect case-insensitive matches for hint if g.name.lower() == group_id.lower(): case_insensitive_matches.append(g.name) # No exact match - check if there's a case mismatch if case_insensitive_matches: raise StorageError( "GROUP_NOT_FOUND", f"Group '{group_id}' not found (name is case-sensitive)", {"requested": group_id, "similar": case_insensitive_matches}, f"Did you mean: {', '.join(case_insensitive_matches)}?" ) except StorageError: raise # Re-raise our own errors except Exception: pass # If we get here, it's not a UUID and not a known group name # Return as-is and let _check_group_access handle the error return group_id def _validate_command(self, cmd: str, whitelist: set, args: list = None) -> None: """Validates that a command is allowed.""" if cmd in BLACKLIST_COMMANDS: raise StorageError( "COMMAND_FORBIDDEN", f"Command '{cmd}' is forbidden", {"command": cmd}, "See shed_help() for allowed commands" ) # Handle curl/wget separately (controlled by valves) if cmd in CURL_COMMANDS: if self.valves.network_mode == "all": # All curl operations allowed return elif self.valves.network_mode == "safe": # Only GET operations - validate args self._validate_curl_args_get_only(args or []) return else: raise StorageError( "COMMAND_FORBIDDEN", f"Command '{cmd}' is disabled", {"command": cmd}, "Ask admin to set network_mode to 'safe' or 'all'" ) if cmd not in whitelist: raise StorageError( "COMMAND_FORBIDDEN", f"Command '{cmd}' is not in whitelist", {"command": cmd, "allowed": sorted(list(whitelist))[:20]}, "Use shed_allowed_commands() to see available commands" ) # If git, validate subcommands if cmd == "git" and args is not None: self._validate_git_command(args) # If find, block -exec options (can execute arbitrary commands) if cmd == "find" and args is not None: for arg in args: if str(arg) in FIND_EXEC_OPTIONS: raise StorageError( "ARGUMENT_FORBIDDEN", f"Option '{arg}' is forbidden for find", {"argument": str(arg)}, "find -exec can execute arbitrary commands. Use find + manual processing instead." ) # If awk, block system() and getline pipes (can execute commands) if cmd == "awk" and args is not None: for arg in args: if AWK_DANGEROUS_PATTERNS.search(str(arg)): raise StorageError( "ARGUMENT_FORBIDDEN", "awk script contains forbidden patterns (system, getline pipe)", {"argument": str(arg)[:100]}, "awk system() and getline pipes can execute commands" ) # If ln, block -s (symbolic links can point outside chroot) if cmd == "ln" and args is not None: for arg in args: arg_str = str(arg) if arg_str == "-s" or arg_str == "--symbolic" or (arg_str.startswith("-") and "s" in arg_str and not arg_str.startswith("--")): raise StorageError( "ARGUMENT_FORBIDDEN", "Symbolic links (-s) are forbidden", {"argument": arg_str}, "Symlinks can point outside the allowed zone. Use hard links (ln without -s) or cp instead." ) # If tar, block --absolute-names / -P (extracts to absolute paths) if cmd == "tar" and args is not None: for arg in args: arg_str = str(arg) if arg_str == "-P" or arg_str == "--absolute-names": raise StorageError( "ARGUMENT_FORBIDDEN", "Option --absolute-names (-P) is forbidden for tar", {"argument": arg_str}, "This option allows extracting to absolute paths outside the allowed zone." ) # Handle network-capable commands (ffmpeg, pandoc, convert, etc.) if cmd in NETWORK_CAPABLE_COMMANDS: self._validate_network_command(cmd, args or []) def _validate_curl_args_get_only(self, args: list) -> None: """Validates curl/wget arguments in GET-only mode (blocks upload/POST options).""" for arg in args: arg_str = str(arg) # Check if it's a forbidden option # Handle both "-X" and "-XPOST" formats for forbidden in CURL_FORBIDDEN_GET_OPTS: if arg_str == forbidden: raise StorageError( "ARGUMENT_FORBIDDEN", f"Option '{arg_str}' is forbidden in GET-only mode", {"argument": arg_str, "forbidden_options": sorted(CURL_FORBIDDEN_GET_OPTS)}, "This option allows data upload. Ask admin to set network_mode to 'all'" ) # Handle combined format: -XPOST, --request=POST if arg_str.startswith(forbidden + "=") or arg_str.startswith(forbidden): if forbidden in {"-X", "--request", "-d", "--data", "-F", "--form", "-T"}: raise StorageError( "ARGUMENT_FORBIDDEN", f"Option '{arg_str}' is forbidden in GET-only mode", {"argument": arg_str}, "This option allows data upload. Ask admin to set network_mode to 'all'" ) def _validate_network_command(self, cmd: str, args: list) -> bool: """ Validates network-capable commands (ffmpeg, pandoc, convert, etc.). Returns True if URLs should be allowed in subsequent _validate_args call. Raises StorageError if command is not allowed with current valve settings. """ # Commands that can SEND data (exfiltration risk) if cmd in NETWORK_OUTPUT_COMMANDS: # ffmpeg if self.valves.network_mode == "all": # All operations allowed, URLs ok return True elif self.valves.network_mode == "safe": # Block output protocols that can exfiltrate data for arg in args: arg_str = str(arg).lower() for protocol in FFMPEG_OUTPUT_PROTOCOLS: if protocol in arg_str: raise StorageError( "ARGUMENT_FORBIDDEN", f"Output protocol '{protocol}' is forbidden in 'safe' mode", {"argument": str(arg), "protocol": protocol}, "This protocol can send data to remote servers. Ask admin to set network_mode to 'all'" ) # Input URLs are ok return True else: # Network disabled - block ffmpeg entirely raise StorageError( "COMMAND_FORBIDDEN", f"Command '{cmd}' requires network access", {"command": cmd}, "Ask admin to set network_mode to 'safe' or 'all'" ) # Commands that can only RECEIVE data (read-only risk) if cmd in NETWORK_INPUT_COMMANDS: # ffprobe, pandoc, convert, etc. if self.valves.network_mode in ("safe", "all"): # Network access allowed, URLs ok return True else: # Network disabled - block URLs in arguments # (command itself is ok for local files) for arg in args: arg_str = str(arg) if URL_PATTERN.match(arg_str): raise StorageError( "ARGUMENT_FORBIDDEN", f"URLs are forbidden when network is disabled", {"argument": arg_str, "command": cmd}, "Ask admin to set network_mode to 'safe' or 'all' for URL access" ) return False return False def _validate_args(self, args: list, readonly: bool = False, cmd: str = "") -> None: """Validates arguments to detect injections and network access.""" # Check if URL check should be skipped for network-capable commands skip_url_check = False # Network-capable commands (ffmpeg, pandoc, convert, etc.) if cmd in NETWORK_CAPABLE_COMMANDS: skip_url_check = self.valves.network_mode in ("safe", "all") # curl/wget are handled separately but also need URL allowance if cmd in CURL_COMMANDS: skip_url_check = self.valves.network_mode in ("safe", "all") # git clone/fetch/pull need URLs in safe/all mode if cmd == "git": skip_url_check = self.valves.network_mode in ("safe", "all") for arg in args: arg_str = str(arg) # Check if this argument is a URL (for network-capable commands with network enabled) is_allowed_url = skip_url_check and URL_PATTERN.match(arg_str) # Choose pattern based on command (some commands use | in their internal syntax) dangerous_pattern = ( DANGEROUS_ARGS_PATTERN_ALLOW_PIPE if cmd in COMMANDS_ALLOWING_PIPE else DANGEROUS_ARGS_PATTERN ) # Check dangerous patterns # For allowed URLs, we skip this check because & is common in query strings if not is_allowed_url and dangerous_pattern.search(arg_str): raise StorageError( "ARGUMENT_FORBIDDEN", f"Dangerous argument detected", {"argument": arg_str}, "Characters ; | & && || > >> < << $( ${ ` are forbidden" ) # Block URLs (network access via ffmpeg, pandoc, imagemagick, etc.) # Skip if command is network-capable and network valves are enabled if not skip_url_check and URL_PATTERN.match(arg_str): raise StorageError( "ARGUMENT_FORBIDDEN", f"URLs are forbidden in arguments", {"argument": arg_str}, "Network access via URLs is blocked. Ask admin to set network_mode to 'safe' or 'all'." ) # In readonly mode, forbid -i/--in-place for sed (in-place editing) if readonly and (arg_str == "-i" or arg_str.startswith("-i") or arg_str == "--in-place" or arg_str.startswith("--in-place")): raise StorageError( "ARGUMENT_FORBIDDEN", "Option -i/--in-place is forbidden in read-only mode", {"argument": arg_str}, "This zone is read-only" ) def _validate_path_args(self, args: list, chroot: Path, cmd: str = "") -> list: """ Validates that arguments don't allow escaping the chroot. Blocks: absolute paths and .. that escape chroot. For sed/grep/awk, expressions starting with / are NOT treated as paths, but only if they look like expressions (contain space, :, or end with /). """ chroot_resolved = chroot.resolve() # Commands that use /pattern/ expressions expression_commands = {"sed", "grep", "egrep", "fgrep", "awk", "perl"} for arg in args: arg_str = str(arg) # Skip flags (like -i, -e, -n, etc.) if arg_str.startswith("-"): continue # For expression-based commands, detect expressions vs paths if cmd in expression_commands and arg_str.startswith("/"): # Clear expression indicators: # - Contains space: "/Team: Eng/a new line" # - Contains colon: "/Team: Eng/" # - Ends with /: "/pattern/" is_expression = False if " " in arg_str: is_expression = True elif ":" in arg_str: is_expression = True elif arg_str.endswith("/"): is_expression = True elif len(arg_str) > 2: # Check for /pattern/X format where X is a single sed command # Valid: /foo/d, /bar/p, /baz/a text # Invalid: /etc/passwd (passwd is not a single letter) second_slash = arg_str.find("/", 1) if second_slash > 0 and second_slash < len(arg_str) - 1: after_slash = arg_str[second_slash + 1:] # Must be a single sed command letter, alone or followed by space/text if len(after_slash) == 1 and after_slash in "acdipqswy": is_expression = True elif len(after_slash) > 1 and after_slash[0] in "acdipqswy" and after_slash[1] in " \t/": is_expression = True if is_expression: continue # Block absolute paths if arg_str.startswith("/"): raise StorageError( "PATH_ESCAPE", "Absolute paths forbidden", {"path": arg_str}, "Use only relative paths" ) # Verify .. doesn't escape chroot if ".." in arg_str: try: target = (chroot / arg_str).resolve() target.relative_to(chroot_resolved) except ValueError: raise StorageError( "PATH_ESCAPE", "Chroot escape attempt detected", {"path": arg_str, "chroot": str(chroot)}, "Resolved path escapes allowed zone" ) return list(args) def _validate_git_command(self, args: list) -> None: """Validates a Git subcommand based on whitelists and network valves.""" if not args: raise StorageError( "ARGUMENT_FORBIDDEN", "Git command without subcommand", {}, "Example: git status, git log" ) subcmd = args[0] # Always forbidden (dangerous local operations) if subcmd in GIT_BLACKLIST: raise StorageError( "COMMAND_FORBIDDEN", f"Command 'git {subcmd}' is forbidden", {"subcommand": subcmd}, "gc, prune, filter-branch are always forbidden" ) # Network push (exfiltration) - requires network_mode="all" if subcmd in GIT_NETWORK_PUSH: if self.valves.network_mode != "all": raise StorageError( "COMMAND_FORBIDDEN", f"Command 'git {subcmd}' is disabled (network upload)", {"subcommand": subcmd}, "Ask admin to set network_mode to 'all'" ) return # Allowed # Network download - requires network_mode="safe" or "all" if subcmd in GIT_NETWORK_GET: if not (self.valves.network_mode in ("safe", "all")): raise StorageError( "COMMAND_FORBIDDEN", f"Command 'git {subcmd}' is disabled (network access)", {"subcommand": subcmd}, "Ask admin to set network_mode to 'safe' or 'all'" ) return # Allowed # Standard whitelist check for local operations if subcmd not in GIT_WHITELIST_READ and subcmd not in GIT_WHITELIST_WRITE: raise StorageError( "COMMAND_FORBIDDEN", f"Git subcommand '{subcmd}' is not allowed", {"subcommand": subcmd, "allowed_read": sorted(GIT_WHITELIST_READ), "allowed_write": sorted(GIT_WHITELIST_WRITE)} ) def _exec_command( self, cmd: str, args: list, cwd: Path, timeout: int, max_output: int = None, stdout_file: Path = None, stderr_file: Path = None, redirect_stderr_to_stdout: bool = False, ) -> dict: """Executes a command and returns the result. Args: cmd: Command to execute args: Command arguments cwd: Working directory timeout: Timeout in seconds max_output: Max output size in bytes (None=default, 0=unlimited up to absolute max) stdout_file: Path to redirect stdout to (None=capture in memory) stderr_file: Path to redirect stderr to (None=capture in memory) redirect_stderr_to_stdout: If True, redirect stderr to stdout (2>&1) """ # Handle tar extraction: add --no-same-owner to prevent ownership errors # This avoids "Cannot change ownership" errors that cause tar to return code 2 # even though files are extracted successfully if cmd == "tar": args_str = " ".join(str(a) for a in args) is_extraction = any(x in args_str for x in ["-x", "--extract"]) # Also check combined flags like -xJf, -xzf, etc. if not is_extraction: for arg in args: arg_s = str(arg) if arg_s.startswith("-") and not arg_s.startswith("--") and "x" in arg_s: is_extraction = True break if is_extraction and "--no-same-owner" not in args_str: args = ["--no-same-owner"] + list(args) # Handle curl: require -o/--output to prevent stdout pollution # Also add -sS to suppress progress but show errors if cmd == "curl": args_str = " ".join(str(a) for a in args) # Check for output redirection (allow if stdout_file is specified) has_output = any(x in args_str for x in ["-o", "--output", "-O", "--remote-name"]) or stdout_file if not has_output: raise StorageError( "ARGUMENT_REQUIRED", "curl requires -o to save to file", {"command": "curl", "args": [str(a) for a in args]}, 'Use: curl -L -o "filename" "url" to save directly to file. This prevents context pollution.' ) if "-s" not in args_str and "--silent" not in args_str: args = ["-sS"] + list(args) # -s=silent, -S=show-error # Handle wget: require -O/--output-document to prevent stdout pollution # Also add -q to suppress progress if cmd == "wget": args_str = " ".join(str(a) for a in args) # Check for output redirection (allow if stdout_file is specified) has_output = any(x in args_str for x in ["-O", "--output-document"]) or stdout_file if not has_output: raise StorageError( "ARGUMENT_REQUIRED", "wget requires -O to save to file", {"command": "wget", "args": [str(a) for a in args]}, 'Use: wget -O "filename" "url" to save directly to file. This prevents context pollution.' ) if "-q" not in args_str and "--quiet" not in args_str: args = ["-q"] + list(args) # Build command full_cmd = [cmd] + [str(a) for a in args] # Prepare file handles for redirection stdout_handle = None stderr_handle = None files_to_close = [] try: # Setup stdout redirection if stdout_file: stdout_file.parent.mkdir(parents=True, exist_ok=True) stdout_handle = open(stdout_file, 'w', encoding='utf-8') files_to_close.append(stdout_handle) else: stdout_handle = subprocess.PIPE # Setup stderr redirection if redirect_stderr_to_stdout: stderr_handle = subprocess.STDOUT elif stderr_file: stderr_file.parent.mkdir(parents=True, exist_ok=True) stderr_handle = open(stderr_file, 'w', encoding='utf-8') files_to_close.append(stderr_handle) else: stderr_handle = subprocess.PIPE result = subprocess.run( full_cmd, cwd=str(cwd), stdout=stdout_handle, stderr=stderr_handle, text=True, timeout=timeout, ) # Close files before reading them for f in files_to_close: f.close() files_to_close = [] # Get stdout content if stdout_file: stdout = f"[Output written to {stdout_file.name}]" stdout_truncated = False else: # Truncate stdout if too long (prevents context pollution) if max_output is None: effective_max = self.valves.max_output_default elif max_output == 0: effective_max = self.valves.max_output_absolute else: effective_max = min(max_output, self.valves.max_output_absolute) stdout = result.stdout or "" stdout_truncated = False if len(stdout) > effective_max: stdout = stdout[:effective_max] + f"\n\n... [TRUNCATED - {len(result.stdout)} bytes total, showing first {effective_max}] ..." stdout_truncated = True # Get stderr content if stderr_file: stderr = f"[Errors written to {stderr_file.name}]" stderr_truncated = False elif redirect_stderr_to_stdout: stderr = "" stderr_truncated = False else: if max_output is None: effective_max = self.valves.max_output_default elif max_output == 0: effective_max = self.valves.max_output_absolute else: effective_max = min(max_output, self.valves.max_output_absolute) stderr = result.stderr or "" stderr_truncated = False if len(stderr) > effective_max: stderr = stderr[:effective_max] + f"\n\n... [TRUNCATED - {len(result.stderr)} bytes total, showing first {effective_max}] ..." stderr_truncated = True response = { "success": result.returncode == 0, "cmd": cmd, "args": [str(a) for a in args], "stdout": stdout, "stderr": stderr, "returncode": result.returncode, } # Add file info if redirected if stdout_file: response["stdout_file"] = stdout_file.name if stderr_file: response["stderr_file"] = stderr_file.name if (not stdout_file and stdout_truncated) or (not stderr_file and not redirect_stderr_to_stdout and stderr_truncated): response["truncated"] = True response["hint"] = "Output was truncated. Use stdout_file= to save full output to a file." return response except subprocess.TimeoutExpired: raise StorageError( "TIMEOUT", f"Command timeout after {timeout}s", {"command": cmd, "timeout": timeout}, f"Increase timeout (max: {self.valves.exec_timeout_max}s)" ) except FileNotFoundError: raise StorageError( "COMMAND_NOT_FOUND", f"Command '{cmd}' not found on system", {"command": cmd}, "Use shed_allowed_commands() to see available commands" ) except Exception as e: raise StorageError( "EXEC_ERROR", f"Execution error: {str(e)}", {"command": cmd, "error": str(e)} ) finally: # Ensure files are closed on error for f in files_to_close: try: f.close() except: pass def _ensure_dir(self, path: Path) -> None: """Creates a directory and its parents if needed.""" path.mkdir(parents=True, exist_ok=True) def _rm_with_empty_parents(self, filepath: Path, stop_at: Path) -> None: """Deletes a file then walks up deleting empty folders.""" if filepath.exists(): if filepath.is_dir(): shutil.rmtree(filepath) else: filepath.unlink() # Walk up and delete empty folders parent = filepath.parent stop_at_resolved = stop_at.resolve() while parent.resolve() != stop_at_resolved: try: parent.rmdir() # Fails if not empty parent = parent.parent except OSError: break def _get_lock_path(self, zone_root: Path, relative_path: str) -> Path: """Returns the lock file path.""" return zone_root / "locks" / (relative_path + ".lock") def _get_editzone_path(self, zone_root: Path, conv_id: str, relative_path: str) -> Path: """Returns the path in editzone.""" return zone_root / "editzone" / conv_id / relative_path def _acquire_lock(self, lock_path: Path, conv_id: str, user_id: str, path: str) -> None: """ Atomically acquires a lock file. Uses exclusive file creation to prevent race conditions (TOCTOU). Raises FILE_LOCKED if already locked by another conversation. """ self._ensure_dir(lock_path.parent) lock_data = { "conv_id": conv_id, "user_id": user_id, "locked_at": datetime.now(timezone.utc).isoformat(), "path": path, } # Check if lock exists and is from another conversation if lock_path.exists(): try: existing_lock = json.loads(lock_path.read_text()) if existing_lock.get("conv_id") != conv_id: raise StorageError( "FILE_LOCKED", f"File locked by another conversation", { "locked_by": existing_lock.get("user_id"), "locked_at": existing_lock.get("locked_at"), "conv_id": existing_lock.get("conv_id"), "path": existing_lock.get("path"), }, "Wait or use shed_force_unlock() / shed_maintenance()" ) # Same conversation - can proceed (re-lock) lock_path.write_text(json.dumps(lock_data, indent=2)) return except json.JSONDecodeError: # Corrupted lock - overwrite it pass # Try atomic creation with exclusive mode try: # os.open with O_CREAT | O_EXCL is atomic fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644) try: os.write(fd, json.dumps(lock_data, indent=2).encode('utf-8')) finally: os.close(fd) except FileExistsError: # Race condition: another process created the lock between our check and create # Re-read and check try: existing_lock = json.loads(lock_path.read_text()) if existing_lock.get("conv_id") != conv_id: raise StorageError( "FILE_LOCKED", f"File locked by another conversation", { "locked_by": existing_lock.get("user_id"), "locked_at": existing_lock.get("locked_at"), "conv_id": existing_lock.get("conv_id"), }, "Wait or use shed_force_unlock() / shed_maintenance()" ) except (json.JSONDecodeError, FileNotFoundError): # Lock was corrupted or removed - try again lock_path.write_text(json.dumps(lock_data, indent=2)) def _check_lock_owner(self, lock_path: Path, user_id: str) -> None: """ Verifies that the current user owns the lock. Raises NOT_LOCK_OWNER if not. """ if lock_path.exists(): try: lock_data = json.loads(lock_path.read_text()) if lock_data.get("user_id") != user_id: raise StorageError( "NOT_LOCK_OWNER", "You don't own this lock", {"locked_by": lock_data.get("user_id"), "your_id": user_id}, "Only the user who opened the file can save/cancel" ) except json.JSONDecodeError: pass # Corrupted lock, allow operation def _validate_content_size(self, content: str) -> None: """Checks that content doesn't exceed max size.""" max_bytes = self.valves.max_file_size_mb * 1024 * 1024 if len(content.encode('utf-8')) > max_bytes: raise StorageError( "FILE_TOO_LARGE", f"Content too large ({len(content.encode('utf-8')) / 1024 / 1024:.2f} MB)", {"max_mb": self.valves.max_file_size_mb}, f"Max size is {self.valves.max_file_size_mb} MB" ) def _get_user_usage(self, user_root: Path) -> int: """Calculate total size of user's personal space in bytes.""" total = 0 for zone in ["Uploads", "Storage/data", "Documents/data"]: zone_path = user_root / zone if zone_path.exists(): for f in zone_path.rglob("*"): if f.is_file(): try: total += f.stat().st_size except (OSError, FileNotFoundError): pass return total def _get_path_size(self, path: Path) -> int: """Calculate size of a file or directory in bytes.""" if not path.exists(): return 0 if path.is_file(): return path.stat().st_size # Directory: sum all files recursively total = 0 for f in path.rglob("*"): if f.is_file(): try: total += f.stat().st_size except (OSError, FileNotFoundError): pass return total def _check_quota(self, __user__: dict, additional_bytes: int) -> None: """ Checks if adding additional_bytes would exceed user quota. Raises QUOTA_EXCEEDED if quota would be exceeded. """ user_root = self._get_user_root(__user__) current_usage = self._get_user_usage(user_root) quota_bytes = self.valves.quota_per_user_mb * 1024 * 1024 if current_usage + additional_bytes > quota_bytes: raise StorageError( "QUOTA_EXCEEDED", f"Quota exceeded: {current_usage / 1024 / 1024:.1f} MB used + {additional_bytes / 1024 / 1024:.1f} MB requested > {self.valves.quota_per_user_mb} MB quota", { "current_usage_mb": round(current_usage / 1024 / 1024, 2), "requested_mb": round(additional_bytes / 1024 / 1024, 2), "quota_mb": self.valves.quota_per_user_mb, }, "Free up space or contact administrator to increase quota" ) def _check_group_quota(self, group_id: str, additional_bytes: int) -> None: """ Checks if adding additional_bytes would exceed group quota. Raises QUOTA_EXCEEDED if quota would be exceeded. """ group_data_path = self._get_groups_root() / group_id / "data" current_usage = self._get_path_size(group_data_path) quota_bytes = self.valves.quota_per_group_mb * 1024 * 1024 if current_usage + additional_bytes > quota_bytes: raise StorageError( "QUOTA_EXCEEDED", f"Group quota exceeded: {current_usage / 1024 / 1024:.1f} MB used + {additional_bytes / 1024 / 1024:.1f} MB requested > {self.valves.quota_per_group_mb} MB quota", { "group_id": group_id, "current_usage_mb": round(current_usage / 1024 / 1024, 2), "requested_mb": round(additional_bytes / 1024 / 1024, 2), "quota_mb": self.valves.quota_per_group_mb, }, "Free up space in group or contact administrator to increase quota" ) def _git_run(self, args: list, cwd: Path, timeout: int = 30) -> subprocess.CompletedProcess: """ Layer 2: Executes a git command. All git operations MUST use this method for consistency. Includes timeout and error handling. """ try: result = subprocess.run( ["git"] + args, cwd=str(cwd), capture_output=True, text=True, timeout=timeout, ) return result except subprocess.TimeoutExpired: raise StorageError( "TIMEOUT", f"Git command timed out after {timeout}s", {"command": ["git"] + args}, "Try a simpler operation or increase timeout" ) def _check_command_available(self, cmd: str) -> bool: """ Layer 2: Checks if a command is available on the system. Used for introspection (shed_allowed_commands). """ try: result = subprocess.run( ["which", cmd], capture_output=True, timeout=5, ) return result.returncode == 0 except Exception: return False def _init_git_repo(self, repo_path: Path) -> None: """Initializes a Git repository if needed.""" git_dir = repo_path / ".git" if not git_dir.exists(): self._ensure_dir(repo_path) self._git_run(["init"], repo_path) self._git_run(["config", "user.email", "storage@openwebui.local"], repo_path) self._git_run(["config", "user.name", "Fileshed"], repo_path) def _git_commit(self, repo_path: Path, message: str) -> None: """Performs a Git commit.""" self._git_run(["add", "-A"], repo_path) self._git_run(["commit", "-m", message, "--allow-empty-message"], repo_path) def _git_commit_as_user(self, repo_path: Path, message: str, user_id: str) -> None: """Performs a Git commit with user as author.""" self._git_run(["add", "-A"], repo_path) author = f"{user_id} <{user_id}@fileshed>" self._git_run(["commit", "--author", author, "-m", message, "--allow-empty-message"], repo_path) # ========================================================================= # GROUP HELPERS # ========================================================================= def _init_db(self) -> None: """Initialize SQLite database if needed.""" if self._db_initialized: return db_path = self._get_db_path() db_path.parent.mkdir(parents=True, exist_ok=True) conn = sqlite3.connect(str(db_path), timeout=10.0) try: conn.execute(""" CREATE TABLE IF NOT EXISTS file_ownership ( id INTEGER PRIMARY KEY AUTOINCREMENT, group_id TEXT NOT NULL, file_path TEXT NOT NULL, owner_id TEXT NOT NULL, write_access TEXT NOT NULL CHECK(write_access IN ('owner', 'group', 'owner_ro')), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(group_id, file_path) ) """) conn.execute("CREATE INDEX IF NOT EXISTS idx_ownership_group ON file_ownership(group_id)") conn.execute("CREATE INDEX IF NOT EXISTS idx_ownership_owner ON file_ownership(owner_id)") conn.execute("CREATE INDEX IF NOT EXISTS idx_ownership_path ON file_ownership(group_id, file_path)") conn.commit() finally: conn.close() self._db_initialized = True def _db_execute(self, query: str, params: tuple = ()) -> tuple: """ Execute a database query. Returns (rows, rowcount) tuple: - rows: list of Row objects for SELECT, empty list for others - rowcount: number of affected rows for INSERT/UPDATE/DELETE """ self._init_db() conn = sqlite3.connect(str(self._get_db_path()), timeout=10.0, isolation_level="IMMEDIATE") conn.row_factory = sqlite3.Row try: cursor = conn.execute(query, params) result = cursor.fetchall() rowcount = cursor.rowcount conn.commit() return result, rowcount finally: conn.close() def _get_user_groups(self, user_id: str) -> list: """Get groups the user belongs to via Open WebUI API.""" if not GROUPS_AVAILABLE: return [] try: return Groups.get_groups_by_member_id(user_id) except Exception: return [] def _is_group_member(self, user_id: str, group_id: str) -> bool: """Check if user is member of group.""" user_groups = self._get_user_groups(user_id) return any(g.id == group_id for g in user_groups) def _check_group_access(self, __user__: dict, group_id: str) -> None: """Verify user has access to group. Raises error if not.""" if not GROUPS_AVAILABLE: raise StorageError( "GROUP_NOT_AVAILABLE", "Group features are not available", hint="Open WebUI Groups API not found" ) user_id = __user__.get("id", "") if not self._is_group_member(user_id, group_id): raise StorageError( "GROUP_ACCESS_DENIED", f"You are not a member of group '{group_id}'", {"group_id": group_id}, "Request access from a group administrator" ) def _ensure_group_space(self, group_id: str) -> Path: """Ensure group space exists. Returns data path.""" group_path = self._get_groups_root() / group_id data_path = group_path / "data" if not data_path.exists(): data_path.mkdir(parents=True, exist_ok=True) (group_path / "editzone").mkdir(exist_ok=True) (group_path / "locks").mkdir(exist_ok=True) # Initialize Git repository via Layer 2 self._git_run(["init"], data_path) self._git_run(["config", "user.email", "storage@openwebui"], data_path) self._git_run(["config", "user.name", "Fileshed"], data_path) return data_path def _get_group_data_path(self, group_id: str) -> Path: """Get data path for a group.""" return self._get_groups_root() / group_id / "data" def _get_group_editzone(self, group_id: str, conv_id: str) -> Path: """Get editzone path for a group.""" return self._get_groups_root() / group_id / "editzone" / conv_id def _get_group_locks_dir(self, group_id: str) -> Path: """Get locks directory for a group.""" return self._get_groups_root() / group_id / "locks" def _get_file_ownership(self, group_id: str, file_path: str) -> Optional[dict]: """Get ownership info for a file in group space.""" rows, _ = self._db_execute( "SELECT owner_id, write_access FROM file_ownership WHERE group_id = ? AND file_path = ?", (group_id, file_path) ) if rows: return {"owner_id": rows[0]["owner_id"], "write_access": rows[0]["write_access"]} return None def _set_file_ownership(self, group_id: str, file_path: str, owner_id: str, write_access: str) -> None: """Set or update ownership for a file.""" self._db_execute(""" INSERT INTO file_ownership (group_id, file_path, owner_id, write_access) VALUES (?, ?, ?, ?) ON CONFLICT(group_id, file_path) DO UPDATE SET owner_id = excluded.owner_id, write_access = excluded.write_access, updated_at = CURRENT_TIMESTAMP """, (group_id, file_path, owner_id, write_access)) def _delete_file_ownership(self, group_id: str, file_path: str) -> None: """Delete ownership record for a file.""" self._db_execute( "DELETE FROM file_ownership WHERE group_id = ? AND file_path = ?", (group_id, file_path) ) def _delete_file_ownership_recursive(self, group_id: str, dir_path: str) -> int: """ Delete ownership records for a directory and all its contents. Returns the number of records deleted. """ # Delete exact match and all paths starting with dir_path/ if dir_path: _, rowcount = self._db_execute( "DELETE FROM file_ownership WHERE group_id = ? AND (file_path = ? OR file_path LIKE ?)", (group_id, dir_path, dir_path + "/%") ) else: # Empty path means root - delete all for this group _, rowcount = self._db_execute( "DELETE FROM file_ownership WHERE group_id = ?", (group_id,) ) return rowcount if rowcount > 0 else 0 def _update_file_ownership_paths(self, group_id: str, old_path: str, new_path: str) -> None: """ Update ownership records when a directory is renamed. Updates the path itself and all paths under it. """ # Update exact match self._db_execute( "UPDATE file_ownership SET file_path = ?, updated_at = CURRENT_TIMESTAMP WHERE group_id = ? AND file_path = ?", (new_path, group_id, old_path) ) # Update all paths under old_path/ # SQLite: replace the prefix old_path/ with new_path/ old_prefix = old_path + "/" new_prefix = new_path + "/" self._db_execute(""" UPDATE file_ownership SET file_path = ? || SUBSTR(file_path, ?), updated_at = CURRENT_TIMESTAMP WHERE group_id = ? AND file_path LIKE ? """, (new_prefix, len(old_prefix) + 1, group_id, old_prefix + "%")) def _can_write_group_file(self, group_id: str, file_path: str, user_id: str) -> tuple: """ Check if user can modify file in group space. Returns (can_write: bool, error_code: str|None) """ ownership = self._get_file_ownership(group_id, file_path) if ownership is None: return True, None # New file, anyone can create mode = ownership["write_access"] owner = ownership["owner_id"] if mode == "group": return True, None elif mode == "owner": if user_id == owner: return True, None return False, "FILE_OWNER_ONLY" elif mode == "owner_ro": return False, "FILE_READ_ONLY" return False, "PERMISSION_DENIED" def _can_delete_group_file(self, group_id: str, file_path: str, user_id: str) -> tuple: """Check if user can delete file in group space.""" ownership = self._get_file_ownership(group_id, file_path) if ownership is None: return True, None # Untracked file mode = ownership["write_access"] owner = ownership["owner_id"] if mode == "group": return True, None # Everyone can delete elif mode == "owner": if user_id == owner: return True, None # Owner can delete return False, "FILE_OWNER_ONLY" elif mode == "owner_ro": return False, "FILE_READ_ONLY" # Nobody can delete, change mode first return False, "PERMISSION_DENIED" def _format_response(self, success: bool, data: Any = None, message: str = None) -> str: """Formats a JSON response.""" response = {"success": success} if data is not None: response["data"] = data if message: response["message"] = message return json.dumps(response, indent=2, ensure_ascii=False) def _format_error(self, e: StorageError, func_name: str = None) -> str: """Formats a StorageError with contextual help.""" function_help = self._get_function_help(func_name) if func_name else None return json.dumps(e.to_dict(function_help), indent=2, ensure_ascii=False) def _clamp_timeout(self, timeout: int = None) -> int: """Clamps timeout to configured values. Uses exec_timeout_default if not specified.""" if timeout is None: timeout = self.valves.exec_timeout_default return max(1, min(timeout, self.valves.exec_timeout_max)) # ========================================================================= # UPLOADS (2 functions) # ========================================================================= def _get_howto_description(self, howto: str) -> str: """Get short description for a howto topic.""" descriptions = { "download": "Download files with curl (to work with them locally)", "csv_to_sqlite": "Import CSV/TSV data into SQLite (fast, no context pollution)", "upload": "Handle user-uploaded files", "share": "Create download links for files", "edit": "Edit existing files safely", "commands": "Available commands + workarounds when commands are missing", "network": "Network access configuration", "paths": "Path rules and examples", "large_files": "Process large files WITHOUT loading into context", "full": "Complete documentation (long)", } return descriptions.get(howto, "") def _get_full_help(self) -> str: """Return the complete documentation.""" return """ # Fileshed - Documentation ## ⚠️ IMPORTANT: Only call shed_* functions! ``` ✅ CORRECT: shed_exec(zone="storage", cmd="ls", args=["-la"]) ✅ CORRECT: shed_exec(zone="storage", cmd="mkdir", args=["-p", "dir"]) ✅ CORRECT: shed_patch_text(zone="storage", path="notes.txt", content="Hello") ✅ CORRECT: shed_sqlite(zone="storage", path="db.sqlite", query="SELECT * FROM t") ❌ WRONG: _exec_command(...) ← INTERNAL, will NOT work! ❌ WRONG: _validate_path(...) ← INTERNAL, will NOT work! ``` All methods starting with `_` are INTERNAL implementation details. They are NOT callable. Only `shed_*` functions are available to you. ## IMPORTANT: FILE UPLOAD WORKFLOW When a file is uploaded, you MUST follow these steps: STEP 1: shed_import(import_all=True) <- Import ALL attached files OR shed_import(filename="report.pdf") <- Import ONE specific file STEP 2: shed_move_uploads_to_storage(src="filename", dest="filename") NEVER skip step 1! ## PATH RULES - VERY IMPORTANT! Each function works INSIDE its zone. Paths are relative to the zone root. DO NOT include the zone name in paths! CORRECT: shed_exec(zone="storage", cmd="cat", args=["projects/file.txt"]) WRONG: shed_exec(zone="storage", cmd="cat", args=["Storage/projects/file.txt"]) ## DOWNLOAD LINKS Three functions to create download links: | Function | Description | |----------|-------------| | shed_link_create() | Create download link | | shed_link_list() | List your download links | | shed_link_delete() | Remove a download link | ### shed_link_create - Create a link shed_link_create(zone="storage", path="report.pdf") shed_link_create(zone="storage", path="data.zip") Returns: {"download_url": "/api/v1/files/{id}/content", "file_id": "..."} The user can click the download_url to download the file. ### shed_link_list - List links shed_link_list() Returns list of all download links you've created with file_id, filename, download_url. ### shed_link_delete - Remove a download link shed_link_delete(file_id="abc123-...") Removes file from Open WebUI. Use file_id from create or list. ## HOW TO DO COMMON OPERATIONS ⚠️ **SHELL COMMANDS FIRST!** Use shed_exec() for ALL shell-doable operations. Only use shed_patch_text() to CREATE or MODIFY file CONTENT. | Operation | Method | |--------------------|------------------------------------------------------------| | **Create folder** | shed_exec(zone="storage", cmd="mkdir", args=["-p", "dir"])| | Read file | shed_exec(zone="storage", cmd="cat", args=["file.txt"]) | | Copy file | shed_exec(zone="storage", cmd="cp", args=["src", "dest"]) | | Move/rename | shed_exec(zone="storage", cmd="mv", args=["old", "new"]) | | Delete file | shed_exec(zone="storage", cmd="rm", args=["file.txt"]) | | Delete folder | shed_exec(zone="storage", cmd="rm", args=["-rf", "dir"]) | | List files | shed_exec(zone="storage", cmd="ls", args=["-la"]) | | Search in file | shed_exec(zone="storage", cmd="grep", args=["pat", "f"]) | | Count lines | shed_exec(zone="storage", cmd="wc", args=["-l", "file"]) | | Git operations | shed_exec(zone="documents", cmd="git", args=["log"]) | CONTENT OPERATIONS (use shed_patch_text only for these): | Operation | Method | |------------------------|---------------------------------------------------------| | Create new file | shed_patch_text(zone, path, content, overwrite=True) | | Append to file | shed_patch_text(zone, path, content, position="end") | | Replace pattern | shed_patch_text(zone, path, content, pattern="...", position="replace") | | Edit specific line | shed_patch_text(zone, path, content, line=5, position="replace") | 📌 Line numbers start at 1 (first line = line=1) ❌ WRONG: Using patch_text to create directories (via .keep files) ✓ CORRECT: Using shed_exec(cmd="mkdir", args=["-p", "dir"]) ## ZONES All operations use a zone parameter: "uploads", "storage", "documents", or "group" - **Uploads** : Temporary import area (read + delete only) - **Storage** : Permanent user space (all operations, no Git) - **Documents** : Permanent + Git versioned (auto-commit) - **Groups** : Shared collaborative spaces (requires group= parameter) ## UNIFIED FUNCTIONS ### Core Operations (all zones) - shed_exec(zone, cmd, args, group?) : Execute shell commands - shed_patch_text(zone, path, content, ..., group?, message?) : Edit text files - shed_patch_bytes(zone, path, content, ..., group?, message?) : Edit binary files - shed_delete(zone, path, group?, message?) : Delete files/folders - shed_rename(zone, old_path, new_path, group?, message?) : Rename/move files ### Locked Editing (with locking) - shed_lockedit_open(zone, path, group?) : Lock + get content - shed_lockedit_exec(zone, path, cmd, args, group?): Execute on working copy - shed_lockedit_overwrite(zone, path, content, group?) : Write to working copy - shed_lockedit_save(zone, path, group?, message?) : Save + unlock + commit - shed_lockedit_cancel(zone, path, group?) : Discard + unlock ### Import (from chat) - shed_import(import_all=True) : Import all attached files - shed_import(filename="file.txt") : Import one specific file ### Bridges (between zones) - shed_move_uploads_to_storage(src, dest) - shed_move_uploads_to_documents(src, dest, message) - shed_copy_storage_to_documents(src, dest, message) - shed_move_documents_to_storage(src, dest, message) - shed_copy_to_group(src_zone, src_path, group, dest_path, message, mode) ## GROUP-SPECIFIC FUNCTIONS ### Discovery - shed_group_list() : List groups you belong to - shed_group_info(group) : Show files, ownership, stats ### Ownership Management - shed_group_set_mode(group, path, mode) : Change write mode - shed_group_chown(group, path, new_owner): Transfer ownership ## WRITE MODES (Groups) - **group** : Anyone in group can modify (default) - **owner** : Only owner can modify - **owner_ro** : Read-only for everyone (owner can delete) ## DOWNLOAD LINKS (3 functions) - shed_link_create(zone, path, group?) : Create download link - shed_link_list() : List links - shed_link_delete(file_id) : Remove download link Examples: shed_link_create(zone="storage", path="report.pdf") # Returns download link shed_link_list() # List your download links shed_link_delete(file_id="abc123-...") # Remove a download link ## UTILITIES (6 functions) - shed_help() - shed_stats() : Show storage usage and quotas - shed_parameters() : Show valve configuration (network_mode, quotas, etc.) - shed_allowed_commands() - shed_force_unlock(path, zone) : Unlock in personal zone (storage/documents) - shed_force_unlock(path, group=id) : Unlock in group zone - shed_maintenance() ## ANSWERING CONFIGURATION QUESTIONS When user asks about configuration, call shed_parameters() first: - "Is network enabled?" -> shed_parameters() -> check network_mode - "Can I use curl?" -> shed_parameters() -> network_mode in ("safe", "all") - "Can I git push?" -> shed_parameters() -> network_mode == "all" - "What's my quota?" -> shed_parameters() -> quota_per_user_mb - "Max file size?" -> shed_parameters() -> max_file_size_mb ## BUILTINS (8 functions - see below) ## BUILTIN FUNCTIONS (always available, no container dependency) Unlike shed_*_exec() which needs external commands, builtins use Python: | Function | Description | |----------|-------------| | shed_unzip(zone, src, dest) | Extract ZIP (replaces `unzip` command) | | shed_zip(zone, src, dest, include_empty_dirs) | Create ZIP (replaces `zip` command) | | shed_tree(zone, path, depth) | Directory tree (replaces `tree` command) | | shed_zipinfo(zone, path) | ZIP info (replaces `zipinfo` command) | | shed_file_type(zone, path) | File MIME type (replaces `file` command) | | shed_convert_eol(zone, path, to) | Line endings (replaces `dos2unix`/`unix2dos`) | | shed_hexdump(zone, path, offset, length) | Hex dump (replaces `xxd`/`hexdump`) | | shed_sqlite(zone, path, query, params, group) | SQLite queries (replaces `sqlite3` command) | | shed_sqlite(zone, path, import_csv, table, ...) | CSV import with auto-detection (delimiter, encoding, dates) | Examples: shed_unzip(zone="storage", src="repo.zip", dest="repo") shed_zip(zone="storage", src="projects/app", dest="app.zip") shed_zip(zone="storage", src="projects", dest="backup.zip", include_empty_dirs=True) # preserve empty dirs shed_tree(zone="storage", path="projects", depth=2) shed_zipinfo(zone="storage", path="backup.zip") shed_file_type(zone="uploads", path="document.pdf") shed_convert_eol(zone="storage", path="script.sh", to="unix") shed_hexdump(zone="storage", path="binary.dat", length=128) # SQLite queries shed_sqlite(zone="storage", path="data.db", query="SELECT * FROM users") shed_sqlite(zone="storage", path="data.db", query="INSERT INTO users VALUES (?, ?)", params=["Alice", "alice@ex.com"]) # CSV import (auto-detects delimiter and encoding) shed_sqlite(zone="storage", path="data.db", import_csv="users.csv", table="users") # European CSV (semicolon, comma decimal, latin-1) shed_sqlite(zone="storage", path="data.db", import_csv="french.csv", table="ventes", delimiter=";", decimal=",", encoding="latin-1") # With date parsing (European DD/MM/YYYY) shed_sqlite(zone="storage", path="data.db", import_csv="orders.csv", table="orders", date_columns=["order_date"], date_format="dayfirst") ## ALLOWED COMMANDS Use shed_allowed_commands() to see available commands. ## NETWORK ACCESS Network is disabled by default. Admin can configure via the network_mode valve: | network_mode | Effect | |--------------|--------| | "disabled" (default) | [X] No network. curl/wget blocked. ffmpeg blocked. URLs blocked in all commands. | | "safe" | [OK] Downloads only. curl/wget GET, git clone/fetch/pull, ffmpeg (input only), pandoc/convert with URLs. | | "all" | [!] Full access including upload. curl POST, git push, ffmpeg streaming output. EXFILTRATION RISK! | Network-capable commands: - **ffmpeg**: Blocked if disabled. In "safe" mode, output protocols (rtmp, udp, tcp, etc.) are blocked. - **ffprobe, pandoc, convert, identify**: Local files always ok. URLs require "safe" or "all". - **curl, wget**: Require "safe" (GET only) or "all" (all methods). - **git**: clone/fetch/pull require "safe". push requires "all". ## DOWNLOADING FILES - IMPORTANT! When network_mode is "safe" or "all", ALWAYS use curl via shed_*_exec() instead of fetch_url tool: shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "data.csv", "https://example.com/data.csv"]) REQUIRED: The -o flag is MANDATORY for curl/wget. Omitting it returns an error. This prevents context pollution when downloading large files. Why curl instead of fetch_url? 1. **Complete download**: curl downloads the FULL file. fetch_url TRUNCATES large content. 2. **No context pollution**: File is saved to disk, not dumped into conversation context. 3. **Post-processing**: You can then unzip, grep, head, tail, awk the file as needed. Examples: # Download a CSV (extension known) shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "data.csv", "https://example.com/data.csv"]) # Download and unzip shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "repo.zip", "https://github.com/.../archive.zip"]) shed_unzip(zone="storage", src="repo.zip", dest="repo") # Download then inspect shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "big.json", "https://api.example.com/data"]) shed_exec(zone="storage", cmd="head", args=["-100", "big.json"]) # First 100 lines shed_exec(zone="storage", cmd="jq", args=[".", "big.json"]) # Parse JSON ## DOWNLOAD WITH UNKNOWN EXTENSION When the URL doesn't reveal the file type (e.g., Google Drive, API endpoints): # Step 1: Download with temporary extension shed_exec(zone="storage", cmd="curl", args=["-L", "-o", "sample.tmp", "https://drive.google.com/uc?id=...&export=download"]) # Step 2: Detect the actual file type shed_file_type(zone="storage", path="sample.tmp") # Returns: {"mime_type": "text/csv", "extension": ".csv"} # Step 3: Rename with correct extension shed_rename(zone="storage", old_path="sample.tmp", new_path="sample.csv") This workflow ensures files get proper extensions even from URLs that hide the file type. Note: curl runs with -sS automatically (silent + show-error, no progress bar spam). Note: stdout/stderr are truncated at 50KB to prevent context overflow. ## FORBIDDEN ARGUMENTS - Shell metacharacters: ; | && & > >> $( ` - URLs: blocked unless network_mode is "safe" or "all" for network-capable commands - find: -exec, -execdir, -ok, -okdir (can execute commands) - awk: system(), getline pipes (can execute commands) ## REMOVED COMMANDS (security) - xargs, timeout, env: can execute arbitrary commands """ # ========================================================================= # INTERNAL IMPLEMENTATION METHODS FOR patch_* FUNCTIONS # ========================================================================= async def _patch_text_impl( self, zone: str, path: str, content: str, position: str, line: int, end_line: int, pattern: str, regex_flags: str, match_all: bool, overwrite: bool, safe: bool, group: str, message: str, mode: str, __user__: dict, __metadata__: dict, ) -> str: """Internal implementation for text file patching.""" user_id = __user__.get("id", "") conv_id = self._get_conv_id(__metadata__) zone_lower = zone.lower() # === ZONE RESOLUTION === user_root = self._get_user_root(__user__) git_commit = False group_id = None if zone_lower == "storage": zone_root = user_root / "Storage" / "data" editzone_base = user_root / "Storage" elif zone_lower == "documents": zone_root = user_root / "Documents" / "data" editzone_base = user_root / "Documents" git_commit = True self._init_git_repo(zone_root) elif zone_lower == "group": if not group: raise StorageError("MISSING_PARAMETER", "Group parameter required") group_id = self._validate_group_id(group) self._check_group_access(__user__, group_id) zone_root = self._ensure_group_space(group_id) editzone_base = self._get_groups_root() / group_id git_commit = True else: raise StorageError("ZONE_FORBIDDEN", f"Invalid zone: {zone}") self._ensure_dir(zone_root) path = self._validate_relative_path(path) target_path = self._resolve_chroot_path(zone_root, path) # === PERMISSION CHECK (groups) === if group_id: can_write, error = self._can_write_group_file(group_id, path, user_id) if not can_write and target_path.exists(): raise StorageError(error, f"Cannot write to file: {error}") # === VALIDATE PARAMETERS === valid_positions = ("start", "end", "before", "after", "replace") if position not in valid_positions: hint = "" if position == "overwrite": hint = ". To overwrite entire file, use overwrite=True parameter instead" elif position == "at": hint = ". 'at' is for shed_patch_bytes (binary). For text, use 'before' or 'after' with line=N" raise StorageError( "INVALID_PARAMETER", f"Invalid position: {position}. Valid: {', '.join(valid_positions)}{hint}" ) # Treat 0 as None (LLMs sometimes pass 0 instead of omitting the parameter) if line == 0: line = None if end_line == 0: end_line = None if not overwrite and position in ("before", "after", "replace"): if line is None and pattern is None: raise StorageError("MISSING_PARAMETER", f"Position '{position}' requires 'line' or 'pattern'") if line is not None and line < 1: raise StorageError("INVALID_PARAMETER", "Line must be >= 1 (first line is 1, not 0)") if end_line is not None and position != "replace": raise StorageError("INVALID_PARAMETER", "end_line only valid with position='replace'") if end_line is not None and end_line < line: raise StorageError("INVALID_PARAMETER", "end_line must be >= line") # === COMPILE REGEX === compiled_pattern = None if pattern is not None: flags = 0 for c in regex_flags.lower(): if c == 'i': flags |= re.IGNORECASE elif c == 'm': flags |= re.MULTILINE elif c == 's': flags |= re.DOTALL try: compiled_pattern = re.compile(pattern, flags) except re.error as e: raise StorageError("INVALID_PARAMETER", f"Invalid regex: {e}") # === CHECK FILE EXISTS === file_exists = target_path.exists() file_created = False if not file_exists: if overwrite or position in ("start", "end"): file_created = True else: raise StorageError("FILE_NOT_FOUND", f"File not found: {path}") # === SIZE AND QUOTA CHECKS === content_bytes = content.encode('utf-8') max_size = self.valves.max_file_size_mb * 1024 * 1024 current_size = target_path.stat().st_size if file_exists else 0 if current_size + len(content_bytes) > max_size: raise StorageError("FILE_TOO_LARGE", f"File would exceed {self.valves.max_file_size_mb} MB") if group_id: self._check_group_quota(group_id, len(content_bytes)) else: self._check_quota(__user__, len(content_bytes)) # === SAFE MODE SETUP === lock_path = None working_path = target_path if safe: rel_path = str(target_path.relative_to(zone_root)) lock_path = editzone_base / "locks" / (rel_path + ".lock") edit_path = editzone_base / "editzone" / conv_id / rel_path self._acquire_lock(lock_path, conv_id, user_id, rel_path) self._ensure_dir(edit_path.parent) if file_exists: shutil.copy2(target_path, edit_path) else: edit_path.touch() working_path = edit_path else: if file_created: self._ensure_dir(target_path.parent) target_path.touch() try: # === READ CONTENT === if overwrite: lines = [] elif file_created and not safe: lines = [] else: with open(working_path, 'r', encoding='utf-8', errors='replace') as f: lines = f.readlines() lines_affected = 0 match_count = 0 # === PERFORM EDIT === if overwrite: lines = [content] if content else [] lines_affected = 1 elif position == "start": lines.insert(0, content) lines_affected = 1 elif position == "end": lines.append(content) lines_affected = 1 elif position == "before": if line is not None: if line > len(lines) + 1: raise StorageError("INVALID_PARAMETER", f"Line {line} beyond file ({len(lines)} lines)") lines.insert(line - 1, content) lines_affected = 1 else: new_lines = [] found = False for l in lines: if compiled_pattern.search(l) and (not found or match_all): new_lines.append(content) lines_affected += 1 match_count += 1 found = True new_lines.append(l) if not found: raise StorageError("PATTERN_NOT_FOUND", f"Pattern not found: {pattern}") lines = new_lines elif position == "after": if line is not None: if line > len(lines): raise StorageError("INVALID_PARAMETER", f"Line {line} beyond file ({len(lines)} lines)") lines.insert(line, content) lines_affected = 1 else: new_lines = [] found = False for l in lines: new_lines.append(l) if compiled_pattern.search(l) and (not found or match_all): new_lines.append(content) lines_affected += 1 match_count += 1 found = True if not found: raise StorageError("PATTERN_NOT_FOUND", f"Pattern not found: {pattern}") lines = new_lines elif position == "replace": if line is not None: if line > len(lines): raise StorageError("INVALID_PARAMETER", f"Line {line} beyond file ({len(lines)} lines)") start_idx = line - 1 end_idx = (end_line - 1) if end_line else start_idx end_idx = min(end_idx, len(lines) - 1) lines_affected = end_idx - start_idx + 1 lines = lines[:start_idx] + [content] + lines[end_idx + 1:] else: new_lines = [] found = False for l in lines: if compiled_pattern.search(l) and (not found or match_all): new_lines.append(content) lines_affected += 1 match_count += 1 found = True else: new_lines.append(l) if not found: raise StorageError("PATTERN_NOT_FOUND", f"Pattern not found: {pattern}") lines = new_lines # === WRITE RESULT === with open(working_path, 'w', encoding='utf-8') as f: f.writelines(lines) # === SAFE MODE FINALIZE === if safe: self._ensure_dir(target_path.parent) shutil.move(str(working_path), str(target_path)) lock_path.unlink(missing_ok=True) # === GIT COMMIT === if git_commit: commit_msg = message or f"Patch {path}: {position}" if group_id: self._git_commit_as_user(zone_root, commit_msg, user_id) else: self._git_commit(zone_root, commit_msg) # === SET GROUP OWNERSHIP === if group_id and file_created: effective_mode = mode or self.valves.group_default_mode if effective_mode not in ("owner", "group", "owner_ro"): effective_mode = "group" self._set_file_ownership(group_id, path, user_id, effective_mode) # === BUILD RESPONSE === result = { "path": path, "zone": zone, "position": "overwrite" if overwrite else position, "lines_affected": lines_affected, "created": file_created, "file_size": target_path.stat().st_size, "safe_mode": safe, } if match_count > 0: result["pattern_matches"] = match_count if group_id: result["group"] = group_id action = "created" if file_created else ("overwritten" if overwrite else position) return self._format_response(True, data=result, message=f"File {action}: {lines_affected} line(s) affected") finally: if safe and lock_path and lock_path.exists(): lock_path.unlink(missing_ok=True) async def _patch_bytes_impl( self, zone: str, path: str, content: str, position: str, offset: int, length: int, content_format: str, safe: bool, group: str, message: str, mode: str, __user__: dict, __metadata__: dict, ) -> str: """Internal implementation for binary file patching.""" import base64 as base64_module user_id = __user__.get("id", "") conv_id = self._get_conv_id(__metadata__) zone_lower = zone.lower() # === PARSE CONTENT === try: if content_format == "hex": hex_clean = content.replace(" ", "").replace("\n", "") if len(hex_clean) % 2 != 0: raise ValueError("Hex string must have even length") content_bytes = bytes.fromhex(hex_clean) elif content_format == "base64": content_bytes = base64_module.b64decode(content) elif content_format == "raw": content_bytes = content.encode('utf-8') else: raise StorageError("INVALID_PARAMETER", f"Invalid content_format: {content_format}") except ValueError as e: raise StorageError("INVALID_PARAMETER", f"Invalid content: {e}") # === ZONE RESOLUTION === user_root = self._get_user_root(__user__) git_commit = False group_id = None if zone_lower == "storage": zone_root = user_root / "Storage" / "data" editzone_base = user_root / "Storage" elif zone_lower == "documents": zone_root = user_root / "Documents" / "data" editzone_base = user_root / "Documents" git_commit = True self._init_git_repo(zone_root) elif zone_lower == "group": if not group: raise StorageError("MISSING_PARAMETER", "Group parameter required") group_id = self._validate_group_id(group) self._check_group_access(__user__, group_id) zone_root = self._ensure_group_space(group_id) editzone_base = self._get_groups_root() / group_id git_commit = True else: raise StorageError("ZONE_FORBIDDEN", f"Invalid zone: {zone}") self._ensure_dir(zone_root) path = self._validate_relative_path(path) target_path = self._resolve_chroot_path(zone_root, path) # === PERMISSION CHECK === if group_id: can_write, error = self._can_write_group_file(group_id, path, user_id) if not can_write and target_path.exists(): raise StorageError(error, f"Cannot write to file: {error}") # === VALIDATE PARAMETERS === valid_positions = ("start", "end", "at", "replace") if position not in valid_positions: hint = "" if position == "overwrite": hint = ". To overwrite entire file, use overwrite=True parameter instead" raise StorageError( "INVALID_PARAMETER", f"Invalid position: {position}. Valid: {', '.join(valid_positions)}{hint}" ) if position in ("at", "replace") and offset is None: raise StorageError("MISSING_PARAMETER", f"Position '{position}' requires 'offset'") if position == "replace" and length is None: raise StorageError("MISSING_PARAMETER", "Position 'replace' requires 'length'") if offset is not None and offset < 0: raise StorageError("INVALID_PARAMETER", "Offset must be >= 0") if length is not None and length < 0: raise StorageError("INVALID_PARAMETER", "Length must be >= 0") # === CHECK FILE EXISTS === file_exists = target_path.exists() file_created = False if not file_exists: if position in ("start", "end"): file_created = True else: raise StorageError("FILE_NOT_FOUND", f"File not found: {path}") # === SIZE CHECKS === max_size = self.valves.max_file_size_mb * 1024 * 1024 current_size = target_path.stat().st_size if file_exists else 0 if offset is not None and offset > current_size: raise StorageError("INVALID_PARAMETER", f"Offset {offset} beyond file size ({current_size})") bytes_removed = 0 if position == "replace" and offset < current_size: bytes_removed = min(length, current_size - offset) if current_size + len(content_bytes) - bytes_removed > max_size: raise StorageError("FILE_TOO_LARGE", f"File would exceed {self.valves.max_file_size_mb} MB") if group_id: self._check_group_quota(group_id, len(content_bytes)) else: self._check_quota(__user__, len(content_bytes)) # === SAFE MODE SETUP === lock_path = None working_path = target_path if safe: rel_path = str(target_path.relative_to(zone_root)) lock_path = editzone_base / "locks" / (rel_path + ".lock") edit_path = editzone_base / "editzone" / conv_id / rel_path self._acquire_lock(lock_path, conv_id, user_id, rel_path) self._ensure_dir(edit_path.parent) if file_exists: shutil.copy2(target_path, edit_path) else: edit_path.touch() working_path = edit_path else: if file_created: self._ensure_dir(target_path.parent) target_path.touch() try: # === READ DATA === if file_created and not safe: data = bytearray() else: with open(working_path, 'rb') as f: data = bytearray(f.read()) bytes_affected = len(content_bytes) # === PERFORM EDIT === if position == "start": data = bytearray(content_bytes) + data elif position == "end": data.extend(content_bytes) elif position == "at": data = data[:offset] + bytearray(content_bytes) + data[offset:] elif position == "replace": end_offset = min(offset + length, len(data)) bytes_affected = end_offset - offset data = data[:offset] + bytearray(content_bytes) + data[end_offset:] # === WRITE RESULT === with open(working_path, 'wb') as f: f.write(data) # === SAFE MODE FINALIZE === if safe: self._ensure_dir(target_path.parent) shutil.move(str(working_path), str(target_path)) lock_path.unlink(missing_ok=True) # === GIT COMMIT === if git_commit: commit_msg = message or f"Patch bytes {path}: {position}" if group_id: self._git_commit_as_user(zone_root, commit_msg, user_id) else: self._git_commit(zone_root, commit_msg) # === SET GROUP OWNERSHIP === if group_id and file_created: effective_mode = mode or self.valves.group_default_mode if effective_mode not in ("owner", "group", "owner_ro"): effective_mode = "group" self._set_file_ownership(group_id, path, user_id, effective_mode) # === BUILD RESPONSE === result = { "path": path, "zone": zone, "position": position, "bytes_written": len(content_bytes), "bytes_affected": bytes_affected, "created": file_created, "file_size": target_path.stat().st_size, "safe_mode": safe, "content_format": content_format, } if offset is not None: result["offset"] = offset if group_id: result["group"] = group_id return self._format_response(True, data=result, message=f"File {'created' if file_created else 'patched'}: {len(content_bytes)} bytes written") finally: if safe and lock_path and lock_path.exists(): lock_path.unlink(missing_ok=True) # ========================================================================= # UNIFIED ZONE FUNCTIONS # ========================================================================= # These 10 functions replace 32 zone-specific functions. # All operations now use: shed_xxx(zone="...", ...) # # ⚠️ IMPORTANT: Use shed_exec() for ALL shell-doable operations! # Only use shed_patch_text() to CREATE or MODIFY file CONTENT. # # ✓ Create directory: shed_exec(zone="storage", cmd="mkdir", args=["-p", "dir"]) # ✗ WRONG: shed_patch_text(path="dir/.keep", content="") # ========================================================================= class Tools: """ Fileshed - Persistent file management with collaboration. ╔═══════════════════════════════════════════════════════════════════════════╗ ║ CRITICAL: UPLOADED FILES WORKFLOW ║ ║ When user uploads files, ALWAYS do this FIRST: ║ ║ 1. shed_import(import_all=True) ║ ║ 2. shed_exec(zone="uploads", cmd="ls", args=["-la"]) ║ ║ Then move files to Storage or Documents as needed. ║ ╚═══════════════════════════════════════════════════════════════════════════╝ ╔═══════════════════════════════════════════════════════════════════════════╗ ║ PATH RULES - IMPORTANT! ║ ║ Each function already works INSIDE its zone. Use relative paths only. ║ ║ ║ ║ ✓ CORRECT: shed_exec(zone="storage", cmd="cat", args=["projects/f.txt"])║ ║ ✗ WRONG: shed_exec(zone="storage", cmd="cat", args=["Storage/..."]) ║ ║ ║ ║ The zone name is NOT part of the path! ║ ╚═══════════════════════════════════════════════════════════════════════════╝ ╔═══════════════════════════════════════════════════════════════════════════╗ ║ ⚠️ SHELL COMMANDS FIRST! ║ ║ ║ ║ Use shed_exec(zone, cmd, args) for ALL shell-doable operations: ║ ║ • mkdir: shed_exec(zone="storage", cmd="mkdir", args=["-p", "dir"]) ║ ║ • Copy: shed_exec(zone="storage", cmd="cp", args=["src", "dst"]) ║ ║ • Move: shed_exec(zone="storage", cmd="mv", args=["old", "new"]) ║ ║ • Read: shed_exec(zone="storage", cmd="cat", args=["file.txt"]) ║ ║ • Delete: shed_exec(zone="storage", cmd="rm", args=["file.txt"]) ║ ║ • Git: shed_exec(zone="documents", cmd="git", args=["log"]) ║ ║ ║ ║ Use shed_patch_text() ONLY for file CONTENT operations: ║ ║ • Create: shed_patch_text(zone, path, content, overwrite=True) ║ ║ • Append: shed_patch_text(zone, path, content, position="end") ║ ║ ║ ║ ❌ WRONG: shed_patch_text(path="dir/.keep") to create directories ║ ║ ✓ RIGHT: shed_exec(cmd="mkdir", args=["-p", "dir"]) ║ ╚═══════════════════════════════════════════════════════════════════════════╝ ZONES (use zone= parameter): • zone="uploads" -> works in Uploads/ (temporary, per-conversation) • zone="storage" -> works in Storage/ (permanent, no versioning) • zone="documents" -> works in Documents/ (permanent, Git versioned) • zone="group" -> works in Groups/ (requires group= parameter) CROSS-ZONE TRANSFERS (special functions): • shed_move_uploads_to_storage(src, dest) • shed_move_uploads_to_documents(src, dest) • shed_copy_storage_to_documents(src, dest) • shed_move_documents_to_storage(src, dest) • shed_copy_to_group(src_zone, src_path, group, dest_path) GROUP WRITE MODES: • "group" -> Anyone can edit (default) • "owner" -> Only owner can edit • "owner_ro" -> Read-only for everyone """ class Valves(BaseModel): """Tool configuration via Open WebUI interface.""" storage_base_path: str = Field( default="/app/backend/data/user_files", description="Storage root path (contains users/ and groups/)" ) quota_per_user_mb: int = Field( default=1000, description="Quota per user in MB (personal space)" ) quota_per_group_mb: int = Field( default=2000, description="Quota per group in MB (shared space)" ) max_file_size_mb: int = Field( default=300, description="Max file size in MB" ) lock_max_age_hours: int = Field( default=24, description="Max lock duration before expiration" ) exec_timeout_default: int = Field( default=30, description="Default command timeout (seconds)" ) exec_timeout_max: int = Field( default=300, description="Maximum allowed timeout (seconds)" ) group_default_mode: str = Field( default="group", description="Default write mode for new group files: 'owner', 'group', or 'owner_ro'" ) network_mode: str = Field( default="disabled", description="Network access mode: 'disabled' (default, no network), 'safe' (downloads only, blocks exfiltration), or 'all' ([!] unrestricted, enables data exfiltration)" ) openwebui_api_url: str = Field( default="http://localhost:8080", description="Open WebUI base URL for shed_link_create(). Use ONLY the base URL without /api path. Examples: http://localhost:8080, https://chat.example.com. The /api/v1/files/ path is added automatically." ) max_output_default: int = Field( default=50000, description="Default max output size in bytes (~50KB). Use 0 for no limit (up to max_output_absolute)." ) max_output_absolute: int = Field( default=5000000, description="Absolute max output size in bytes (~5MB). Even max_output=0 cannot exceed this." ) class UserValves(BaseModel): """Per-user configuration. Users can set these in Tools > Fileshed > Settings.""" # Note: shed_link_* functions use internal API, no user configuration needed pass def __init__(self): self.valves = self.Valves() self._core = _FileshedCore(self) # ========================================================================= # INTERNAL IMPLEMENTATION METHODS FOR patch_* FUNCTIONS # ========================================================================= async def shed_exec( self, zone: str, cmd: str, args: list = [], timeout: int = None, max_output: int = None, stdout_file: str = None, stderr_file: str = None, redirect_stderr_to_stdout: bool = False, group: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Execute a command in the specified zone. :param zone: Target zone ("uploads", "storage", "documents", or "group") :param cmd: Command to execute (must be in whitelist) :param args: Command arguments - file paths go here :param timeout: Timeout in seconds (default: 30, max: 300) :param max_output: Max output bytes (None=50KB, 0=5MB max) :param stdout_file: Save stdout to this file instead of returning it :param stderr_file: Save stderr to this file instead of returning it :param redirect_stderr_to_stdout: Merge stderr into stdout (like 2>&1) :param group: Group name/ID (required if zone="group") :return: Command output as JSON Examples: shed_exec(zone="uploads", cmd="cat", args=["file.txt"]) shed_exec(zone="storage", cmd="ls", args=["-la"]) shed_exec(zone="storage", cmd="mkdir", args=["-p", "projects/2024"]) shed_exec(zone="storage", cmd="grep", args=["-r", "TODO", "."]) shed_exec(zone="documents", cmd="git", args=["log", "--oneline"]) shed_exec(zone="group", group="team", cmd="ls", args=["-la"]) # Redirect output to file (like shell > redirection) shed_exec(zone="storage", cmd="jq", args=["-r", ".[]", "data.json"], stdout_file="output.txt") Notes: - uploads: read-only commands only - documents/group: git commands allowed - File paths in args are relative to zone root - Use mkdir -p to create directories (NOT patch_text with .keep files!) - stdout_file/stderr_file: paths relative to zone root """ try: ctx = self._core._resolve_zone(zone, group, __user__, __metadata__) # Validate command against zone whitelist self._core._validate_command(cmd, ctx.whitelist, args) # Validate arguments (path escapes, network, etc.) self._core._validate_args(args, ctx.readonly, cmd) validated_args = self._core._validate_path_args(args, ctx.zone_root, cmd) # Validate and resolve output file paths stdout_path = None stderr_path = None if stdout_file: if ctx.readonly: raise StorageError( "ZONE_READONLY", "Cannot write stdout_file in read-only zone", {"zone": zone}, "Use a writable zone (storage, documents)" ) # Validate path doesn't escape self._core._validate_path_args([stdout_file], ctx.zone_root, cmd) stdout_path = ctx.zone_root / stdout_file if stderr_file: if ctx.readonly: raise StorageError( "ZONE_READONLY", "Cannot write stderr_file in read-only zone", {"zone": zone}, "Use a writable zone (storage, documents)" ) # Validate path doesn't escape self._core._validate_path_args([stderr_file], ctx.zone_root, cmd) stderr_path = ctx.zone_root / stderr_file # Execute result = self._core._exec_command( cmd, validated_args, cwd=ctx.zone_root, timeout=self._core._clamp_timeout(timeout), max_output=max_output, stdout_file=stdout_path, stderr_file=stderr_path, redirect_stderr_to_stdout=redirect_stderr_to_stdout, ) response_data = { "zone": ctx.zone_name, "command": cmd, "args": args, "stdout": result["stdout"], "stderr": result["stderr"], "returncode": result["returncode"], "truncated": result.get("truncated", False), } if stdout_file: response_data["stdout_file"] = stdout_file if stdout_path and stdout_path.exists(): response_data["stdout_file_size"] = stdout_path.stat().st_size if stderr_file: response_data["stderr_file"] = stderr_file if stderr_path and stderr_path.exists(): response_data["stderr_file_size"] = stderr_path.stat().st_size return self._core._format_response(True, data=response_data) except StorageError as e: return self._core._format_error(e, "shed_exec") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_patch_text( self, zone: str, path: str, content: str, position: str = "end", line: int = None, end_line: int = None, pattern: str = None, regex_flags: str = "", match_all: bool = False, overwrite: bool = False, safe: bool = False, group: str = None, message: str = None, mode: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Edit a text file in the specified zone. ⚠️ Use this ONLY for file CONTENT operations! For creating directories, use: shed_exec(zone, cmd="mkdir", args=["-p", "dir"]) :param zone: Target zone ("storage", "documents", or "group") :param path: File path relative to zone :param content: Content to write :param position: "start", "end", "before", "after", or "replace" (NOT "overwrite" or "at"!) :param line: Line number for "before"/"after"/"replace" (first line is 1, not 0) :param end_line: End line for range replacement (only with position="replace") :param pattern: Regex pattern for "replace" :param regex_flags: Regex flags (i=ignore case, m=multiline, s=dotall) :param match_all: Replace all pattern matches (default: first only) :param overwrite: Set to True to replace entire file (use this, NOT position="overwrite") :param safe: Lock file during edit :param group: Group name/ID (required if zone="group") :param message: Git commit message (documents/group only, ignored for storage) :param mode: Ownership mode for new files in group: "owner", "group", "owner_ro" :return: Edit result as JSON Examples: shed_patch_text(zone="storage", path="notes.txt", content="New line\\n", position="end") shed_patch_text(zone="storage", path="file.txt", content="inserted\\n", position="before", line=5) shed_patch_text(zone="storage", path="config.py", content="DEBUG=True", pattern="DEBUG=.*", position="replace") shed_patch_text(zone="documents", path="README.md", content="# Title", overwrite=True, message="Init") """ try: return await self._core._patch_text_impl( zone=zone, path=path, content=content, position=position, line=line, end_line=end_line, pattern=pattern, regex_flags=regex_flags, match_all=match_all, overwrite=overwrite, safe=safe, group=group, message=message, mode=mode, __user__=__user__, __metadata__=__metadata__, ) except StorageError as e: return self._core._format_error(e, "shed_patch_text") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_patch_bytes( self, zone: str, path: str, content: str, content_format: str = "hex", position: str = "end", offset: int = None, length: int = None, safe: bool = False, group: str = None, message: str = None, mode: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Edit a binary file in the specified zone. :param zone: Target zone ("storage", "documents", or "group") :param path: File path relative to zone :param content: Content to write (format depends on content_format) :param content_format: "hex" (default), "base64", or "raw" :param position: "start", "end", "at", or "replace" :param offset: Byte offset for "at"/"replace" :param length: Bytes to replace for "replace" :param safe: Lock file during edit :param group: Group name/ID (required if zone="group") :param message: Git commit message (documents/group only) :param mode: Ownership mode for new files in group :return: Edit result as JSON Examples: shed_patch_bytes(zone="storage", path="data.bin", content="48454C4C4F") shed_patch_bytes(zone="storage", path="img.png", content="89504E47", position="start") """ try: return await self._core._patch_bytes_impl( zone=zone, path=path, content=content, content_format=content_format, position=position, offset=offset, length=length, safe=safe, group=group, message=message, mode=mode, __user__=__user__, __metadata__=__metadata__, ) except StorageError as e: return self._core._format_error(e, "shed_patch_bytes") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_delete( self, zone: str, path: str, group: str = None, message: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Delete a file or folder in the specified zone. :param zone: Target zone ("uploads", "storage", "documents", or "group") :param path: Path to delete (relative to zone) :param group: Group name/ID (required if zone="group") :param message: Git commit message (documents/group only) :return: Deletion result as JSON Examples: shed_delete(zone="uploads", path="temp.txt") shed_delete(zone="storage", path="old_project/") shed_delete(zone="documents", path="draft.md", message="Remove draft") shed_delete(zone="group", group="team", path="obsolete.txt", message="Cleanup") Note: uploads allows delete to clean up imported files. """ try: # uploads allows delete even though readonly for other ops ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=False) path = self._core._validate_relative_path(path) target = self._core._resolve_chroot_path(ctx.zone_root, path) if not target.exists(): raise StorageError("FILE_NOT_FOUND", f"Path not found: {path}") # Group: check delete permission user_id = __user__.get("id", "") if ctx.group_id: can_delete, reason = self._core._can_delete_group_file(ctx.group_id, path, user_id) if not can_delete: raise StorageError("PERMISSION_DENIED", reason, {"path": path}) # Delete was_dir = target.is_dir() if was_dir: shutil.rmtree(target) if ctx.group_id: self._core._delete_file_ownership_recursive(ctx.group_id, path) else: target.unlink() if ctx.group_id: self._core._delete_file_ownership(ctx.group_id, path) # Git commit if needed if ctx.git_commit: self._core._git_run(["add", "-A"], ctx.zone_root) commit_msg = message or f"Delete {path}" self._core._git_commit_as_user(ctx.zone_root, commit_msg, user_id) return self._core._format_response(True, data={ "zone": ctx.zone_name, "deleted": path, "was_directory": was_dir, }, message=f"Deleted: {path}") except StorageError as e: return self._core._format_error(e, "shed_delete") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_rename( self, zone: str, old_path: str, new_path: str, group: str = None, message: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Rename or move a file/folder within the specified zone. :param zone: Target zone ("storage", "documents", or "group") :param old_path: Current path (relative to zone) :param new_path: New path (relative to zone) :param group: Group name/ID (required if zone="group") :param message: Git commit message (documents/group only) :return: Rename result as JSON Examples: shed_rename(zone="storage", old_path="draft.txt", new_path="final.txt") shed_rename(zone="documents", old_path="old/", new_path="archive/", message="Reorganize") shed_rename(zone="group", group="team", old_path="v1.doc", new_path="v2.doc") """ try: ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True) old_path = self._core._validate_relative_path(old_path) new_path = self._core._validate_relative_path(new_path) old_target = self._core._resolve_chroot_path(ctx.zone_root, old_path) new_target = self._core._resolve_chroot_path(ctx.zone_root, new_path) if not old_target.exists(): raise StorageError("FILE_NOT_FOUND", f"Source not found: {old_path}") if new_target.exists(): raise StorageError("FILE_EXISTS", f"Destination exists: {new_path}") # Group: check write permission user_id = __user__.get("id", "") if ctx.group_id: can_write, reason = self._core._can_write_group_file(ctx.group_id, old_path, user_id) if not can_write: raise StorageError("PERMISSION_DENIED", reason, {"path": old_path}) # Create parent directories new_target.parent.mkdir(parents=True, exist_ok=True) # Rename old_target.rename(new_target) # Update ownership records if ctx.group_id: self._core._update_file_ownership_paths(ctx.group_id, old_path, new_path) # Git commit if ctx.git_commit: self._core._git_run(["add", "-A"], ctx.zone_root) commit_msg = message or f"Rename {old_path} -> {new_path}" self._core._git_commit_as_user(ctx.zone_root, commit_msg, user_id) return self._core._format_response(True, data={ "zone": ctx.zone_name, "old_path": old_path, "new_path": new_path, }, message=f"Renamed: {old_path} -> {new_path}") except StorageError as e: return self._core._format_error(e, "shed_rename") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_lockedit_open( self, zone: str, path: str, group: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Open a file for safe editing (locks file, creates working copy). ⚠️ COMPLETE WORKFLOW (must follow all steps): 1. shed_lockedit_open(zone, path) → Lock file, get content 2. shed_lockedit_overwrite(zone, path, content) → Modify (NOT shed_patch_text!) 3. shed_lockedit_save(zone, path) → Save + unlock (CLOSES edit mode!) OR to cancel: shed_lockedit_cancel(zone, path) → Discard changes + unlock :param zone: Target zone ("storage", "documents", or "group") :param path: File path to edit :param group: Group name/ID (required if zone="group") :return: File content and lock info as JSON Examples: shed_lockedit_open(zone="storage", path="config.json") shed_lockedit_open(zone="documents", path="report.md") shed_lockedit_open(zone="group", group="team", path="shared.txt") """ try: ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True) path = self._core._validate_relative_path(path) target = self._core._resolve_chroot_path(ctx.zone_root, path) if not target.exists(): raise StorageError("FILE_NOT_FOUND", f"File not found: {path}") if target.is_dir(): raise StorageError("NOT_A_FILE", f"Cannot edit a directory: {path}") # Group: check write permission user_id = __user__.get("id", "") if ctx.group_id: can_write, reason = self._core._can_write_group_file(ctx.group_id, path, user_id) if not can_write: raise StorageError("PERMISSION_DENIED", reason, {"path": path}) # Create lock lock_path = self._core._get_lock_path(ctx.editzone_base, path) self._core._acquire_lock(lock_path, ctx.conv_id, user_id, path) # Copy to editzone editzone_path = self._core._get_editzone_path(ctx.editzone_base, ctx.conv_id, path) editzone_path.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(target, editzone_path) # Read content try: with open(editzone_path, 'r', encoding='utf-8', errors='replace') as f: content = f.read() is_binary = False except: content = None is_binary = True return self._core._format_response(True, data={ "zone": ctx.zone_name, "path": path, "content": content, "is_binary": is_binary, "size": target.stat().st_size, "locked_by": user_id, }, message=f"File opened for editing: {path}") except StorageError as e: return self._core._format_error(e, "shed_lockedit_open") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_lockedit_exec( self, zone: str, path: str, cmd: str, args: list = [], timeout: int = None, group: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Execute a command on file in editzone (working copy). :param zone: Target zone ("storage", "documents", or "group") :param path: File path (must be opened with shed_lockedit_open) :param cmd: Command to execute :param args: Command arguments (use "." for the file being edited) :param timeout: Timeout in seconds :param group: Group name/ID (required if zone="group") :return: Command output as JSON Examples: shed_lockedit_exec(zone="storage", path="data.txt", cmd="sed", args=["-i", "s/old/new/g", "."]) shed_lockedit_exec(zone="storage", path="code.py", cmd="cat", args=["."]) """ try: ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True) path = self._core._validate_relative_path(path) user_id = __user__.get("id", "") # Verify lock ownership lock_path = self._core._get_lock_path(ctx.editzone_base, path) self._core._check_lock_owner(lock_path, user_id) # Get editzone path editzone_path = self._core._get_editzone_path(ctx.editzone_base, ctx.conv_id, path) if not editzone_path.exists(): raise StorageError("NOT_IN_EDIT_MODE", f"File not open for editing: {path}", hint="Use shed_lockedit_open() first. Note: shed_lockedit_save() CLOSES edit mode!") # Validate command self._core._validate_command(cmd, ctx.whitelist, args) # Replace "." with actual filename processed_args = [editzone_path.name if a == "." else a for a in args] # Execute in editzone directory result = self._core._exec_command( cmd, processed_args, cwd=editzone_path.parent, timeout=self._core._clamp_timeout(timeout), ) return self._core._format_response(True, data={ "zone": ctx.zone_name, "path": path, "command": cmd, "stdout": result["stdout"], "stderr": result["stderr"], "returncode": result["returncode"], }, message="Command executed in editzone") except StorageError as e: return self._core._format_error(e, "shed_lockedit_exec") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_lockedit_overwrite( self, zone: str, path: str, content: str, append: bool = False, group: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Write content to file in editzone (working copy). ⚠️ REQUIRES: File must be opened first with shed_lockedit_open() ⚠️ DO NOT use position, pattern, line, overwrite params - those are for shed_patch_text! :param zone: Target zone ("storage", "documents", or "group") :param path: File path (must be opened with shed_lockedit_open) :param content: Content to write (replaces entire file by default) :param append: If True, append instead of replace :param group: Group name/ID (required if zone="group") :return: Write result as JSON Examples: shed_lockedit_overwrite(zone="storage", path="config.json", content='{"key": "value"}') shed_lockedit_overwrite(zone="storage", path="log.txt", content="New entry\\n", append=True) """ try: ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True) path = self._core._validate_relative_path(path) user_id = __user__.get("id", "") # Verify lock ownership lock_path = self._core._get_lock_path(ctx.editzone_base, path) self._core._check_lock_owner(lock_path, user_id) # Get editzone path editzone_path = self._core._get_editzone_path(ctx.editzone_base, ctx.conv_id, path) if not editzone_path.exists(): raise StorageError("NOT_IN_EDIT_MODE", f"File not open for editing: {path}", hint="Use shed_lockedit_open() first. Note: shed_lockedit_save() CLOSES edit mode!") # Check content size self._core._validate_content_size(content) # Write mode = "a" if append else "w" with open(editzone_path, mode, encoding="utf-8") as f: f.write(content) new_size = editzone_path.stat().st_size return self._core._format_response(True, data={ "zone": ctx.zone_name, "path": path, "bytes_written": len(content.encode('utf-8')), "new_size": new_size, "mode": "append" if append else "overwrite", }, message="Content written to editzone") except StorageError as e: return self._core._format_error(e, "shed_lockedit_overwrite") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_lockedit_save( self, zone: str, path: str, group: str = None, message: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Save edited file back to zone and release lock. ⚠️ THIS CLOSES EDIT MODE! After save, the file is unlocked. To edit again, you must call shed_lockedit_open() first. Workflow: shed_lockedit_open → shed_lockedit_overwrite → shed_lockedit_save (done!) :param zone: Target zone ("storage", "documents", or "group") :param path: File path :param group: Group name/ID (required if zone="group") :param message: Git commit message (documents/group only) :return: Save result as JSON Examples: shed_lockedit_save(zone="storage", path="config.json") shed_lockedit_save(zone="documents", path="report.md", message="Final version") """ try: ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True) path = self._core._validate_relative_path(path) user_id = __user__.get("id", "") # Verify lock ownership lock_path = self._core._get_lock_path(ctx.editzone_base, path) self._core._check_lock_owner(lock_path, user_id) # Get paths editzone_path = self._core._get_editzone_path(ctx.editzone_base, ctx.conv_id, path) target = self._core._resolve_chroot_path(ctx.zone_root, path) if not editzone_path.exists(): raise StorageError("NOT_IN_EDIT_MODE", f"File not open for editing: {path}", hint="Use shed_lockedit_open() first. Note: shed_lockedit_save() CLOSES edit mode!") # Check quota size_diff = editzone_path.stat().st_size - (target.stat().st_size if target.exists() else 0) if size_diff > 0: if ctx.group_id: self._core._check_group_quota(ctx.group_id, size_diff) else: self._core._check_quota(__user__, size_diff) # Copy back to zone target.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(editzone_path, target) # Git commit if needed if ctx.git_commit: self._core._git_run(["add", "-A"], ctx.zone_root) commit_msg = message or f"Edit {path}" self._core._git_commit_as_user(ctx.zone_root, commit_msg, user_id) # Cleanup self._core._rm_with_empty_parents(editzone_path, ctx.editzone_base / "editzone") lock_path.unlink(missing_ok=True) return self._core._format_response(True, data={ "zone": ctx.zone_name, "path": path, "size": target.stat().st_size, "committed": ctx.git_commit, }, message=f"Saved and unlocked: {path}") except StorageError as e: return self._core._format_error(e, "shed_lockedit_save") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_lockedit_cancel( self, zone: str, path: str, group: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Cancel editing and release lock (discards changes). :param zone: Target zone ("storage", "documents", or "group") :param path: File path :param group: Group name/ID (required if zone="group") :return: Cancel result as JSON Examples: shed_lockedit_cancel(zone="storage", path="config.json") """ try: ctx = self._core._resolve_zone(zone, group, __user__, __metadata__, require_write=True) path = self._core._validate_relative_path(path) user_id = __user__.get("id", "") # Verify lock ownership lock_path = self._core._get_lock_path(ctx.editzone_base, path) self._core._check_lock_owner(lock_path, user_id) # Get editzone path editzone_path = self._core._get_editzone_path(ctx.editzone_base, ctx.conv_id, path) # Cleanup if editzone_path.exists(): self._core._rm_with_empty_parents(editzone_path, ctx.editzone_base / "editzone") lock_path.unlink(missing_ok=True) return self._core._format_response(True, data={ "zone": ctx.zone_name, "path": path, "changes_discarded": True, }, message=f"Edit cancelled, changes discarded: {path}") except StorageError as e: return self._core._format_error(e, "shed_lockedit_cancel") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_move_uploads_to_storage( self, src: str, dest: str, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Moves file from Uploads to Storage. IMPORTANT: Call shed_import() first to import uploaded files! :param src: Source path in Uploads :param dest: Destination path in Storage :return: Confirmation as JSON """ try: user_root = self._core._get_user_root(__user__) conv_id = self._core._get_conv_id(__metadata__) src_chroot = user_root / "Uploads" / conv_id dest_chroot = user_root / "Storage" / "data" source = self._core._resolve_chroot_path(src_chroot, src) target = self._core._resolve_chroot_path(dest_chroot, dest) if not source.exists(): raise StorageError( "FILE_NOT_FOUND", f"File not found: {src}", {"path": src, "uploads_dir": str(src_chroot)}, "Did you call shed_import(import_all=True) first? Files must be imported before moving." ) # No quota check needed: move within user space doesn't change total usage self._core._ensure_dir(dest_chroot) self._core._ensure_dir(target.parent) shutil.move(str(source), str(target)) return self._core._format_response(True, message=f"Moved: Uploads/{src} -> Storage/{dest}") except StorageError as e: return self._core._format_error(e, "shed_move_uploads_to_storage") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_move_uploads_to_documents( self, src: str, dest: str, message: str = "", __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Moves file from Uploads to Documents with Git commit. IMPORTANT: Call shed_import() first to import uploaded files! :param src: Source path in Uploads :param dest: Destination path in Documents :param message: Commit message :return: Confirmation as JSON """ try: user_root = self._core._get_user_root(__user__) conv_id = self._core._get_conv_id(__metadata__) src_chroot = user_root / "Uploads" / conv_id dest_chroot = user_root / "Documents" / "data" source = self._core._resolve_chroot_path(src_chroot, src) target = self._core._resolve_chroot_path(dest_chroot, dest) if not source.exists(): raise StorageError( "FILE_NOT_FOUND", f"File not found: {src}", {"path": src, "uploads_dir": str(src_chroot)}, "Did you call shed_import(import_all=True) first? Files must be imported before moving." ) # No quota check needed: move within user space doesn't change total usage # Init Git self._core._init_git_repo(dest_chroot) self._core._ensure_dir(target.parent) shutil.move(str(source), str(target)) # Commit if not message: message = f"Import {src}" self._core._git_commit(dest_chroot, message) return self._core._format_response(True, message=f"Moved and committed: Uploads/{src} -> Documents/{dest}") except StorageError as e: return self._core._format_error(e, "shed_move_uploads_to_documents") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_copy_storage_to_documents( self, src: str, dest: str, message: str = "", __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Copies from Storage to Documents with Git commit. :param src: Source path :param dest: Destination path :param message: Commit message :return: Confirmation as JSON """ try: user_root = self._core._get_user_root(__user__) src_chroot = user_root / "Storage" / "data" dest_chroot = user_root / "Documents" / "data" source = self._core._resolve_chroot_path(src_chroot, src) target = self._core._resolve_chroot_path(dest_chroot, dest) if not source.exists(): raise StorageError("FILE_NOT_FOUND", f"File not found: {src}") # Check quota before copy self._core._check_quota(__user__, self._core._get_path_size(source)) # Init Git self._core._init_git_repo(dest_chroot) self._core._ensure_dir(target.parent) if source.is_dir(): shutil.copytree(source, target) else: shutil.copy2(source, target) # Commit if not message: message = f"Import from Storage: {src}" self._core._git_commit(dest_chroot, message) return self._core._format_response(True, message=f"Copied and committed: Storage/{src} -> Documents/{dest}") except StorageError as e: return self._core._format_error(e, "shed_copy_storage_to_documents") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_move_documents_to_storage( self, src: str, dest: str, message: str = "", __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Moves from Documents to Storage with git rm + commit. :param src: Source path :param dest: Destination path :param message: Commit message :return: Confirmation as JSON """ try: user_root = self._core._get_user_root(__user__) src_chroot = user_root / "Documents" / "data" dest_chroot = user_root / "Storage" / "data" source = self._core._resolve_chroot_path(src_chroot, src) target = self._core._resolve_chroot_path(dest_chroot, dest) if not source.exists(): raise StorageError("FILE_NOT_FOUND", f"File not found: {src}") # Check quota (move requires temporary duplication) self._core._check_quota(__user__, self._core._get_path_size(source)) self._core._ensure_dir(dest_chroot) self._core._ensure_dir(target.parent) # Copy to Storage if source.is_dir(): shutil.copytree(source, target) else: shutil.copy2(source, target) # git rm in Documents via Layer 2 self._core._git_run(["rm", "-rf", src], src_chroot) # Commit if not message: message = f"Move to Storage: {src}" self._core._git_commit(src_chroot, message) return self._core._format_response(True, message=f"Moved: Documents/{src} -> Storage/{dest}") except StorageError as e: return self._core._format_error(e, "shed_move_documents_to_storage") except Exception as e: return self._core._format_response(False, message=str(e)) # ========================================================================= # UTILITIES (5 functions) # ========================================================================= async def shed_import( self, filename: str = "", import_all: bool = False, dest_subdir: str = "", __user__: dict = {}, __metadata__: dict = {}, __files__: list = None, __event_emitter__=None, ) -> str: """ STEP 1: Imports files from chat to Uploads/. ALWAYS call this function first when user uploads a file! :param filename: Import only this specific file :param import_all: True to import ALL attached files :param dest_subdir: Optional subdirectory in Uploads/ :return: List of imported files Examples: shed_import(import_all=True) -> import all attached files shed_import(filename="report.pdf") -> import only report.pdf """ try: user_root = self._core._get_user_root(__user__) conv_id = self._core._get_conv_id(__metadata__) uploads_dir = user_root / "Uploads" / conv_id if dest_subdir: # Validate dest_subdir dest_subdir = self._core._validate_relative_path(dest_subdir) if dest_subdir: uploads_dir = uploads_dir / dest_subdir self._core._ensure_dir(uploads_dir) # Get files (try multiple sources) files = __files__ or [] if not files: files = __metadata__.get("files", []) if not files: return self._core._format_response( False, message="No files attached to conversation" ) imported = [] errors = [] # Possible paths for Open WebUI files owui_upload_paths = [ Path("/app/backend/data/uploads"), Path("/app/backend/data/files"), Path("/app/backend/data/cache/files"), Path("/app/backend/data/cache/uploads"), ] for file_info in files: try: file_path = None file_name = None file_id = None user_id_from_file = None if isinstance(file_info, dict): # Open WebUI structure detected file_name = file_info.get("name") or file_info.get("filename") file_id = file_info.get("id") # Search in nested "file" dict nested_file = file_info.get("file") if isinstance(nested_file, dict): file_path = nested_file.get("path") or nested_file.get("file_path") user_id_from_file = nested_file.get("user_id") if not file_name: file_name = nested_file.get("filename") or nested_file.get("name") if not file_id: file_id = nested_file.get("id") # Try direct keys if not found if not file_path: file_path = file_info.get("path") or file_info.get("file_path") # If no direct path, search file by ID if not file_path and file_id: # Search in different possible paths for base_path in owui_upload_paths: if not base_path.exists(): continue # Format Open WebUI: {id}_{name} if file_name: candidate = base_path / f"{file_id}_{file_name}" if candidate.exists(): file_path = str(candidate) break # Essayer: /base/file_id candidate = base_path / file_id if candidate.exists(): file_path = str(candidate) break # Essayer: /base/user_id/file_id if user_id_from_file: candidate = base_path / user_id_from_file / file_id if candidate.exists(): file_path = str(candidate) break # Chercher par pattern {id}_* for f in base_path.glob(f"{file_id}_*"): file_path = str(f) if not file_name: file_name = f.name.split("_", 1)[1] if "_" in f.name else f.name break if file_path: break elif isinstance(file_info, str): file_path = file_info file_name = Path(file_info).name if not file_name: file_name = file_id or "unknown" # Security: clean filename (prevent traversal) file_name = Path(file_name).name # Keep only the name, not the path if not file_name or file_name in (".", ".."): file_name = file_id or "unknown" # Filter if filename specified if filename and file_name != filename: continue if not import_all and not filename: continue # Copy the file if file_path and isinstance(file_path, str): source = Path(file_path) if source.exists(): # Security: reject symlinks if source.is_symlink(): errors.append(f"{file_name}: symlinks not allowed") continue # Security: only regular files if not source.is_file(): errors.append(f"{file_name}: not a regular file") continue # Check file size file_size = source.stat().st_size max_size = self.valves.max_file_size_mb * 1024 * 1024 if file_size > max_size: errors.append(f"{file_name}: too large ({file_size / 1024 / 1024:.1f} MB > {self.valves.max_file_size_mb} MB)") continue # Check quota try: self._core._check_quota(__user__, file_size) except StorageError as quota_error: errors.append(f"{file_name}: {quota_error.message}") continue dest = uploads_dir / file_name shutil.copy2(source, dest) imported.append(file_name) else: errors.append(f"{file_name}: file not found") else: errors.append(f"{file_name}: source file not found") except Exception as e: errors.append(f"Error: {str(e)}") if not imported: return self._core._format_response( False, message="No matching files found", data={"errors": errors} if errors else None ) result_data = {"imported": imported, "count": len(imported)} if errors: result_data["errors"] = errors return self._core._format_response( True, data=result_data, message=f"Imported {len(imported)} file(s)" ) except Exception as e: return self._core._format_response(False, message=str(e)) # ========================================================================= # BUILTIN ZIP/UNZIP (Python zipfile - no external dependency) # ========================================================================= async def shed_unzip( self, zone: str, src: str, dest: str = "", __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Extracts a ZIP archive using Python zipfile (builtin, no external dependency). Works in Storage or Documents zones. :param zone: Target zone ("storage" or "documents") :param src: Path to ZIP file (relative to zone) :param dest: Destination folder (relative to zone). Empty = same folder as ZIP :return: List of extracted files as JSON Example: shed_unzip(zone="storage", src="downloads/repo.zip", dest="projects/repo") """ try: user_root = self._core._get_user_root(__user__) zone_lower = zone.lower() # Validate zone if zone_lower not in ("storage", "documents"): raise StorageError( "ZONE_FORBIDDEN", f"Zone '{zone}' not allowed for unzip", {"zone": zone}, "Use 'storage' or 'documents'" ) # Get zone path if zone_lower == "storage": zone_root = user_root / "Storage" / "data" else: zone_root = user_root / "Documents" / "data" # Validate and resolve paths src = self._core._validate_relative_path(src) src_path = self._core._resolve_chroot_path(zone_root, src) if not src_path.exists(): raise StorageError("FILE_NOT_FOUND", f"ZIP file not found: {src}") if not src_path.suffix.lower() == ".zip": raise StorageError( "INVALID_FORMAT", "File is not a ZIP archive", {"file": src}, "Only .zip files are supported" ) # Determine destination if dest: dest = self._core._validate_relative_path(dest) dest_path = self._core._resolve_chroot_path(zone_root, dest) else: dest_path = src_path.parent # Check quota before extraction (estimate: 3x zip size) zip_size = src_path.stat().st_size self._core._check_quota(__user__, zip_size * 3) # Extract self._core._ensure_dir(dest_path) extracted_files = [] with zipfile.ZipFile(src_path, 'r') as zf: # Security: check for path traversal in zip entries for member in zf.namelist(): # Block absolute paths and .. in zip entries if member.startswith('/') or '..' in member: raise StorageError( "PATH_ESCAPE", f"ZIP contains dangerous path: {member}", {"member": member}, "ZIP file may be malicious (path traversal attempt)" ) # Extract all files zf.extractall(dest_path) extracted_files = zf.namelist() # Git commit if Documents if zone_lower == "documents": docs_data = user_root / "Documents" / "data" self._core._git_run(["add", "-A"], cwd=docs_data) self._core._git_run( ["commit", "-m", f"Extracted {src} to {dest or 'same folder'}", "--allow-empty"], cwd=docs_data ) return self._core._format_response( True, data={ "source": src, "destination": str(dest_path.relative_to(zone_root)), "files_count": len(extracted_files), "files": extracted_files[:50], # Limit to first 50 "truncated": len(extracted_files) > 50, }, message=f"Extracted {len(extracted_files)} files" ) except StorageError as e: return self._core._format_error(e, "shed_unzip") except zipfile.BadZipFile: return self._core._format_response(False, message="Invalid or corrupted ZIP file") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_zip( self, zone: str, src: str, dest: str = "", include_empty_dirs: bool = False, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Creates a ZIP archive using Python zipfile (builtin, no external dependency). Works in Storage or Documents zones. :param zone: Source zone ("storage" or "documents") :param src: File or folder to compress (relative to zone) :param dest: Destination ZIP path (relative to zone). Empty = src + ".zip" :param include_empty_dirs: Include empty directories in archive (default: False, like standard zip) :return: Path to created ZIP as JSON Example: shed_zip(zone="storage", src="projects/myapp", dest="archives/myapp.zip") shed_zip(zone="storage", src="projects", dest="backup.zip", include_empty_dirs=True) """ try: user_root = self._core._get_user_root(__user__) zone_lower = zone.lower() # Validate zone if zone_lower not in ("storage", "documents"): raise StorageError( "ZONE_FORBIDDEN", f"Zone '{zone}' not allowed for zip", {"zone": zone}, "Use 'storage' or 'documents'" ) # Get zone path if zone_lower == "storage": zone_root = user_root / "Storage" / "data" else: zone_root = user_root / "Documents" / "data" # Validate and resolve source path src = self._core._validate_relative_path(src) src_path = self._core._resolve_chroot_path(zone_root, src) if not src_path.exists(): raise StorageError("FILE_NOT_FOUND", f"Source not found: {src}") # Determine destination if dest: dest = self._core._validate_relative_path(dest) if not dest.endswith('.zip'): dest += '.zip' dest_path = self._core._resolve_chroot_path(zone_root, dest) else: dest_path = src_path.parent / (src_path.name + ".zip") # Check quota (estimate: same size as source) src_size = self._core._get_path_size(src_path) self._core._check_quota(__user__, src_size) # Create ZIP self._core._ensure_dir(dest_path.parent) files_added = 0 dirs_added = 0 with zipfile.ZipFile(dest_path, 'w', zipfile.ZIP_DEFLATED) as zf: if src_path.is_file(): zf.write(src_path, src_path.name) files_added = 1 else: # Add directory recursively for item_path in src_path.rglob('*'): arcname = item_path.relative_to(src_path.parent) if item_path.is_file(): zf.write(item_path, arcname) files_added += 1 elif item_path.is_dir() and include_empty_dirs: # Check if directory is empty (no files, only subdirs or nothing) has_files = any(p.is_file() for p in item_path.rglob('*')) if not has_files: # Add empty directory entry (path must end with /) zf.writestr(str(arcname) + '/', '') dirs_added += 1 # Git commit if Documents if zone_lower == "documents": docs_data = user_root / "Documents" / "data" self._core._git_run(["add", "-A"], cwd=docs_data) self._core._git_run( ["commit", "-m", f"Created ZIP: {dest_path.name}", "--allow-empty"], cwd=docs_data ) zip_size = dest_path.stat().st_size response_data = { "source": src, "zip_path": str(dest_path.relative_to(zone_root)), "files_count": files_added, "size_bytes": zip_size, "size_human": f"{zip_size / 1024 / 1024:.2f} MB" if zip_size > 1024*1024 else f"{zip_size / 1024:.1f} KB", } if dirs_added > 0: response_data["empty_dirs_count"] = dirs_added message = f"Created ZIP with {files_added} files" if dirs_added > 0: message += f" and {dirs_added} empty directories" return self._core._format_response(True, data=response_data, message=message) except StorageError as e: return self._core._format_error(e, "shed_zip") except Exception as e: return self._core._format_response(False, message=str(e)) # ========================================================================= # BUILTIN UTILITIES - Replace missing system commands (5 functions) # ========================================================================= async def shed_tree( self, zone: str, path: str = ".", depth: int = 3, group: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Displays directory tree structure (replaces missing 'tree' command). :param zone: Target zone ("uploads", "storage", "documents", or "group") :param path: Starting path (default: root of zone) :param depth: Maximum depth to display (default: 3, max: 10) :param group: Group name (required if zone="group") :return: Tree structure as text Example: shed_tree(zone="storage", path="projects", depth=2) shed_tree(zone="group", group="MyTeam", path="docs") """ try: user_root = self._core._get_user_root(__user__) conv_id = self._core._get_conv_id(__metadata__) zone_lower = zone.lower() # Validate zone if zone_lower == "uploads": zone_root = user_root / "Uploads" / conv_id elif zone_lower == "storage": zone_root = user_root / "Storage" / "data" elif zone_lower == "documents": zone_root = user_root / "Documents" / "data" elif zone_lower == "group": if not group: raise StorageError( "MISSING_PARAMETER", "Group name is required for zone='group'", hint="Use: shed_tree(zone='group', group='GroupName', path='...')" ) # Resolve group and check membership group_id = self._core._validate_group_id(group) self._core._check_group_access(__user__, group_id) zone_root = self._core._get_group_data_path(group_id) else: raise StorageError( "ZONE_FORBIDDEN", f"Invalid zone: {zone}", hint="Use 'uploads', 'storage', 'documents', or 'group'" ) if not zone_root.exists(): return self._core._format_response(True, data={"tree": "(empty)"}, message="Zone is empty") # Validate and resolve path path = self._core._validate_relative_path(path) if path and path != "." else "" start_path = self._core._resolve_chroot_path(zone_root, path) if path else zone_root if not start_path.exists(): raise StorageError("FILE_NOT_FOUND", f"Path not found: {path}") # Clamp depth depth = max(1, min(depth, 10)) # Build tree def build_tree(current: Path, prefix: str = "", current_depth: int = 0) -> list: if current_depth >= depth: return [] lines = [] try: items = sorted(current.iterdir(), key=lambda x: (not x.is_dir(), x.name.lower())) except PermissionError: return [f"{prefix}[permission denied]"] # Filter out hidden files and limit items items = [i for i in items if not i.name.startswith('.')] total = len(items) for idx, item in enumerate(items[:100]): # Limit to 100 items per dir is_last = (idx == len(items[:100]) - 1) or (idx == 99 and total > 100) connector = "└── " if is_last else "├── " if item.is_dir(): lines.append(f"{prefix}{connector}{item.name}/") if current_depth + 1 < depth: extension = " " if is_last else "│ " lines.extend(build_tree(item, prefix + extension, current_depth + 1)) else: try: size = item.stat().st_size size_str = f"{size / 1024 / 1024:.1f}M" if size > 1024*1024 else f"{size / 1024:.1f}K" if size > 1024 else f"{size}B" except (OSError, FileNotFoundError): size_str = "?" lines.append(f"{prefix}{connector}{item.name} ({size_str})") if total > 100: lines.append(f"{prefix}... and {total - 100} more items") return lines # Generate tree root_name = start_path.name if path else (group if zone_lower == "group" else zone_lower.capitalize()) tree_lines = [f"{root_name}/"] tree_lines.extend(build_tree(start_path)) tree_output = "\n".join(tree_lines) zone_display = f"Group:{group}" if zone_lower == "group" else zone_lower.capitalize() return self._core._format_response( True, data={"tree": tree_output, "depth": depth, "path": path or ".", "zone": zone_display}, message=f"Tree of {zone_display}/{path or '.'} (depth={depth})" ) except StorageError as e: return self._core._format_error(e, "shed_tree") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_zipinfo( self, zone: str, path: str, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Shows ZIP archive contents and metadata (replaces missing 'zipinfo' command). :param zone: Target zone ("uploads", "storage", or "documents") :param path: Path to ZIP file :return: ZIP contents and metadata as JSON Example: shed_zipinfo(zone="storage", path="backup.zip") """ try: user_root = self._core._get_user_root(__user__) zone_lower = zone.lower() # Validate zone if zone_lower == "storage": zone_root = user_root / "Storage" / "data" elif zone_lower == "documents": zone_root = user_root / "Documents" / "data" elif zone_lower == "uploads": conv_id = self._core._get_conv_id(__metadata__) zone_root = user_root / "Uploads" / conv_id else: raise StorageError( "ZONE_FORBIDDEN", f"Invalid zone: {zone}", hint="Use 'uploads', 'storage', or 'documents'" ) # Validate and resolve path path = self._core._validate_relative_path(path) zip_path = self._core._resolve_chroot_path(zone_root, path) if not zip_path.exists(): raise StorageError("FILE_NOT_FOUND", f"File not found: {path}") if not zip_path.suffix.lower() == ".zip": raise StorageError( "INVALID_FORMAT", "File is not a ZIP archive", hint="Only .zip files are supported" ) # Read ZIP info with zipfile.ZipFile(zip_path, 'r') as zf: files = [] total_size = 0 total_compressed = 0 for info in zf.infolist(): total_size += info.file_size total_compressed += info.compress_size files.append({ "name": info.filename, "size": info.file_size, "compressed": info.compress_size, "ratio": f"{(1 - info.compress_size / info.file_size) * 100:.1f}%" if info.file_size > 0 else "0%", "date": f"{info.date_time[0]:04d}-{info.date_time[1]:02d}-{info.date_time[2]:02d} {info.date_time[3]:02d}:{info.date_time[4]:02d}", "is_dir": info.filename.endswith('/'), }) # Summary ratio = (1 - total_compressed / total_size) * 100 if total_size > 0 else 0 return self._core._format_response( True, data={ "path": path, "files_count": len(files), "total_size": total_size, "total_size_human": f"{total_size / 1024 / 1024:.2f} MB" if total_size > 1024*1024 else f"{total_size / 1024:.1f} KB", "compressed_size": total_compressed, "compression_ratio": f"{ratio:.1f}%", "files": files[:100], # Limit to 100 "truncated": len(files) > 100, }, message=f"ZIP contains {len(files)} files ({ratio:.1f}% compression)" ) except StorageError as e: return self._core._format_error(e, "shed_zipinfo") except zipfile.BadZipFile: return self._core._format_response(False, message="Invalid or corrupted ZIP file") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_file_type( self, zone: str, path: str, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Identifies file MIME type (replaces missing 'file' command). :param zone: Target zone ("uploads", "storage", or "documents") :param path: Path to file :return: File type information as JSON Example: shed_file_type(zone="storage", path="document.pdf") """ try: user_root = self._core._get_user_root(__user__) conv_id = self._core._get_conv_id(__metadata__) zone_lower = zone.lower() # Validate zone if zone_lower == "uploads": zone_root = user_root / "Uploads" / conv_id elif zone_lower == "storage": zone_root = user_root / "Storage" / "data" elif zone_lower == "documents": zone_root = user_root / "Documents" / "data" else: raise StorageError( "ZONE_FORBIDDEN", f"Invalid zone: {zone}", hint="Use 'uploads', 'storage', or 'documents'" ) # Validate and resolve path path = self._core._validate_relative_path(path) file_path = self._core._resolve_chroot_path(zone_root, path) if not file_path.exists(): raise StorageError("FILE_NOT_FOUND", f"File not found: {path}") if file_path.is_dir(): return self._core._format_response( True, data={"path": path, "type": "directory", "mime": "inode/directory"}, message="Directory" ) # Get MIME type from extension mime_type, encoding = mimetypes.guess_type(str(file_path)) # Read magic bytes for common formats magic_info = None try: with open(file_path, 'rb') as f: header = f.read(16) # Common magic bytes magic_signatures = { b'\x89PNG\r\n\x1a\n': 'PNG image', b'\xff\xd8\xff': 'JPEG image', b'GIF87a': 'GIF image (87a)', b'GIF89a': 'GIF image (89a)', b'PK\x03\x04': 'ZIP archive (or DOCX/XLSX/PPTX/JAR)', b'PK\x05\x06': 'ZIP archive (empty)', b'%PDF': 'PDF document', b'\x7fELF': 'ELF executable', b'#!': 'Script (shebang)', b'\x1f\x8b': 'Gzip compressed', b'BZ': 'Bzip2 compressed', b'\xfd7zXZ': 'XZ compressed', b'Rar!': 'RAR archive', b'7z\xbc\xaf': '7-Zip archive', b'\x00\x00\x00\x1c\x66\x74\x79\x70': 'MP4/MOV video', b'\x00\x00\x00\x20\x66\x74\x79\x70': 'MP4 video', b'ID3': 'MP3 audio (ID3)', b'\xff\xfb': 'MP3 audio', b'OggS': 'Ogg container', b'RIFF': 'RIFF container (WAV/AVI)', b'SQLite format 3': 'SQLite database', } for sig, desc in magic_signatures.items(): if header.startswith(sig): magic_info = desc break # Check for text if not magic_info: try: # Try to decode as UTF-8 with open(file_path, 'r', encoding='utf-8') as tf: tf.read(1024) magic_info = "Text file (UTF-8)" except UnicodeDecodeError: magic_info = "Binary file" except Exception: pass # Get file stats stat = file_path.stat() return self._core._format_response( True, data={ "path": path, "mime": mime_type or "application/octet-stream", "encoding": encoding, "magic": magic_info, "extension": file_path.suffix, "size": stat.st_size, "size_human": f"{stat.st_size / 1024 / 1024:.2f} MB" if stat.st_size > 1024*1024 else f"{stat.st_size / 1024:.1f} KB" if stat.st_size > 1024 else f"{stat.st_size} B", }, message=f"{magic_info or mime_type or 'Unknown type'}" ) except StorageError as e: return self._core._format_error(e, "shed_file_type") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_convert_eol( self, zone: str, path: str, to: str = "unix", __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Converts line endings (replaces missing 'dos2unix'/'unix2dos' commands). :param zone: Target zone ("storage" or "documents") :param path: Path to text file :param to: Target format: "unix" (LF) or "dos" (CRLF) :return: Conversion result as JSON Example: shed_convert_eol(zone="storage", path="script.sh", to="unix") shed_convert_eol(zone="storage", path="readme.txt", to="dos") """ try: user_root = self._core._get_user_root(__user__) zone_lower = zone.lower() # Validate zone (not uploads - read-only) if zone_lower == "storage": zone_root = user_root / "Storage" / "data" elif zone_lower == "documents": zone_root = user_root / "Documents" / "data" else: raise StorageError( "ZONE_FORBIDDEN", f"Invalid zone for writing: {zone}", hint="Use 'storage' or 'documents'" ) # Validate target format to_lower = to.lower() if to_lower not in ("unix", "dos", "lf", "crlf"): raise StorageError( "INVALID_MODE", f"Invalid EOL format: {to}", hint="Use 'unix' (LF) or 'dos' (CRLF)" ) # Normalize format name to_unix = to_lower in ("unix", "lf") # Validate and resolve path path = self._core._validate_relative_path(path) file_path = self._core._resolve_chroot_path(zone_root, path) if not file_path.exists(): raise StorageError("FILE_NOT_FOUND", f"File not found: {path}") if file_path.is_dir(): raise StorageError("INVALID_FORMAT", "Cannot convert directory") # Read file try: content = file_path.read_bytes() except Exception as e: raise StorageError("EXEC_ERROR", f"Cannot read file: {e}") # Count existing line endings crlf_count = content.count(b'\r\n') lf_only_count = content.count(b'\n') - crlf_count cr_only_count = content.count(b'\r') - crlf_count original_format = "mixed" if crlf_count > 0 and lf_only_count == 0: original_format = "dos" elif lf_only_count > 0 and crlf_count == 0: original_format = "unix" elif cr_only_count > 0 and crlf_count == 0 and lf_only_count == 0: original_format = "mac (old)" # Convert if to_unix: # To Unix: CRLF -> LF, CR -> LF new_content = content.replace(b'\r\n', b'\n').replace(b'\r', b'\n') target_format = "unix" else: # To DOS: First normalize to LF, then convert to CRLF normalized = content.replace(b'\r\n', b'\n').replace(b'\r', b'\n') new_content = normalized.replace(b'\n', b'\r\n') target_format = "dos" # Check if changed if new_content == content: return self._core._format_response( True, data={"path": path, "format": original_format, "changed": False}, message=f"File already in {target_format} format" ) # Write back file_path.write_bytes(new_content) # Git commit if Documents if zone_lower == "documents": self._core._git_commit(zone_root, f"Convert EOL to {target_format}: {path}") return self._core._format_response( True, data={ "path": path, "original_format": original_format, "new_format": target_format, "changed": True, "lines_converted": crlf_count if to_unix else lf_only_count, }, message=f"Converted {path} from {original_format} to {target_format}" ) except StorageError as e: return self._core._format_error(e, "shed_convert_eol") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_hexdump( self, zone: str, path: str, offset: int = 0, length: int = 256, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Shows hexadecimal dump of file (replaces missing 'xxd'/'hexdump' commands). :param zone: Target zone ("uploads", "storage", or "documents") :param path: Path to file :param offset: Starting offset in bytes (default: 0) :param length: Number of bytes to display (default: 256, max: 4096) :return: Hex dump as text Example: shed_hexdump(zone="storage", path="binary.dat", offset=0, length=128) """ try: user_root = self._core._get_user_root(__user__) conv_id = self._core._get_conv_id(__metadata__) zone_lower = zone.lower() # Validate zone if zone_lower == "uploads": zone_root = user_root / "Uploads" / conv_id elif zone_lower == "storage": zone_root = user_root / "Storage" / "data" elif zone_lower == "documents": zone_root = user_root / "Documents" / "data" else: raise StorageError( "ZONE_FORBIDDEN", f"Invalid zone: {zone}", hint="Use 'uploads', 'storage', or 'documents'" ) # Validate and resolve path path = self._core._validate_relative_path(path) file_path = self._core._resolve_chroot_path(zone_root, path) if not file_path.exists(): raise StorageError("FILE_NOT_FOUND", f"File not found: {path}") if file_path.is_dir(): raise StorageError("INVALID_FORMAT", "Cannot hexdump directory") # Clamp values offset = max(0, offset) length = max(1, min(length, 4096)) # Read file portion file_size = file_path.stat().st_size if offset >= file_size: return self._core._format_response( True, data={"path": path, "offset": offset, "size": file_size, "hexdump": "(offset beyond file)"}, message="Offset is beyond end of file" ) with open(file_path, 'rb') as f: f.seek(offset) data = f.read(length) # Format hex dump (xxd style) lines = [] for i in range(0, len(data), 16): chunk = data[i:i+16] addr = f"{offset + i:08x}" # Hex part hex_parts = [] for j in range(0, 16, 2): if j < len(chunk): if j + 1 < len(chunk): hex_parts.append(f"{chunk[j]:02x}{chunk[j+1]:02x}") else: hex_parts.append(f"{chunk[j]:02x} ") else: hex_parts.append(" ") hex_str = " ".join(hex_parts) # ASCII part ascii_str = "" for b in chunk: if 32 <= b < 127: ascii_str += chr(b) else: ascii_str += "." lines.append(f"{addr}: {hex_str} {ascii_str}") hexdump_output = "\n".join(lines) return self._core._format_response( True, data={ "path": path, "offset": offset, "length": len(data), "file_size": file_size, "hexdump": hexdump_output, }, message=f"Hexdump of {path} ({len(data)} bytes from offset {offset})" ) except StorageError as e: return self._core._format_error(e, "shed_hexdump") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_sqlite( self, zone: str, path: str, query: str = None, params: list = None, limit: int = None, output_csv: str = None, import_csv: str = None, table: str = None, if_exists: str = "fail", delimiter: str = None, encoding: str = None, date_columns: list = None, date_format: str = None, decimal: str = None, skip_rows: int = 0, has_header: bool = True, group: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Executes SQL query on a SQLite database file OR imports a CSV file. Python builtin using sqlite3 module, no external dependency. ⚠️ CONTEXT PROTECTION: SELECT queries without LIMIT return only 10 rows by default! Use limit=N for more rows, or output_csv="file.csv" to export all results to file. :param zone: Target zone ("uploads", "storage", "documents", or "group") :param path: Path to .db file (created if not exists for write queries) :param query: SQL query to execute (optional if import_csv is provided) :param params: Optional list of parameters for parameterized queries (prevents SQL injection) :param limit: Max rows to return for SELECT (default: 10 if no LIMIT in query). Use limit=0 for no limit (dangerous!). :param output_csv: Export ALL results to this CSV file instead of returning rows (prevents context pollution) :param import_csv: Path to CSV file to import (in same zone). If provided, imports CSV instead of running query. :param table: Table name for CSV import (required if import_csv is provided) :param if_exists: What to do if table exists: "fail" (error), "replace" (drop+recreate), "append" (add rows) :param delimiter: CSV delimiter. None=auto-detect, or specify: ",", ";", "\\t", "|" :param encoding: File encoding. None=auto-detect (tries utf-8, latin-1, cp1252), or specify: "utf-8", "latin-1", "cp1252", etc. :param date_columns: List of column names to parse as dates, e.g. ["created_at", "updated_at"] :param date_format: Date format: None=auto, "dayfirst" (DD/MM/YYYY), "monthfirst" (MM/DD/YYYY), or strptime format like "%d/%m/%Y" :param decimal: Decimal separator for numbers. None="." (default), or "," for European format (1.234,56) :param skip_rows: Number of rows to skip at the beginning (before header). Default: 0 :param has_header: True if first row (after skip_rows) contains column names, False if data only. Default: True. ⚠️ If False, columns are named col_1, col_2, etc. :param group: Group ID or name (required if zone="group") :return: Query results or import stats as JSON Examples: # === SQL QUERIES === # Basic SELECT (returns max 10 rows by default) shed_sqlite(zone="storage", path="data.db", query="SELECT * FROM users") # → Returns 10 rows + warning if more exist # Request more rows explicitly shed_sqlite(zone="storage", path="data.db", query="SELECT * FROM users", limit=100) # → Returns up to 100 rows # Use LIMIT in SQL (respected as-is) shed_sqlite(zone="storage", path="data.db", query="SELECT * FROM users LIMIT 50") # → Returns 50 rows, no warning # Export ALL results to CSV (no context pollution!) shed_sqlite(zone="storage", path="data.db", query="SELECT * FROM users", output_csv="users_export.csv") # → Writes all rows to file, returns stats only # Create a table shed_sqlite(zone="storage", path="data.db", query="CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT, email TEXT)") # Insert with parameters (safe from SQL injection) shed_sqlite(zone="storage", path="data.db", query="INSERT INTO users (name, email) VALUES (?, ?)", params=["Alice", "alice@example.com"]) # === CSV IMPORT (FAST, NO CONTEXT POLLUTION) === # Basic import (auto-detects delimiter and encoding) shed_sqlite(zone="storage", path="data.db", import_csv="users.csv", table="users") # French/European CSV (semicolon, comma decimal, latin-1) shed_sqlite(zone="storage", path="data.db", import_csv="french_data.csv", table="sales", delimiter=";", decimal=",", encoding="latin-1") # With date parsing (European format DD/MM/YYYY) shed_sqlite(zone="storage", path="data.db", import_csv="orders.csv", table="orders", date_columns=["order_date", "ship_date"], date_format="dayfirst") # TSV file (tab-separated) shed_sqlite(zone="storage", path="data.db", import_csv="data.tsv", table="data", delimiter="\\t") # Skip header rows (e.g., file has title + empty row before headers) shed_sqlite(zone="storage", path="data.db", import_csv="report.csv", table="report", skip_rows=2) # CSV without header row (data only, columns named col_1, col_2, ...) shed_sqlite(zone="storage", path="data.db", import_csv="raw_data.csv", table="raw", has_header=False) # Replace existing table shed_sqlite(zone="storage", path="data.db", import_csv="updated.csv", table="users", if_exists="replace") Note: Uses pandas if available (best auto-detection), falls back to csv module. CSV import keeps data on disk - no context pollution! """ try: user_root = self._core._get_user_root(__user__) conv_id = self._core._get_conv_id(__metadata__) zone_lower = zone.lower() # Determine the zone root if zone_lower == "uploads": zone_root = user_root / "Uploads" / conv_id readonly = True elif zone_lower == "storage": zone_root = user_root / "Storage" / "data" readonly = False elif zone_lower == "documents": zone_root = user_root / "Documents" / "data" readonly = False elif zone_lower == "group": if not group: raise StorageError( "MISSING_PARAMETER", "Group parameter required when zone='group'", hint="Add group='group_name' parameter" ) # Validate and resolve group group = self._core._validate_group_id(group) self._core._check_group_access(__user__, group) zone_root = Path(self.valves.storage_base_path) / "groups" / group / "data" readonly = False else: raise StorageError( "ZONE_FORBIDDEN", f"Invalid zone: {zone}", hint="Use 'uploads', 'storage', 'documents', or 'group'" ) # Validate and resolve path path = self._core._validate_relative_path(path) db_path = self._core._resolve_chroot_path(zone_root, path) # Ensure parent directory exists db_path.parent.mkdir(parents=True, exist_ok=True) # ===================================================== # CSV IMPORT MODE # ===================================================== if import_csv: # Validate parameters if not table: raise StorageError( "MISSING_PARAMETER", "table parameter required for CSV import", hint="Add table='tablename' parameter" ) if if_exists not in ("fail", "replace", "append"): raise StorageError( "INVALID_PARAMETER", f"Invalid if_exists value: {if_exists}", hint="Use 'fail', 'replace', or 'append'" ) # Block CSV import in readonly zones if readonly: raise StorageError( "ZONE_FORBIDDEN", "CSV import not allowed in Uploads zone", hint="Move the CSV to Storage or Documents first, then import" ) # Validate table name (prevent SQL injection) if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', table): raise StorageError( "INVALID_PARAMETER", "Invalid table name", {"table": table}, hint="Table name must be alphanumeric with underscores, starting with letter or underscore" ) # Resolve CSV path (in same zone) import_csv_path = self._core._validate_relative_path(import_csv) csv_path = self._core._resolve_chroot_path(zone_root, import_csv_path) if not csv_path.exists(): raise StorageError( "FILE_NOT_FOUND", f"CSV file not found: {import_csv}", hint="Check the path is correct and relative to the zone" ) # Try to use pandas (best auto-detection), fallback to csv module use_pandas = False try: import pandas as pd use_pandas = True except ImportError: pass conn = sqlite3.connect(str(db_path), timeout=30.0) try: cursor = conn.cursor() # Check if table exists cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", (table,)) table_exists = cursor.fetchone() is not None if table_exists: if if_exists == "fail": conn.close() raise StorageError( "TABLE_EXISTS", f"Table '{table}' already exists", hint="Use if_exists='replace' or if_exists='append'" ) elif if_exists == "replace": cursor.execute(f"DROP TABLE IF EXISTS {table}") table_exists = False import_info = {"method": "unknown"} if use_pandas: # ===================================================== # PANDAS IMPORT (best auto-detection) # ===================================================== import_info["method"] = "pandas" # Build pandas read_csv arguments pd_kwargs = {} # Delimiter: auto-detect or specified if delimiter: pd_kwargs['sep'] = delimiter.replace('\\t', '\t') else: pd_kwargs['sep'] = None # Auto-detect pd_kwargs['engine'] = 'python' # Required for sep=None # Encoding: auto-detect or specified if encoding: pd_kwargs['encoding'] = encoding else: # Try common encodings detected_encoding = None for enc in ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']: try: with open(csv_path, 'r', encoding=enc) as test_f: test_f.read(8192) detected_encoding = enc break except (UnicodeDecodeError, UnicodeError): continue pd_kwargs['encoding'] = detected_encoding or 'utf-8' import_info['detected_encoding'] = pd_kwargs['encoding'] # Skip rows if skip_rows > 0: pd_kwargs['skiprows'] = skip_rows # Decimal separator if decimal: pd_kwargs['decimal'] = decimal # Date parsing if date_columns: pd_kwargs['parse_dates'] = date_columns if date_format: if date_format == 'dayfirst': pd_kwargs['dayfirst'] = True elif date_format == 'monthfirst': pd_kwargs['dayfirst'] = False elif date_format != 'auto': pd_kwargs['date_format'] = date_format # Handle NA values pd_kwargs['na_values'] = ['', 'NA', 'N/A', 'NULL', 'null', 'None', 'none', '#N/A', '#NA'] pd_kwargs['keep_default_na'] = True # Handle header if not has_header: pd_kwargs['header'] = None # No header row in file # Read CSV try: df = pd.read_csv(str(csv_path), **pd_kwargs) except Exception as e: conn.close() raise StorageError( "CSV_PARSE_ERROR", f"Failed to parse CSV with pandas: {str(e)}", {"csv": import_csv, "pandas_args": {k: str(v) for k, v in pd_kwargs.items()}}, hint="Try specifying delimiter, encoding, or skip_rows explicitly" ) # Get detected delimiter if auto-detected if 'sep' in pd_kwargs and pd_kwargs['sep'] is None: # pandas doesn't expose detected delimiter easily, so we sniff it import csv as csv_module try: with open(csv_path, 'r', encoding=pd_kwargs.get('encoding', 'utf-8')) as sniff_f: sample = sniff_f.read(8192) dialect = csv_module.Sniffer().sniff(sample) import_info['detected_delimiter'] = repr(dialect.delimiter) except Exception: pass # Generate or sanitize column names if not has_header: # No header: generate col_1, col_2, ... df.columns = [f"col_{i+1}" for i in range(len(df.columns))] import_info['generated_columns'] = True else: # Sanitize column names from header clean_columns = [] for col in df.columns: clean = re.sub(r'[^\w]', '_', str(col).strip()) if not clean or clean[0].isdigit(): clean = '_' + clean clean_columns.append(clean) df.columns = clean_columns # Import to SQLite pandas_if_exists = 'append' if if_exists == 'append' and table_exists else 'replace' df.to_sql(table, conn, if_exists=pandas_if_exists, index=False) total_rows = len(df) clean_headers = list(df.columns) # Get column types import_info['column_types'] = {col: str(df[col].dtype) for col in df.columns} else: # ===================================================== # CSV MODULE FALLBACK (always available) # ===================================================== import csv as csv_module import_info["method"] = "csv_module" # Detect encoding if not specified file_encoding = encoding if not file_encoding: for enc in ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']: try: with open(csv_path, 'r', encoding=enc) as test_f: test_f.read(8192) file_encoding = enc break except (UnicodeDecodeError, UnicodeError): continue file_encoding = file_encoding or 'utf-8' import_info['detected_encoding'] = file_encoding # Detect delimiter using Sniffer if not specified csv_delimiter = delimiter.replace('\\t', '\t') if delimiter else None if not csv_delimiter: try: with open(csv_path, 'r', encoding=file_encoding) as sniff_f: sample = sniff_f.read(8192) dialect = csv_module.Sniffer().sniff(sample, delimiters=',;\t|') csv_delimiter = dialect.delimiter import_info['detected_delimiter'] = repr(csv_delimiter) except csv_module.Error: csv_delimiter = ',' # Default to comma # Read and import CSV with open(csv_path, 'r', newline='', encoding=file_encoding) as f: # Skip rows if needed for _ in range(skip_rows): next(f, None) reader = csv_module.reader(f, delimiter=csv_delimiter) # Handle header row if has_header: headers = next(reader) # First row = column names # Sanitize column names clean_headers = [] for h in headers: clean = re.sub(r'[^\w]', '_', h.strip()) if not clean or clean[0].isdigit(): clean = '_' + clean clean_headers.append(clean) first_data_row = None else: # No header: first row is data, generate column names first_data_row = next(reader, None) if first_data_row is None: conn.close() raise StorageError( "CSV_EMPTY", "CSV file is empty (no data rows)", {"csv": import_csv} ) clean_headers = [f"col_{i+1}" for i in range(len(first_data_row))] import_info['generated_columns'] = True # Create table if needed if not table_exists or if_exists == "replace": columns_def = ", ".join(f"{col} TEXT" for col in clean_headers) cursor.execute(f"CREATE TABLE {table} ({columns_def})") # Prepare INSERT statement placeholders = ", ".join("?" * len(clean_headers)) insert_sql = f"INSERT INTO {table} VALUES ({placeholders})" # Date parsing setup date_col_indices = [] if date_columns: for dc in date_columns: # Find column index (case-insensitive, sanitized) dc_clean = re.sub(r'[^\w]', '_', dc.strip()) if not dc_clean or dc_clean[0].isdigit(): dc_clean = '_' + dc_clean try: idx = clean_headers.index(dc_clean) date_col_indices.append(idx) except ValueError: # Try original name for i, h in enumerate(clean_headers): if h.lower() == dc.lower() or h.lower() == dc_clean.lower(): date_col_indices.append(i) break # Batch insert for performance batch_size = 1000 batch = [] total_rows = 0 # If has_header=False, we already read the first data row # Create an iterator that includes it if first_data_row is not None: import itertools all_rows = itertools.chain([first_data_row], reader) else: all_rows = reader for row in all_rows: # Pad or truncate row to match headers if len(row) < len(clean_headers): row = list(row) + [''] * (len(clean_headers) - len(row)) elif len(row) > len(clean_headers): row = list(row[:len(clean_headers)]) else: row = list(row) # Process decimal separator if specified if decimal and decimal != '.': for i, val in enumerate(row): if val and i not in date_col_indices: # Try to convert European decimal format try: # Remove thousand separators (spaces or dots) and replace decimal cleaned = val.replace(' ', '').replace('.', '') cleaned = cleaned.replace(decimal, '.') float(cleaned) # Test if it's a number row[i] = cleaned except (ValueError, AttributeError): pass # Not a number, keep original # Process date columns if date_col_indices and date_format: from datetime import datetime for idx in date_col_indices: if idx < len(row) and row[idx]: try: if date_format == 'dayfirst': # Try common European formats for fmt in ['%d/%m/%Y', '%d-%m-%Y', '%d.%m.%Y', '%d/%m/%y', '%d-%m-%y']: try: dt = datetime.strptime(row[idx], fmt) row[idx] = dt.strftime('%Y-%m-%d') break except ValueError: continue elif date_format == 'monthfirst': # Try common US formats for fmt in ['%m/%d/%Y', '%m-%d-%Y', '%m/%d/%y', '%m-%d-%y']: try: dt = datetime.strptime(row[idx], fmt) row[idx] = dt.strftime('%Y-%m-%d') break except ValueError: continue elif date_format != 'auto': dt = datetime.strptime(row[idx], date_format) row[idx] = dt.strftime('%Y-%m-%d') except (ValueError, TypeError): pass # Keep original value batch.append(tuple(row)) if len(batch) >= batch_size: cursor.executemany(insert_sql, batch) total_rows += len(batch) batch = [] # Insert remaining rows if batch: cursor.executemany(insert_sql, batch) total_rows += len(batch) conn.commit() conn.close() response_data = { "db_path": path, "csv_path": import_csv, "table": table, "columns": clean_headers, "rows_imported": total_rows, "if_exists": if_exists, "import_info": import_info, } if delimiter: response_data["delimiter"] = delimiter if encoding: response_data["encoding"] = encoding if date_columns: response_data["date_columns"] = date_columns if decimal: response_data["decimal"] = decimal return self._core._format_response( True, data=response_data, message=f"Imported {total_rows} rows from '{import_csv}' into table '{table}' (using {import_info['method']})" ) except StorageError: raise except sqlite3.Error as e: conn.close() raise StorageError( "EXEC_ERROR", f"SQLite error during import: {str(e)}", {"csv": import_csv, "table": table} ) except Exception as e: conn.close() raise StorageError( "EXEC_ERROR", f"CSV import error: {str(e)}", {"csv": import_csv, "table": table}, hint="Try specifying delimiter, encoding, or check CSV format" ) # ===================================================== # SQL QUERY MODE # ===================================================== if not query: raise StorageError( "MISSING_PARAMETER", "Either 'query' or 'import_csv' parameter is required", hint="Provide a SQL query or a CSV file to import" ) # Check if this is a read or write query query_stripped = query.strip().upper() is_read_query = query_stripped.startswith(("SELECT", "PRAGMA", "EXPLAIN")) # Block write operations in readonly zones if readonly and not is_read_query: raise StorageError( "ZONE_FORBIDDEN", "Write operations not allowed in Uploads zone", hint="Move the database to Storage or Documents first" ) # Block dangerous operations dangerous_patterns = [ "ATTACH", "DETACH", # Could access other databases "LOAD_EXTENSION", # Could load malicious code ] for pattern in dangerous_patterns: if pattern in query_stripped: raise StorageError( "COMMAND_FORBIDDEN", f"SQL operation '{pattern}' is not allowed for security reasons" ) # Execute the query params = params or [] conn = sqlite3.connect(str(db_path), timeout=10.0) conn.row_factory = sqlite3.Row try: cursor = conn.cursor() cursor.execute(query, params) if is_read_query: # Get column names first columns = [description[0] for description in cursor.description] if cursor.description else [] # Check if user wants CSV export (all results, no context pollution) if output_csv: # Export all results to CSV file import csv as csv_module output_csv_path = self._core._validate_relative_path(output_csv) csv_path = self._core._resolve_chroot_path(zone_root, output_csv_path) self._core._ensure_dir(csv_path.parent) row_count = 0 with open(csv_path, 'w', newline='', encoding='utf-8') as f: writer = csv_module.writer(f) writer.writerow(columns) # Header # Fetch and write in batches to handle large results while True: batch = cursor.fetchmany(1000) if not batch: break for row in batch: writer.writerow(list(row)) row_count += 1 conn.close() return self._core._format_response( True, data={ "path": path, "query": query, "output_csv": output_csv, "rows_exported": row_count, "columns": columns, }, message=f"Exported {row_count} row(s) to {output_csv}" ) # Check if query already has LIMIT has_limit = bool(re.search(r'\bLIMIT\s+\d+', query_stripped)) # Determine effective limit DEFAULT_LIMIT = 10 if has_limit: # User specified LIMIT in SQL - respect it rows = cursor.fetchall() total_rows = len(rows) results = [dict(zip(columns, row)) for row in rows] if rows else [] truncated = False effective_limit = None else: # No LIMIT in query - apply protection if limit is None: effective_limit = DEFAULT_LIMIT elif limit == 0: effective_limit = None # No limit (dangerous but explicit) else: effective_limit = limit if effective_limit: # First count total rows (for user info) rows = cursor.fetchall() total_rows = len(rows) # Truncate if needed if total_rows > effective_limit: results = [dict(zip(columns, row)) for row in rows[:effective_limit]] truncated = True else: results = [dict(zip(columns, row)) for row in rows] truncated = False else: # limit=0: no limit (user explicitly requested all) rows = cursor.fetchall() total_rows = len(rows) results = [dict(zip(columns, row)) for row in rows] if rows else [] truncated = False conn.close() # Build response response_data = { "path": path, "query": query, "row_count": len(results), "columns": columns, "rows": results, } if truncated: response_data["total_rows"] = total_rows response_data["truncated"] = True response_data["limit_applied"] = effective_limit message = ( f"⚠️ No LIMIT in query. Showing {len(results)}/{total_rows} rows to protect context. " f"Options: (1) Add LIMIT to SQL, (2) Use limit=N parameter for more rows, " f"(3) Use output_csv='results.csv' to export ALL {total_rows} rows to file." ) else: message = f"Query returned {len(results)} row(s)" return self._core._format_response(True, data=response_data, message=message) else: # For INSERT/UPDATE/DELETE/CREATE/etc. conn.commit() rowcount = cursor.rowcount lastrowid = cursor.lastrowid conn.close() return self._core._format_response( True, data={ "path": path, "query": query, "rows_affected": rowcount, "last_row_id": lastrowid, }, message=f"Query executed successfully ({rowcount} row(s) affected)" ) except sqlite3.Error as e: conn.close() raise StorageError( "EXEC_ERROR", f"SQLite error: {str(e)}", {"query": query}, hint="Check your SQL syntax" ) except StorageError as e: return self._core._format_error(e, "shed_sqlite") except Exception as e: return self._core._format_response(False, message=str(e)) # ========================================================================= # DOWNLOAD LINKS (3 functions) # ========================================================================= async def shed_link_create( self, zone: str, path: str, group: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Create a download link for a file. Uploads the file to Open WebUI's file system and returns a download URL. The link works while the user is logged in (uses session cookie). Works with any file type: PDF, images, ZIP, CSV, etc. :param zone: Zone to read from ("uploads", "storage", "documents", "group") :param path: Relative path to the file :param group: Group name (required if zone="group") :return: JSON with download_url and file_id Related functions: shed_link_list() - List all download links shed_link_delete() - Remove a download link Examples: shed_link_create(zone="storage", path="exports/report.pdf") shed_link_create(zone="storage", path="archives/data.zip") shed_link_create(zone="group", group="team", path="shared/presentation.pptx") """ try: # Resolve zone and path zone_lower = zone.lower() user_root = self._core._get_user_root(__user__) if zone_lower == "uploads": conv_id = self._core._get_conv_id(__metadata__) chroot = user_root / "Uploads" / conv_id elif zone_lower == "storage": chroot = user_root / "Storage" / "data" elif zone_lower == "documents": chroot = user_root / "Documents" / "data" elif zone_lower == "group": if not group: raise StorageError( "MISSING_GROUP", "Group name required for group zone", {"zone": zone}, "Provide group parameter: shed_link_create(zone='group', group='team', path='...')" ) group_id = self._core._validate_group_id(group) self._core._check_group_access(__user__, group_id) chroot = Path(self.valves.storage_base_path) / "groups" / group_id / "data" else: raise StorageError( "INVALID_ZONE", f"Invalid zone: {zone}", {"zone": zone, "valid_zones": ["uploads", "storage", "documents", "group"]}, "Use one of: uploads, storage, documents, group" ) # Resolve and validate path filepath = self._core._resolve_chroot_path(chroot, path) if not filepath.exists(): raise StorageError( "FILE_NOT_FOUND", f"File not found: {path}", {"zone": zone, "path": path} ) if not filepath.is_file(): raise StorageError( "NOT_A_FILE", f"Path is not a file: {path}", {"zone": zone, "path": path} ) # Use Open WebUI internal Python API via Bridge (no HTTP request = no deadlock) # Get user ID user_id = __user__.get("id") if not user_id: raise StorageError( "NO_USER_ID", "User ID not available", {}, "This should not happen - contact administrator" ) # Generate unique file ID file_id = str(uuid.uuid4()) filename = filepath.name file_size = filepath.stat().st_size # Detect content type content_type, _ = mimetypes.guess_type(filename) if not content_type: content_type = "application/octet-stream" # Copy file to Open WebUI uploads directory uploads_dir = Path("/app/backend/data/uploads") uploads_dir.mkdir(parents=True, exist_ok=True) dest_path = uploads_dir / f"{file_id}_{filename}" shutil.copy2(filepath, dest_path) try: # Create database entry using Bridge (isolates Open WebUI API changes) bridge = _OpenWebUIBridge() file_item = bridge.insert_file( user_id=user_id, file_id=file_id, filename=filename, file_path=str(dest_path), content_type=content_type, file_size=file_size, metadata={"fileshed_link": True, "source_zone": zone, "source_path": path} ) if not file_item: # Clean up copied file on failure dest_path.unlink(missing_ok=True) raise StorageError( "DB_ERROR", "Failed to create file entry in database", {"file_id": file_id} ) # Build download URL (full URL with base from valve) base_url = self.valves.openwebui_api_url.rstrip('/') download_url = f"{base_url}/api/v1/files/{file_id}/content" # Markdown clickable link for easy copy-paste by LLM clickable_link = f"[📥 Download {filename}]({download_url})" return self._core._format_response(True, { "file_id": file_id, "download_url": download_url, "clickable_link": clickable_link, "filename": filename, "size_bytes": file_size, "zone": zone, "path": path, }, message=f"Link ready: {clickable_link}") except StorageError: # Clean up on failure dest_path.unlink(missing_ok=True) raise except Exception as e: dest_path.unlink(missing_ok=True) raise StorageError( "INTERNAL_API_ERROR", f"Error calling Open WebUI API: {e}", {}, "Check Open WebUI version compatibility" ) except StorageError as e: return self._core._format_error(e, "shed_link_create") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_link_list( self, __user__: dict = {}, ) -> str: """ List all download links created by the current user. :return: JSON with list of links (file_id, filename, download_url, size) Related functions: shed_link_create() - Create a new download link shed_link_delete() - Remove a download link Examples: shed_link_list() """ try: user_id = __user__.get("id") if not user_id: raise StorageError( "NO_USER_ID", "User ID not available", {}, "This should not happen - contact administrator" ) # Use Bridge to get user's files bridge = _OpenWebUIBridge() bridge._ensure_initialized() # Get files using the internal API all_files = bridge._files_class.get_files_by_user_id(user_id) # Filter only files created by Fileshed (have fileshed_link marker) files = [] if all_files: for f in all_files: if hasattr(f, 'meta') and f.meta and isinstance(f.meta, dict): meta_data = f.meta.get('data', {}) if isinstance(meta_data, dict) and meta_data.get('fileshed_link') == True: files.append(f) if not files: return self._core._format_response(True, { "files": [], "count": 0 }, message="No download links found") # Format file list base_url = self.valves.openwebui_api_url.rstrip('/') file_list = [] for f in files: download_url = f"{base_url}/api/v1/files/{f.id}/content" file_info = { "file_id": f.id, "filename": f.filename, "download_url": download_url, "clickable_link": f"[📥 {f.filename}]({download_url})", "created_at": f.created_at, } # Add size and source info if available in meta if hasattr(f, 'meta') and f.meta and isinstance(f.meta, dict): file_info["size_bytes"] = f.meta.get("size") file_info["content_type"] = f.meta.get("content_type") meta_data = f.meta.get('data', {}) if isinstance(meta_data, dict): file_info["source_zone"] = meta_data.get("source_zone") file_info["source_path"] = meta_data.get("source_path") file_list.append(file_info) return self._core._format_response(True, { "files": file_list, "count": len(file_list) }, message=f"Found {len(file_list)} download link(s)") except StorageError as e: return self._core._format_error(e, "shed_link_list") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_link_delete( self, file_id: str, __user__: dict = {}, ) -> str: """ Remove a download link from Open WebUI. This deletes both the database entry and the physical file. :param file_id: The file ID returned by shed_link_create() or shed_link_list() :return: JSON with success status Related functions: shed_link_create() - Create a new download link shed_link_list() - List all download links Examples: shed_link_delete(file_id="317ef925-c87a-44fd-8d29-acdccb8e6070") """ try: user_id = __user__.get("id") if not user_id: raise StorageError( "NO_USER_ID", "User ID not available", {}, "This should not happen - contact administrator" ) if not file_id: raise StorageError( "MISSING_FILE_ID", "file_id parameter is required", {}, "Use shed_link_list() to get file IDs" ) # Use Bridge to get and verify file ownership bridge = _OpenWebUIBridge() file_item = bridge.get_file_by_id(file_id) if not file_item: raise StorageError( "FILE_NOT_FOUND", f"File not found: {file_id}", {"file_id": file_id}, "Use shed_link_list() to see your download links" ) # Verify ownership if file_item.user_id != user_id: raise StorageError( "ACCESS_DENIED", "You can only delete your own download links", {"file_id": file_id} ) # Verify this is a Fileshed-created link (not a user upload) is_fileshed_link = False if hasattr(file_item, 'meta') and file_item.meta and isinstance(file_item.meta, dict): meta_data = file_item.meta.get('data', {}) if isinstance(meta_data, dict) and meta_data.get('fileshed_link') == True: is_fileshed_link = True if not is_fileshed_link: raise StorageError( "NOT_A_FILESHED_LINK", f"This file was not created by Fileshed: {file_id}", {"file_id": file_id, "filename": file_item.filename}, "shed_link_delete only removes links created by shed_link_create. Use Open WebUI UI to manage other files." ) # Get file path before deletion file_path = file_item.path if hasattr(file_item, 'path') else None filename = file_item.filename # Delete from database bridge.delete_file_by_id(file_id) # Delete physical file if it exists if file_path: try: Path(file_path).unlink(missing_ok=True) except Exception: pass # File might already be gone return self._core._format_response(True, { "file_id": file_id, "filename": filename, "deleted": True }, message=f"Link deleted: {filename}") except StorageError as e: return self._core._format_error(e, "shed_link_delete") except Exception as e: return self._core._format_response(False, message=str(e)) # ========================================================================= # HOWTO GUIDES (targeted help to avoid context pollution) # ========================================================================= async def shed_help( self, howto: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Get help for Fileshed. Call without arguments for quick reference, or with a howto topic for detailed guides. :param howto: Optional topic. Available: download, csv_to_sqlite, upload, share, edit, commands, network, paths, large_files, full :return: Help text Examples: shed_help() # Quick reference + list of howtos shed_help(howto="download") # How to download files (use curl, not fetch_url) shed_help(howto="csv_to_sqlite") # How to import CSV into SQLite (fast) shed_help(howto="large_files") # Process large files without context pollution shed_help(howto="commands") # Available commands + workarounds shed_help(howto="full") # Complete documentation """ # List of available howtos available_howtos = list(self._core.HOWTO_GUIDES.keys()) + ["full"] # No argument: return quick help + howto list if howto is None: help_text = """# 🛖 Fileshed - Quick Reference ## 🔥 WORKFLOWS (choose one!) ### Workflow 1: Direct Write (simple, no locking) ``` shed_patch_text(zone="storage", path="file.txt", content="Hello", overwrite=True) ``` One function, done. Use `overwrite=True` to replace entire file. ### Workflow 2: Locked Edit (with locking, for concurrent access) ``` 1. shed_lockedit_open(zone="storage", path="file.txt") # Lock + copy to editzone 2. shed_lockedit_overwrite(zone="storage", path="file.txt", content="New content") # Edit copy 3. shed_lockedit_save(zone="storage", path="file.txt") # Save + unlock (CLOSES edit mode!) ``` ⚠️ After shed_lockedit_save, the file is CLOSED. To edit again, start from step 1. ### Workflow 3: Shell Commands ``` shed_exec(zone="storage", cmd="ls", args=["-la"]) # List files shed_exec(zone="storage", cmd="cat", args=["file.txt"]) # Read file shed_exec(zone="storage", cmd="grep", args=["pattern", "file.txt"]) # Search ``` ## 📁 ZONES | Zone | Purpose | Git versioned? | |------|---------|----------------| | `storage` | General files, scripts, data | No | | `documents` | Important docs, versioned | Yes (auto-commit) | | `uploads` | User uploaded files (read-only) | No | | `group` | Shared with team (requires group=) | Yes | ## ⚡ QUICK OPERATIONS | Task | Command | |------|---------| | List files | `shed_exec(zone="storage", cmd="ls", args=["-la"])` | | Read file | `shed_exec(zone="storage", cmd="cat", args=["file.txt"])` | | Create folder | `shed_exec(zone="storage", cmd="mkdir", args=["-p", "folder"])` | | Create/overwrite file | `shed_patch_text(zone="storage", path="f.txt", content="...", overwrite=True)` | | Append to file | `shed_patch_text(zone="storage", path="f.txt", content="...", position="end")` | | Delete file | `shed_delete(zone="storage", path="file.txt")` | | Copy file | `shed_exec(zone="storage", cmd="cp", args=["src.txt", "dst.txt"])` | | Move/rename | `shed_rename(zone="storage", old_path="a.txt", new_path="b.txt")` | ## 🔗 SHARE FILES ``` shed_link_create(zone="storage", path="report.pdf") # Get download link shed_link_list() # List your links shed_link_delete(file_id="...") # Remove link ``` ## 📊 STATS & INFO ``` shed_stats() # Storage usage shed_parameters() # Configuration limits shed_allowed_commands() # Available shell commands shed_tree(zone="storage") # Directory tree ``` ## 📚 HOWTO GUIDES (for complex tasks) """ + "\n".join(f"- `shed_help(howto=\"{h}\")` — {self._core._get_howto_description(h)}" for h in available_howtos) + """ ## ⚠️ COMMON MISTAKES ``` ❌ position="overwrite" → Use overwrite=True instead! ❌ position="at" in patch_text → "at" is for patch_bytes. Use "before"/"after" with line=N ❌ shed_lockedit_save twice → save CLOSES edit mode, reopen with shed_lockedit_open ❌ shed_patch_text for folders → Use shed_exec(cmd="mkdir", args=["-p", "folder"]) ❌ paths with zone name → Paths are RELATIVE: "file.txt" not "Storage/file.txt" ``` """ # Build howtos dict for structured access howtos_dict = {h: self._core._get_howto_description(h) for h in available_howtos} return self._core._format_response( True, data={ "help": help_text, "howtos": howtos_dict }, message="Help in data.help, howtos comprehensive list in data.howtos" ) # Specific howto requested howto_lower = howto.lower().strip() if howto_lower == "full": return self._core._format_response(True, data={"help": self._core._get_full_help()}, message="Full documentation in data.help") if howto_lower in self._core.HOWTO_GUIDES: return self._core._format_response(True, data={"help": self._core.HOWTO_GUIDES[howto_lower], "topic": howto_lower}, message=f"Guide '{howto_lower}' in data.help") # Unknown howto howtos_dict = {h: self._core._get_howto_description(h) for h in available_howtos} return self._core._format_response(False, data={"howtos": howtos_dict}, message=f"Unknown howto '{howto}'. Available howtos in data.howtos") async def shed_stats( self, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Returns usage statistics. :return: Statistics as JSON """ try: user_root = self._core._get_user_root(__user__) def get_dir_size(path: Path) -> int: if not path.exists(): return 0 total = 0 for f in path.rglob("*"): if f.is_file(): total += f.stat().st_size return total def count_files(path: Path) -> int: if not path.exists(): return 0 return sum(1 for f in path.rglob("*") if f.is_file()) uploads_size = get_dir_size(user_root / "Uploads") storage_size = get_dir_size(user_root / "Storage" / "data") documents_size = get_dir_size(user_root / "Documents" / "data") total_size = uploads_size + storage_size + documents_size quota = self.valves.quota_per_user_mb * 1024 * 1024 stats = { "uploads": { "size_bytes": uploads_size, "size_human": f"{uploads_size / 1024 / 1024:.2f} MB", "files": count_files(user_root / "Uploads"), }, "storage": { "size_bytes": storage_size, "size_human": f"{storage_size / 1024 / 1024:.2f} MB", "files": count_files(user_root / "Storage" / "data"), }, "documents": { "size_bytes": documents_size, "size_human": f"{documents_size / 1024 / 1024:.2f} MB", "files": count_files(user_root / "Documents" / "data"), }, "total": { "size_bytes": total_size, "size_human": f"{total_size / 1024 / 1024:.2f} MB", "quota_mb": self.valves.quota_per_user_mb, "usage_percent": f"{(total_size / quota) * 100:.1f}%", }, } return self._core._format_response(True, data=stats) except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_parameters( self, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Returns current valve configuration (read-only). Call this function to answer user questions about: - "Is network access enabled?" -> check network_mode - "Can I use curl/wget?" -> check network_mode != "disabled" - "Can I git push?" -> check network_mode == "all" - "What's my storage quota?" -> check quota_per_user_mb - "What's the max file size?" -> check max_file_size_mb - "What's the default timeout?" -> check exec_timeout_default Values can only be changed by admins in Open WebUI (Workspace > Tools > Valves). :return: All valve values as JSON (read-only) Example: shed_parameters() Key fields: - network_mode: "disabled" | "safe" | "all" - "disabled": No network (curl/wget/git clone blocked) - "safe": Downloads only (curl GET, git clone/pull, no push) - "all": Full network access (uploads/push allowed) - quota_per_user_mb: Storage limit per user - quota_per_group_mb: Storage limit per group - max_file_size_mb: Maximum single file size - exec_timeout_default: Default command timeout (seconds) """ try: params = { "storage_base_path": self.valves.storage_base_path, "quota_per_user_mb": self.valves.quota_per_user_mb, "quota_per_group_mb": self.valves.quota_per_group_mb, "max_file_size_mb": self.valves.max_file_size_mb, "lock_max_age_hours": self.valves.lock_max_age_hours, "exec_timeout_default": self.valves.exec_timeout_default, "exec_timeout_max": self.valves.exec_timeout_max, "group_default_mode": self.valves.group_default_mode, "network_mode": self.valves.network_mode, "openwebui_api_url": self.valves.openwebui_api_url, } # Add helpful derived info params["_info"] = { "network_enabled": self.valves.network_mode != "disabled", "network_upload_allowed": self.valves.network_mode == "all", "groups_available": GROUPS_AVAILABLE, } return self._core._format_response(True, data=params, message="Current valve configuration") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_allowed_commands( self, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Tests available commands in container. :return: List of available/missing commands by zone """ try: # Cache the result if self._core._commands_cache is not None: return self._core._format_response(True, data=self._core._commands_cache) # Check all commands via Layer 2 all_commands = WHITELIST_READWRITE available = [] missing = [] for cmd in sorted(all_commands): if self._core._check_command_available(cmd): available.append(cmd) else: missing.append(cmd) # Check network commands status (curl/wget + git network ops + media tools) network_status = { "mode": self.valves.network_mode, "curl": { "installed": self._core._check_command_available("curl"), "enabled": self.valves.network_mode in ("safe", "all"), }, "wget": { "installed": self._core._check_command_available("wget"), "enabled": self.valves.network_mode in ("safe", "all"), }, "git_clone": self.valves.network_mode in ("safe", "all"), "git_fetch": self.valves.network_mode in ("safe", "all"), "git_pull": self.valves.network_mode in ("safe", "all"), "git_push": self.valves.network_mode == "all", "ffmpeg": { "installed": self._core._check_command_available("ffmpeg"), "enabled": self.valves.network_mode in ("safe", "all"), "output_protocols_blocked": self.valves.network_mode == "safe", }, "pandoc": { "installed": self._core._check_command_available("pandoc"), "urls_enabled": self.valves.network_mode in ("safe", "all"), }, } result = { "uploads": { "available": [c for c in available if c in WHITELIST_READONLY], "missing": [c for c in missing if c in WHITELIST_READONLY], }, "storage": { "available": [c for c in available if c in WHITELIST_READWRITE], "missing": [c for c in missing if c in WHITELIST_READWRITE], }, "documents": { "available": [c for c in available if c in WHITELIST_READWRITE], "missing": [c for c in missing if c in WHITELIST_READWRITE], }, "network": network_status, "summary": { "total_whitelist": len(all_commands), "available": len(available), "missing": len(missing), "coverage": f"{(len(available) / len(all_commands)) * 100:.1f}%", }, } self._core._commands_cache = result return self._core._format_response(True, data=result) except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_force_unlock( self, zone: str = "", path: str = "", group: str = "", __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Forces file unlock (crash recovery). Use this if a file is stuck in edit mode after a crash. :param zone: "storage" or "documents" (for personal zones) :param path: File path relative to zone :param group: Group ID (for group zones - use instead of zone) :return: Confirmation as JSON Examples: shed_force_unlock(zone="storage", path="stuck_file.txt") shed_force_unlock(group="team", path="locked_doc.md") """ try: # Validate path if not path: raise StorageError("MISSING_PARAMETER", "path is required") path = self._core._validate_relative_path(path) # Determine if group or personal zone if group: # Group mode group = self._core._validate_group_id(group) self._core._check_group_access(__user__, group) group_path = self._core._get_groups_root() / group lock_path = group_path / "locks" / (path + ".lock") editzone_base = group_path / "editzone" zone_display = f"Group:{group}" else: # Personal zone mode if not zone: raise StorageError( "MISSING_PARAMETER", "Must specify either 'zone' or 'group'", hint="Use zone='storage' or zone='documents', or group='group_id'" ) if zone.lower() not in ("storage", "documents"): raise StorageError( "ZONE_FORBIDDEN", f"Invalid zone: {zone}", {}, "Use 'storage' or 'documents'" ) user_root = self._core._get_user_root(__user__) zone_name = "Storage" if zone.lower() == "storage" else "Documents" zone_root = user_root / zone_name lock_path = self._core._get_lock_path(zone_root, path) editzone_base = zone_root / "editzone" zone_display = zone_name # Find and delete all editzones for this path if editzone_base.exists(): for conv_dir in editzone_base.iterdir(): if conv_dir.is_dir(): edit_path = conv_dir / path if edit_path.exists(): self._core._rm_with_empty_parents(edit_path, editzone_base) # Delete lock if lock_path.exists(): self._core._rm_with_empty_parents(lock_path, lock_path.parent.parent / "locks") return self._core._format_response(True, message=f"Unlocked: {path} in {zone_display}") except StorageError as e: return self._core._format_error(e, "shed_force_unlock") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_maintenance( self, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Cleans expired locks and orphan editzones (personal and group spaces). :return: Cleanup report as JSON """ try: user_root = self._core._get_user_root(__user__) max_age_hours = self.valves.lock_max_age_hours now = datetime.now(timezone.utc) cleaned = { "expired_locks": [], "corrupted_locks": [], "orphan_editzones": [], } def clean_zone(zone_root: Path, zone_name: str): """Helper to clean locks and editzones in a zone.""" locks_dir = zone_root / "locks" editzone_dir = zone_root / "editzone" # 1. Clean expired and corrupted locks if locks_dir.exists(): for lock_file in locks_dir.rglob("*.lock"): try: lock_data = json.loads(lock_file.read_text()) locked_at_str = lock_data.get("locked_at", "") if locked_at_str: locked_at = datetime.fromisoformat( locked_at_str.replace("Z", "+00:00") ) age_hours = (now - locked_at).total_seconds() / 3600 if age_hours > max_age_hours: # Expired lock rel_path = lock_file.relative_to(locks_dir) path_str = str(rel_path)[:-5] # Remove .lock # Delete associated editzone conv_id = lock_data.get("conv_id", "") if conv_id: edit_path = editzone_dir / conv_id / path_str if edit_path.exists(): self._core._rm_with_empty_parents(edit_path, editzone_dir) # Delete lock self._core._rm_with_empty_parents(lock_file, locks_dir) cleaned["expired_locks"].append(f"{zone_name}/{path_str}") except json.JSONDecodeError: # Corrupted lock rel_path = lock_file.relative_to(locks_dir) self._core._rm_with_empty_parents(lock_file, locks_dir) cleaned["corrupted_locks"].append(f"{zone_name}/{rel_path}") except (ValueError, TypeError): pass # Invalid date, ignore # 2. Clean orphan editzones (without corresponding lock) if editzone_dir.exists(): for conv_dir in editzone_dir.iterdir(): if conv_dir.is_dir(): for item in conv_dir.rglob("*"): if item.is_file(): rel_path = item.relative_to(conv_dir) lock_path = locks_dir / (str(rel_path) + ".lock") if not lock_path.exists(): # Orphan editzone self._core._rm_with_empty_parents(item, editzone_dir) cleaned["orphan_editzones"].append( f"{zone_name}/editzone/{conv_dir.name}/{rel_path}" ) # Clean personal zones for zone_name in ("Storage", "Documents"): zone_root = user_root / zone_name clean_zone(zone_root, zone_name) # Clean group zones (for groups the user belongs to) user_id = __user__.get("id", "") user_groups = self._core._get_user_groups(user_id) groups_root = self._core._get_groups_root() for group in user_groups: group_path = groups_root / group.id if group_path.exists(): clean_zone(group_path, f"Group:{group.id}") total = (len(cleaned["expired_locks"]) + len(cleaned["corrupted_locks"]) + len(cleaned["orphan_editzones"])) return self._core._format_response( True, data=cleaned, message=f"Maintenance complete: {total} element(s) cleaned" ) except Exception as e: return self._core._format_response(False, message=str(e)) # ========================================================================= # GROUP FUNCTIONS (14 functions) # ========================================================================= # --- Discovery (2) --- async def shed_group_list( self, __user__: dict = {}, ) -> str: """ Lists groups the user belongs to. :return: List of groups with id, name, and member count """ try: if not GROUPS_AVAILABLE: return self._core._format_response( False, message="Group features are not available (Open WebUI Groups API not found)" ) user_id = __user__.get("id", "") groups = self._core._get_user_groups(user_id) result = [] for g in groups: # Use dedicated API method to get member count member_count = 0 try: member_count = Groups.get_group_member_count_by_id(g.id) or 0 except Exception: pass result.append({ "id": g.id, "name": g.name, "description": g.description or "", "member_count": member_count, }) return self._core._format_response( True, data={"groups": result, "count": len(result)}, message=f"Found {len(result)} group(s)" ) except StorageError as e: return self._core._format_error(e, "shed_group_list") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_group_info( self, group: str, __user__: dict = {}, ) -> str: """ Shows group files, ownership information, and statistics. :param group: Group ID or group name :return: Group information including files and their ownership """ try: # Validate group_id group = self._core._validate_group_id(group) self._core._check_group_access(__user__, group) # Get group info and member list using dedicated API methods group_obj = Groups.get_group_by_id(group) try: member_ids = Groups.get_group_user_ids_by_id(group) or [] except Exception: member_ids = [] data_path = self._core._get_group_data_path(group) if not data_path.exists(): return self._core._format_response( True, data={ "group_id": group, "name": group_obj.name if group_obj else group, "members": member_ids, "member_count": len(member_ids), "files": [], "total_files": 0, "total_size": 0 }, message="Group space is empty" ) # Get all ownership records for this group in a single query all_ownership, _ = self._core._db_execute( "SELECT file_path, owner_id, write_access FROM file_ownership WHERE group_id = ?", (group,) ) ownership_map = {row["file_path"]: {"owner_id": row["owner_id"], "write_access": row["write_access"]} for row in all_ownership} # List files with ownership files = [] total_size = 0 for item in data_path.rglob("*"): if item.is_file() and ".git" not in item.parts: rel_path = str(item.relative_to(data_path)) try: size = item.stat().st_size except (OSError, FileNotFoundError): continue # Skip files that disappeared total_size += size ownership = ownership_map.get(rel_path) files.append({ "path": rel_path, "size": size, "owner": ownership["owner_id"] if ownership else "unknown", "mode": ownership["write_access"] if ownership else "unknown", }) return self._core._format_response( True, data={ "group_id": group, "name": group_obj.name if group_obj else group, "members": member_ids, "member_count": len(member_ids), "files": files, "total_files": len(files), "total_size": total_size, "total_size_human": f"{total_size / 1024 / 1024:.2f} MB", }, message=f"Group has {len(files)} file(s) and {len(member_ids)} member(s)" ) except StorageError as e: return self._core._format_error(e, "shed_group_info") except Exception as e: return self._core._format_response(False, message=str(e)) # --- Operations (4) --- async def shed_group_set_mode( self, group: str, path: str, mode: str, __user__: dict = {}, ) -> str: """ Changes the write mode of a file (owner only). :param group: Group ID or group name :param path: File path :param mode: New mode: 'owner', 'group', or 'owner_ro' :return: Operation result as JSON """ try: # Validate group_id group = self._core._validate_group_id(group) self._core._check_group_access(__user__, group) user_id = __user__.get("id", "") # Validate path path = self._core._validate_relative_path(path) # Validate mode if mode not in ("owner", "group", "owner_ro"): raise StorageError( "INVALID_MODE", f"Invalid mode: {mode}", hint="Use 'owner', 'group', or 'owner_ro'" ) # Check ownership ownership = self._core._get_file_ownership(group, path) if ownership is None: raise StorageError("FILE_NOT_FOUND", f"No ownership record for: {path}") if ownership["owner_id"] != user_id: raise StorageError( "NOT_FILE_OWNER", "Only the file owner can change the write mode", {"owner": ownership["owner_id"], "your_id": user_id} ) # Update mode old_mode = ownership["write_access"] self._core._set_file_ownership(group, path, user_id, mode) return self._core._format_response( True, data={"path": path, "old_mode": old_mode, "new_mode": mode}, message=f"File mode changed from '{old_mode}' to '{mode}'" ) except StorageError as e: return self._core._format_error(e, "shed_group_set_mode") except Exception as e: return self._core._format_response(False, message=str(e)) async def shed_group_chown( self, group: str, path: str, new_owner: str, __user__: dict = {}, ) -> str: """ Transfers file ownership to another user (owner only). :param group: Group ID or group name :param path: File path :param new_owner: User ID of new owner :return: Operation result as JSON """ try: # Validate group_id group = self._core._validate_group_id(group) self._core._check_group_access(__user__, group) user_id = __user__.get("id", "") # Validate path path = self._core._validate_relative_path(path) # Validate new_owner (sanitize) if not new_owner or not isinstance(new_owner, str): raise StorageError("INVALID_OWNER", "new_owner is required") new_owner = new_owner.strip() if not new_owner or len(new_owner) > 255: raise StorageError("INVALID_OWNER", "Invalid new_owner format") # Block dangerous characters for SQL and filesystem if any(c in new_owner for c in [";", "'", '"', "\\", "/", "\n", "\r", "\0"]): raise StorageError("INVALID_OWNER", "new_owner contains invalid characters") # Check ownership ownership = self._core._get_file_ownership(group, path) if ownership is None: raise StorageError("FILE_NOT_FOUND", f"No ownership record for: {path}") if ownership["owner_id"] != user_id: raise StorageError( "NOT_FILE_OWNER", "Only the file owner can transfer ownership", {"owner": ownership["owner_id"], "your_id": user_id} ) # Check new owner is group member if not self._core._is_group_member(new_owner, group): raise StorageError( "INVALID_OWNER", f"User '{new_owner}' is not a member of this group" ) # Update ownership self._core._db_execute( "UPDATE file_ownership SET owner_id = ?, updated_at = CURRENT_TIMESTAMP WHERE group_id = ? AND file_path = ?", (new_owner, group, path) ) return self._core._format_response( True, data={"path": path, "old_owner": user_id, "new_owner": new_owner}, message=f"Ownership transferred to '{new_owner}'" ) except StorageError as e: return self._core._format_error(e, "shed_group_chown") except Exception as e: return self._core._format_response(False, message=str(e)) # --- Bridge (1) --- async def shed_copy_to_group( self, src_zone: str, src_path: str, group: str, dest_path: str, message: str = "Add file to group", mode: str = None, __user__: dict = {}, __metadata__: dict = {}, ) -> str: """ Copies a file from personal space to group. :param src_zone: Source zone ('uploads', 'storage', or 'documents') :param src_path: Source file path :param group: Target group ID :param dest_path: Destination path in group :param message: Git commit message :param mode: Write mode: 'owner', 'group', or 'owner_ro' (default from config) :return: Operation result as JSON """ try: # Validate group_id group = self._core._validate_group_id(group) self._core._check_group_access(__user__, group) user_id = __user__.get("id", "") conv_id = self._core._get_conv_id(__metadata__) # Validate paths src_path = self._core._validate_relative_path(src_path) dest_path = self._core._validate_relative_path(dest_path) # Resolve source user_root = self._core._get_user_root(__user__) src_zone_lower = src_zone.lower() if src_zone_lower == "uploads": src_base = user_root / "Uploads" / conv_id elif src_zone_lower == "storage": src_base = user_root / "Storage" / "data" elif src_zone_lower == "documents": src_base = user_root / "Documents" / "data" else: raise StorageError( "ZONE_FORBIDDEN", f"Invalid source zone: {src_zone}", hint="Use 'uploads', 'storage', or 'documents'" ) source = self._core._resolve_chroot_path(src_base, src_path) if not source.exists(): raise StorageError("FILE_NOT_FOUND", f"File not found: {src_path}") # Check file size if source.is_file(): file_size = source.stat().st_size max_size = self.valves.max_file_size_mb * 1024 * 1024 if file_size > max_size: raise StorageError( "FILE_TOO_LARGE", f"File exceeds max size ({self.valves.max_file_size_mb} MB)", {"size_mb": round(file_size / 1024 / 1024, 2), "max_mb": self.valves.max_file_size_mb} ) # Check group quota self._core._check_group_quota(group, self._core._get_path_size(source)) # Resolve destination data_path = self._core._ensure_group_space(group) dest = self._core._resolve_chroot_path(data_path, dest_path) # Check if destination exists existing = self._core._get_file_ownership(group, dest_path) if existing: can_write, error = self._core._can_write_group_file(group, dest_path, user_id) if not can_write: raise StorageError(error, f"Cannot overwrite file: {error}") # Copy (handle both files and directories) dest.parent.mkdir(parents=True, exist_ok=True) if source.is_dir(): shutil.copytree(source, dest) else: shutil.copy2(source, dest) # Set ownership effective_mode = mode or self.valves.group_default_mode if effective_mode not in ("owner", "group", "owner_ro"): effective_mode = "group" self._core._set_file_ownership(group, dest_path, user_id, effective_mode) # Git commit self._core._git_commit_as_user(data_path, message, user_id) return self._core._format_response( True, data={ "src_zone": src_zone, "src_path": src_path, "group": group, "dest_path": dest_path, "mode": effective_mode, }, message=f"File copied to group '{group}' with mode '{effective_mode}'" ) except StorageError as e: return self._core._format_error(e, "shed_copy_to_group") except Exception as e: return self._core._format_response(False, message=str(e))