mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-04-28 06:19:36 +00:00
fix(sandbox): block host bash traversal escapes (#2560)
* fix(sandbox): block host bash traversal escapes Fixes #2535 * fix(sandbox): harden local bash path guards * fix(sandbox): avoid bash cd argument false positives * Fix the lint error Add function to resolve and validate user data path. * Fix the lint error --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
parent
39c5da94f3
commit
6bd88fe14c
2 changed files with 373 additions and 25 deletions
|
|
@ -22,6 +22,9 @@ from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_
|
|||
|
||||
_ABSOLUTE_PATH_PATTERN = re.compile(r"(?<![:\w])(?<!:/)/(?:[^\s\"'`;&|<>()]+)")
|
||||
_FILE_URL_PATTERN = re.compile(r"\bfile://\S+", re.IGNORECASE)
|
||||
_URL_WITH_SCHEME_PATTERN = re.compile(r"^[a-z][a-z0-9+.-]*://", re.IGNORECASE)
|
||||
_URL_IN_COMMAND_PATTERN = re.compile(r"\b[a-z][a-z0-9+.-]*://[^\s\"'`;&|<>()]+", re.IGNORECASE)
|
||||
_DOTDOT_PATH_SEGMENT_PATTERN = re.compile(r"(?:^|[/\\=])\.\.(?:$|[/\\])")
|
||||
_LOCAL_BASH_SYSTEM_PATH_PREFIXES = (
|
||||
"/bin/",
|
||||
"/usr/bin/",
|
||||
|
|
@ -37,6 +40,42 @@ _DEFAULT_GLOB_MAX_RESULTS = 200
|
|||
_MAX_GLOB_MAX_RESULTS = 1000
|
||||
_DEFAULT_GREP_MAX_RESULTS = 100
|
||||
_MAX_GREP_MAX_RESULTS = 500
|
||||
_LOCAL_BASH_CWD_COMMANDS = {"cd", "pushd"}
|
||||
_LOCAL_BASH_COMMAND_WRAPPERS = {"command", "builtin"}
|
||||
_LOCAL_BASH_COMMAND_PREFIX_KEYWORDS = {"!", "{", "case", "do", "elif", "else", "for", "if", "select", "then", "time", "until", "while"}
|
||||
_LOCAL_BASH_COMMAND_END_KEYWORDS = {"}", "done", "esac", "fi"}
|
||||
_LOCAL_BASH_ROOT_PATH_COMMANDS = {
|
||||
"awk",
|
||||
"cat",
|
||||
"cp",
|
||||
"du",
|
||||
"find",
|
||||
"grep",
|
||||
"head",
|
||||
"less",
|
||||
"ln",
|
||||
"ls",
|
||||
"more",
|
||||
"mv",
|
||||
"rm",
|
||||
"sed",
|
||||
"tail",
|
||||
"tar",
|
||||
}
|
||||
_SHELL_COMMAND_SEPARATORS = {";", "&&", "||", "|", "|&", "&", "(", ")"}
|
||||
_SHELL_REDIRECTION_OPERATORS = {
|
||||
"<",
|
||||
">",
|
||||
"<<",
|
||||
">>",
|
||||
"<<<",
|
||||
"<>",
|
||||
">&",
|
||||
"<&",
|
||||
"&>",
|
||||
"&>>",
|
||||
">|",
|
||||
}
|
||||
|
||||
|
||||
def _get_skills_container_path() -> str:
|
||||
|
|
@ -635,6 +674,214 @@ def _resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState
|
|||
return str(resolved)
|
||||
|
||||
|
||||
def _is_non_file_url_token(token: str) -> bool:
|
||||
"""Return True for URL tokens that should not be interpreted as paths."""
|
||||
values = [token]
|
||||
if "=" in token:
|
||||
values.append(token.split("=", 1)[1])
|
||||
|
||||
for value in values:
|
||||
match = _URL_WITH_SCHEME_PATTERN.match(value)
|
||||
if match and not value.lower().startswith("file://"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _non_file_url_spans(command: str) -> list[tuple[int, int]]:
|
||||
spans = []
|
||||
for match in _URL_IN_COMMAND_PATTERN.finditer(command):
|
||||
if not match.group().lower().startswith("file://"):
|
||||
spans.append(match.span())
|
||||
return spans
|
||||
|
||||
|
||||
def _is_in_spans(position: int, spans: list[tuple[int, int]]) -> bool:
|
||||
return any(start <= position < end for start, end in spans)
|
||||
|
||||
|
||||
def _has_dotdot_path_segment(token: str) -> bool:
|
||||
if _is_non_file_url_token(token):
|
||||
return False
|
||||
return bool(_DOTDOT_PATH_SEGMENT_PATTERN.search(token))
|
||||
|
||||
|
||||
def _split_shell_tokens(command: str) -> list[str]:
|
||||
try:
|
||||
normalized = command.replace("\r\n", "\n").replace("\r", "\n").replace("\n", " ; ")
|
||||
lexer = shlex.shlex(normalized, posix=True, punctuation_chars=True)
|
||||
lexer.whitespace_split = True
|
||||
lexer.commenters = ""
|
||||
return list(lexer)
|
||||
except ValueError:
|
||||
# The shell will reject malformed quoting later; keep validation
|
||||
# best-effort instead of turning syntax errors into security messages.
|
||||
return command.split()
|
||||
|
||||
|
||||
def _is_shell_command_separator(token: str) -> bool:
|
||||
return token in _SHELL_COMMAND_SEPARATORS
|
||||
|
||||
|
||||
def _is_shell_redirection_operator(token: str) -> bool:
|
||||
return token in _SHELL_REDIRECTION_OPERATORS
|
||||
|
||||
|
||||
def _is_shell_assignment(token: str) -> bool:
|
||||
name, separator, _ = token.partition("=")
|
||||
if not separator or not name:
|
||||
return False
|
||||
return bool(re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name))
|
||||
|
||||
|
||||
def _is_allowed_local_bash_absolute_path(path: str, allowed_paths: list[str], *, allow_system_paths: bool) -> bool:
|
||||
# Check for MCP filesystem server allowed paths
|
||||
if any(path.startswith(allowed_path) or path == allowed_path.rstrip("/") for allowed_path in allowed_paths):
|
||||
_reject_path_traversal(path)
|
||||
return True
|
||||
|
||||
if path == VIRTUAL_PATH_PREFIX or path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
|
||||
_reject_path_traversal(path)
|
||||
return True
|
||||
|
||||
# Allow skills container path (resolved by tools.py before passing to sandbox)
|
||||
if _is_skills_path(path):
|
||||
_reject_path_traversal(path)
|
||||
return True
|
||||
|
||||
# Allow ACP workspace path (path-traversal check only)
|
||||
if _is_acp_workspace_path(path):
|
||||
_reject_path_traversal(path)
|
||||
return True
|
||||
|
||||
# Allow custom mount container paths
|
||||
if _is_custom_mount_path(path):
|
||||
_reject_path_traversal(path)
|
||||
return True
|
||||
|
||||
if allow_system_paths and any(path == prefix.rstrip("/") or path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _next_cd_target(tokens: list[str], start_index: int) -> tuple[str | None, int]:
|
||||
index = start_index
|
||||
while index < len(tokens):
|
||||
token = tokens[index]
|
||||
if _is_shell_command_separator(token):
|
||||
return None, index
|
||||
if _is_shell_redirection_operator(token):
|
||||
index += 2
|
||||
continue
|
||||
if token == "--":
|
||||
index += 1
|
||||
continue
|
||||
if token in {"-L", "-P", "-e", "-@"}:
|
||||
index += 1
|
||||
continue
|
||||
if token.startswith("-") and token != "-":
|
||||
index += 1
|
||||
continue
|
||||
return token, index + 1
|
||||
return None, index
|
||||
|
||||
|
||||
def _validate_local_bash_cwd_target(command_name: str, target: str | None, allowed_paths: list[str]) -> None:
|
||||
if target is None or target == "-":
|
||||
raise PermissionError(f"Unsafe working directory change in command: {command_name}. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
if target.startswith(("$", "`")):
|
||||
raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
if target.startswith("~"):
|
||||
raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
if target.startswith("/"):
|
||||
_reject_path_traversal(target)
|
||||
if not _is_allowed_local_bash_absolute_path(target, allowed_paths, allow_system_paths=False):
|
||||
raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
|
||||
|
||||
def _looks_like_unsafe_cwd_target(target: str | None) -> bool:
|
||||
if target is None:
|
||||
return False
|
||||
return target == "-" or target.startswith(("$", "`", "~", "/", "..")) or _has_dotdot_path_segment(target)
|
||||
|
||||
|
||||
def _validate_local_bash_root_path_args(command_name: str, tokens: list[str], start_index: int) -> None:
|
||||
if command_name not in _LOCAL_BASH_ROOT_PATH_COMMANDS:
|
||||
return
|
||||
|
||||
index = start_index
|
||||
while index < len(tokens):
|
||||
token = tokens[index]
|
||||
if _is_shell_command_separator(token):
|
||||
return
|
||||
if _is_shell_redirection_operator(token):
|
||||
index += 2
|
||||
continue
|
||||
if token == "/" and not _is_non_file_url_token(token):
|
||||
raise PermissionError(f"Unsafe absolute paths in command: /. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
index += 1
|
||||
|
||||
|
||||
def _validate_local_bash_shell_tokens(command: str, allowed_paths: list[str]) -> None:
|
||||
"""Conservatively reject relative path escapes missed by absolute-path scanning."""
|
||||
if re.search(r"\$\([^)]*\b(?:cd|pushd)\b", command):
|
||||
raise PermissionError(f"Unsafe working directory change in command substitution. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
|
||||
tokens = _split_shell_tokens(command)
|
||||
|
||||
for token in tokens:
|
||||
if _is_shell_command_separator(token) or _is_shell_redirection_operator(token):
|
||||
continue
|
||||
if _has_dotdot_path_segment(token):
|
||||
raise PermissionError("Access denied: path traversal detected")
|
||||
|
||||
at_command_start = True
|
||||
index = 0
|
||||
while index < len(tokens):
|
||||
token = tokens[index]
|
||||
|
||||
if _is_shell_command_separator(token):
|
||||
at_command_start = True
|
||||
index += 1
|
||||
continue
|
||||
|
||||
if _is_shell_redirection_operator(token):
|
||||
index += 1
|
||||
continue
|
||||
|
||||
if at_command_start and _is_shell_assignment(token):
|
||||
index += 1
|
||||
continue
|
||||
|
||||
command_name = token.rsplit("/", 1)[-1]
|
||||
if at_command_start and command_name in _LOCAL_BASH_COMMAND_PREFIX_KEYWORDS | _LOCAL_BASH_COMMAND_END_KEYWORDS:
|
||||
index += 1
|
||||
continue
|
||||
|
||||
if not at_command_start:
|
||||
index += 1
|
||||
continue
|
||||
|
||||
at_command_start = False
|
||||
if command_name in _LOCAL_BASH_COMMAND_WRAPPERS and index + 1 < len(tokens):
|
||||
wrapped_name = tokens[index + 1].rsplit("/", 1)[-1]
|
||||
if wrapped_name in _LOCAL_BASH_CWD_COMMANDS:
|
||||
target, next_index = _next_cd_target(tokens, index + 2)
|
||||
_validate_local_bash_cwd_target(wrapped_name, target, allowed_paths)
|
||||
index = next_index
|
||||
continue
|
||||
_validate_local_bash_root_path_args(wrapped_name, tokens, index + 2)
|
||||
|
||||
if command_name not in _LOCAL_BASH_CWD_COMMANDS:
|
||||
_validate_local_bash_root_path_args(command_name, tokens, index + 1)
|
||||
index += 1
|
||||
continue
|
||||
|
||||
target, next_index = _next_cd_target(tokens, index + 1)
|
||||
_validate_local_bash_cwd_target(command_name, target, allowed_paths)
|
||||
index = next_index
|
||||
|
||||
|
||||
def resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState) -> str:
|
||||
"""Resolve a /mnt/user-data virtual path and validate it stays in bounds."""
|
||||
return _resolve_and_validate_user_data_path(path, thread_data)
|
||||
|
|
@ -665,33 +912,14 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
|
|||
|
||||
unsafe_paths: list[str] = []
|
||||
allowed_paths = _get_mcp_allowed_paths()
|
||||
_validate_local_bash_shell_tokens(command, allowed_paths)
|
||||
url_spans = _non_file_url_spans(command)
|
||||
|
||||
for absolute_path in _ABSOLUTE_PATH_PATTERN.findall(command):
|
||||
# Check for MCP filesystem server allowed paths
|
||||
if any(absolute_path.startswith(path) or absolute_path == path.rstrip("/") for path in allowed_paths):
|
||||
_reject_path_traversal(absolute_path)
|
||||
for match in _ABSOLUTE_PATH_PATTERN.finditer(command):
|
||||
if _is_in_spans(match.start(), url_spans):
|
||||
continue
|
||||
|
||||
if absolute_path == VIRTUAL_PATH_PREFIX or absolute_path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
|
||||
_reject_path_traversal(absolute_path)
|
||||
continue
|
||||
|
||||
# Allow skills container path (resolved by tools.py before passing to sandbox)
|
||||
if _is_skills_path(absolute_path):
|
||||
_reject_path_traversal(absolute_path)
|
||||
continue
|
||||
|
||||
# Allow ACP workspace path (path-traversal check only)
|
||||
if _is_acp_workspace_path(absolute_path):
|
||||
_reject_path_traversal(absolute_path)
|
||||
continue
|
||||
|
||||
# Allow custom mount container paths
|
||||
if _is_custom_mount_path(absolute_path):
|
||||
_reject_path_traversal(absolute_path)
|
||||
continue
|
||||
|
||||
if any(absolute_path == prefix.rstrip("/") or absolute_path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES):
|
||||
absolute_path = match.group()
|
||||
if _is_allowed_local_bash_absolute_path(absolute_path, allowed_paths, allow_system_paths=True):
|
||||
continue
|
||||
|
||||
unsafe_paths.append(absolute_path)
|
||||
|
|
|
|||
|
|
@ -346,6 +346,104 @@ def test_validate_local_bash_command_paths_blocks_traversal_in_skills() -> None:
|
|||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"command",
|
||||
[
|
||||
"cat ../uploads/secret.txt",
|
||||
"cat subdir/../../secret.txt",
|
||||
"python script.py --input=../secret.txt",
|
||||
"echo ok > ../outputs/result.txt",
|
||||
],
|
||||
)
|
||||
def test_validate_local_bash_command_paths_blocks_relative_dotdot_segments(command: str) -> None:
|
||||
with pytest.raises(PermissionError, match="path traversal"):
|
||||
validate_local_bash_command_paths(command, _THREAD_DATA)
|
||||
|
||||
|
||||
def test_validate_local_bash_command_paths_blocks_cd_root_escape() -> None:
|
||||
with pytest.raises(PermissionError, match="Unsafe working directory"):
|
||||
validate_local_bash_command_paths("cd / && cat etc/passwd", _THREAD_DATA)
|
||||
|
||||
|
||||
def test_validate_local_bash_command_paths_blocks_cd_parent_escape() -> None:
|
||||
with pytest.raises(PermissionError, match="path traversal"):
|
||||
validate_local_bash_command_paths("cd .. && cat etc/passwd", _THREAD_DATA)
|
||||
|
||||
|
||||
def test_validate_local_bash_command_paths_blocks_cd_env_var_escape() -> None:
|
||||
with pytest.raises(PermissionError, match="Unsafe working directory"):
|
||||
validate_local_bash_command_paths("cd $HOME && cat .ssh/id_rsa", _THREAD_DATA)
|
||||
|
||||
|
||||
def test_validate_local_bash_command_paths_blocks_multiline_cd_escape() -> None:
|
||||
with pytest.raises(PermissionError, match="Unsafe working directory"):
|
||||
validate_local_bash_command_paths("echo ok\ncd $HOME && cat .ssh/id_rsa", _THREAD_DATA)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"command",
|
||||
[
|
||||
"command cd / && cat etc/passwd",
|
||||
"builtin cd $HOME && cat .ssh/id_rsa",
|
||||
"if cd $HOME; then cat .ssh/id_rsa; fi",
|
||||
"{ cd /; cat etc/passwd; }",
|
||||
'echo "$(cd $HOME && cat .ssh/id_rsa)"',
|
||||
],
|
||||
)
|
||||
def test_validate_local_bash_command_paths_blocks_complex_cd_escapes(command: str) -> None:
|
||||
with pytest.raises(PermissionError, match="Unsafe working directory"):
|
||||
validate_local_bash_command_paths(command, _THREAD_DATA)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"command",
|
||||
[
|
||||
"ls /",
|
||||
"ln -s / root && cat root/etc/passwd",
|
||||
"command ls /",
|
||||
],
|
||||
)
|
||||
def test_validate_local_bash_command_paths_blocks_bare_root_path(command: str) -> None:
|
||||
with pytest.raises(PermissionError, match="Unsafe absolute paths"):
|
||||
validate_local_bash_command_paths(command, _THREAD_DATA)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"command",
|
||||
[
|
||||
"echo cd /",
|
||||
"printf '%s\\n' pushd /",
|
||||
],
|
||||
)
|
||||
def test_validate_local_bash_command_paths_allows_cd_words_as_arguments(command: str) -> None:
|
||||
validate_local_bash_command_paths(command, _THREAD_DATA)
|
||||
|
||||
|
||||
def test_validate_local_bash_command_paths_allows_workspace_relative_paths() -> None:
|
||||
validate_local_bash_command_paths(
|
||||
"mkdir -p reports && python script.py data/input.csv > reports/out.txt",
|
||||
_THREAD_DATA,
|
||||
)
|
||||
|
||||
|
||||
def test_validate_local_bash_command_paths_allows_cd_virtual_workspace_with_relative_paths() -> None:
|
||||
validate_local_bash_command_paths(
|
||||
"cd /mnt/user-data/workspace && cat data/input.csv > reports/out.txt",
|
||||
_THREAD_DATA,
|
||||
)
|
||||
|
||||
|
||||
def test_validate_local_bash_command_paths_allows_http_url_dotdot_segments() -> None:
|
||||
validate_local_bash_command_paths(
|
||||
"curl https://example.com/packages/../archive.tar.gz -o /mnt/user-data/workspace/archive.tar.gz",
|
||||
_THREAD_DATA,
|
||||
)
|
||||
validate_local_bash_command_paths(
|
||||
"curl http://example.com/packages/../archive.tar.gz -o /mnt/user-data/workspace/archive.tar.gz",
|
||||
_THREAD_DATA,
|
||||
)
|
||||
|
||||
|
||||
def test_bash_tool_rejects_host_bash_when_local_sandbox_default(monkeypatch) -> None:
|
||||
runtime = SimpleNamespace(
|
||||
state={"sandbox": {"sandbox_id": "local"}, "thread_data": _THREAD_DATA.copy()},
|
||||
|
|
@ -367,6 +465,28 @@ def test_bash_tool_rejects_host_bash_when_local_sandbox_default(monkeypatch) ->
|
|||
assert "Host bash execution is disabled" in result
|
||||
|
||||
|
||||
def test_bash_tool_blocks_relative_traversal_before_host_execution(monkeypatch) -> None:
|
||||
runtime = SimpleNamespace(
|
||||
state={"sandbox": {"sandbox_id": "local"}, "thread_data": _THREAD_DATA.copy()},
|
||||
context={"thread_id": "thread-1"},
|
||||
)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"deerflow.sandbox.tools.ensure_sandbox_initialized",
|
||||
lambda runtime: SimpleNamespace(execute_command=lambda command: pytest.fail("unsafe command should not execute")),
|
||||
)
|
||||
monkeypatch.setattr("deerflow.sandbox.tools.ensure_thread_directories_exist", lambda runtime: None)
|
||||
monkeypatch.setattr("deerflow.sandbox.tools.is_host_bash_allowed", lambda: True)
|
||||
|
||||
result = bash_tool.func(
|
||||
runtime=runtime,
|
||||
description="run command",
|
||||
command="cat ../uploads/secret.txt",
|
||||
)
|
||||
|
||||
assert "path traversal" in result
|
||||
|
||||
|
||||
# ---------- Skills path tests ----------
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue