diff --git a/rust/crates/runtime/src/permission_enforcer.rs b/rust/crates/runtime/src/permission_enforcer.rs index 6ff872bcc8..e826c277ee 100644 --- a/rust/crates/runtime/src/permission_enforcer.rs +++ b/rust/crates/runtime/src/permission_enforcer.rs @@ -173,33 +173,119 @@ impl PermissionEnforcer { } } -/// Simple workspace boundary check via string prefix. +/// Workspace boundary check. +/// +/// Resolves `.` and `..` components lexically *before* comparing against the +/// workspace root, so that traversal sequences like `/workspace/../../etc` +/// cannot escape the sandbox via a naive string prefix match. Normalization is +/// lexical (it does not touch the filesystem) because the target path may not +/// exist yet on a write, and we must not depend on CWD. fn is_within_workspace(path: &str, workspace_root: &str) -> bool { - let normalized = if path.starts_with('/') { + let combined = if path.starts_with('/') { path.to_owned() } else { format!("{workspace_root}/{path}") }; - let root = if workspace_root.ends_with('/') { - workspace_root.to_owned() + let normalized = lexically_normalize(&combined); + let root = lexically_normalize(workspace_root); + let root_with_slash = if root.ends_with('/') { + root.clone() } else { - format!("{workspace_root}/") + format!("{root}/") }; - normalized.starts_with(&root) || normalized == workspace_root.trim_end_matches('/') + normalized == root || normalized.starts_with(&root_with_slash) +} + +/// Collapse `.` and `..` segments without consulting the filesystem. +/// `..` that would climb above an absolute root is clamped at `/`, so the +/// result can never be a prefix-match for a deeper workspace root. +fn lexically_normalize(path: &str) -> String { + let is_absolute = path.starts_with('/'); + let mut stack: Vec<&str> = Vec::new(); + for component in path.split('/') { + match component { + "" | "." => {} + ".." => { + stack.pop(); + } + other => stack.push(other), + } + } + let joined = stack.join("/"); + if is_absolute { + format!("/{joined}") + } else { + joined + } } /// Conservative heuristic: is this bash command read-only? +/// +/// Hardening notes: +/// - Any shell metacharacter that could chain, substitute, pipe, or redirect +/// into a state-changing command rejects the whole line. This blocks +/// `cat x; rm -rf y`, `cat x | sh`, `$(...)`, backticks, redirects, and +/// subshells regardless of the leading token. +/// - Language interpreters (`python`, `node`, `ruby`) and build drivers +/// (`cargo`, `rustc`) are NOT read-only: they execute arbitrary code, so they +/// are excluded from the allow-list. +/// - `git` is allowed only for a known set of non-mutating subcommands. +/// - `find` is rejected when it carries an action that can execute or delete. +/// +/// Residual known gaps (documented, not yet closed): `sed`'s `w`/`e` script +/// commands and `awk`'s `system()` can still mutate — these require quoting or +/// metacharacters that the checks above usually catch, but a dedicated parser +/// would be more robust. Tracked as follow-up. fn is_read_only_command(command: &str) -> bool { - let first_token = command - .split_whitespace() + // Shell metacharacters that enable command chaining, substitution, + // piping, redirection, or subshells. Presence of any of these means we + // cannot reason about the command from its leading token alone. + const SHELL_METACHARS: &[char] = + &[';', '|', '&', '$', '`', '>', '<', '(', ')', '{', '}', '\n']; + if command.contains(SHELL_METACHARS) { + return false; + } + + let mut tokens = command.split_whitespace(); + let first_token = tokens .next() .unwrap_or("") .rsplit('/') .next() .unwrap_or(""); + // `git` is only read-only for a curated set of subcommands. + if first_token == "git" { + let subcommand = tokens.next().unwrap_or(""); + return matches!( + subcommand, + "status" + | "log" + | "diff" + | "show" + | "branch" + | "rev-parse" + | "ls-files" + | "blame" + | "describe" + | "tag" + | "remote" + ); + } + + // `find` can execute or delete via actions; reject those forms. + if first_token == "find" + && (command.contains("-exec") + || command.contains("-execdir") + || command.contains("-delete") + || command.contains("-ok") + || command.contains("-fprintf")) + { + return false; + } + matches!( first_token, "cat" @@ -237,8 +323,6 @@ fn is_read_only_command(command: &str) -> bool { | "tr" | "cut" | "paste" - | "tee" - | "xargs" | "test" | "true" | "false" @@ -257,18 +341,8 @@ fn is_read_only_command(command: &str) -> bool { | "tree" | "jq" | "yq" - | "python3" - | "python" - | "node" - | "ruby" - | "cargo" - | "rustc" - | "git" - | "gh" ) && !command.contains("-i ") && !command.contains("--in-place") - && !command.contains(" > ") - && !command.contains(" >> ") } #[cfg(test)] @@ -375,6 +449,85 @@ mod tests { assert!(!is_read_only_command("sed -i 's/a/b/' file")); } + // --- Hardening regression tests (#2: read-only bypasses) --- + + #[test] + fn read_only_rejects_command_chaining() { + // A leading read-only token must not launder a trailing destructive one. + assert!(!is_read_only_command("cat foo; rm -rf bar")); + assert!(!is_read_only_command("cat foo && rm -rf bar")); + assert!(!is_read_only_command("ls || rm bar")); + assert!(!is_read_only_command("cat foo | sh")); + assert!(!is_read_only_command("echo `rm bar`")); + assert!(!is_read_only_command("echo $(rm bar)")); + assert!(!is_read_only_command("echo x>file")); // redirect without spaces + } + + #[test] + fn read_only_rejects_interpreters_and_build_drivers() { + // These execute arbitrary code and are no longer read-only. + assert!(!is_read_only_command( + "python3 -c \"import os; os.system('rm -rf .')\"" + )); + assert!(!is_read_only_command("python script.py")); + assert!(!is_read_only_command("node app.js")); + assert!(!is_read_only_command("ruby x.rb")); + assert!(!is_read_only_command("cargo run")); + assert!(!is_read_only_command("rustc evil.rs")); + } + + #[test] + fn read_only_gates_git_subcommands() { + // Read-only git subcommands remain allowed... + assert!(is_read_only_command("git status")); + assert!(is_read_only_command("git diff HEAD~1")); + assert!(is_read_only_command("git show abc123")); + // ...but mutating/exfiltrating ones are rejected. + assert!(!is_read_only_command("git commit -m x")); + assert!(!is_read_only_command("git push origin main")); + assert!(!is_read_only_command("git reset --hard")); + assert!(!is_read_only_command("git clean -fd")); + assert!(!is_read_only_command("git config user.email a@b.c")); + } + + #[test] + fn read_only_rejects_find_actions() { + assert!(is_read_only_command("find . -name Cargo.toml")); + assert!(!is_read_only_command("find . -delete")); + // -exec uses braces/semicolon which also trip the metachar guard, + // but the explicit action check is the primary defense. + assert!(!is_read_only_command("find . -execdir rm rf")); + } + + // --- Hardening regression tests (#1: workspace path traversal) --- + + #[test] + fn workspace_rejects_parent_traversal() { + assert!(!is_within_workspace("/workspace/../etc/passwd", "/workspace")); + assert!(!is_within_workspace( + "/workspace/../../etc/crontab", + "/workspace" + )); + assert!(!is_within_workspace("../etc/passwd", "/workspace")); + assert!(!is_within_workspace( + "/workspace/sub/../../outside", + "/workspace" + )); + // Legitimate paths still resolve inside. + assert!(is_within_workspace("/workspace/./src/main.rs", "/workspace")); + assert!(is_within_workspace( + "/workspace/src/../src/main.rs", + "/workspace" + )); + } + + #[test] + fn workspace_write_denies_traversal_escape() { + let enforcer = make_enforcer(PermissionMode::WorkspaceWrite); + let result = enforcer.check_file_write("/workspace/../../etc/crontab", "/workspace"); + assert!(matches!(result, EnforcementResult::Denied { .. })); + } + #[test] fn active_mode_returns_policy_mode() { // given diff --git a/rust/crates/tools/src/lib.rs b/rust/crates/tools/src/lib.rs index a42bdcb328..bf820ced78 100644 --- a/rust/crates/tools/src/lib.rs +++ b/rust/crates/tools/src/lib.rs @@ -2571,6 +2571,20 @@ fn is_within_workspace(path: &str) -> bool { let path = PathBuf::from(trimmed); + // Reject any parent-directory traversal. Callers never need `..` to refer + // to files inside the workspace, and `..` defeats both checks below: the + // relative branch only inspects the leading component, and the absolute + // branch's `canonicalize()` silently falls back to the literal `..` path + // when the target does not exist yet (e.g. a file about to be created). + // Returning false here is the safe direction: it classifies the command as + // requiring full-access permission rather than workspace-write. + if path + .components() + .any(|component| matches!(component, std::path::Component::ParentDir)) + { + return false; + } + // If path is absolute, check if it starts with CWD if path.is_absolute() { if let Ok(cwd) = std::env::current_dir() { @@ -2588,6 +2602,26 @@ fn run_powershell(input: PowerShellInput) -> Result { to_pretty_json(execute_powershell(input).map_err(|error| error.to_string())?) } +#[cfg(test)] +mod workspace_traversal_guard_tests { + use super::is_within_workspace; + + #[test] + fn rejects_parent_traversal_components() { + // Leading and embedded `..` must both be rejected (was previously a hole + // because only the leading component was inspected). + assert!(!is_within_workspace("../secrets")); + assert!(!is_within_workspace("src/../../etc/passwd")); + assert!(!is_within_workspace("a/b/../../../etc/crontab")); + } + + #[test] + fn allows_plain_relative_paths() { + assert!(is_within_workspace("src/main.rs")); + assert!(is_within_workspace("Cargo.toml")); + } +} + fn to_pretty_json(value: T) -> Result { serde_json::to_string_pretty(&value).map_err(|error| error.to_string()) }