|
| 1 | +#!/bin/bash |
| 2 | +# |
| 3 | +# trace-command: trace filesystem and network activity of a command |
| 4 | +# |
| 5 | +# Usage: sudo trace-command <command> [args...] |
| 6 | +# |
| 7 | +# Strategy: ktrace dump captures a raw system trace to a file (reliably |
| 8 | +# records short-lived processes). fs_usage -R replays that file with |
| 9 | +# human-readable paths and network info. We filter by command name. |
| 10 | + |
# Strict mode: abort on an unhandled command failure, on use of an unset
# variable, and on a failure in any stage of a pipeline.
set -euo pipefail

# Print the invocation synopsis to stderr (it is only shown on misuse).
usage() {
  echo "Usage: sudo $0 <command> [args...]" >&2
}

# A command to trace is mandatory.
if [[ $# -lt 1 ]]; then
  usage
  exit 1
fi

# Refuse to continue unless running as root.
if [[ $EUID -ne 0 ]]; then
  echo "Error: must run with sudo (ktrace requires root)" >&2
  usage
  exit 1
fi

# SUDO_USER names the invoking user; the traced command is later run as that
# user, so a direct root login (where it is unset) is rejected.
if [[ -z "${SUDO_USER:-}" ]]; then
  echo "Error: SUDO_USER not set. Run with sudo, not as root directly." >&2
  exit 1
fi
| 28 | + |
# Build output directory.
#
# This runs as root inside a shared temp directory, so the directory is
# created with mktemp -d: it fails rather than reusing a path (or symlink)
# that somebody else created first, and two runs within the same second no
# longer share a directory. The "trace-<command>-<timestamp>" prefix is kept.
cmd_basename="$(basename -- "$1")"
timestamp="$(date +%Y%m%d-%H%M%S)"
outdir="$(mktemp -d "${TMPDIR:-/tmp}/trace-${cmd_basename}-${timestamp}-XXXXXX")"
# Hand the directory to the invoking user so the results are theirs to manage.
chown "$SUDO_USER" "$outdir"

# Files produced by the run, all inside the output directory.
raw_trace="$outdir/trace.ktrace"
dns_pcap="$outdir/dns.pcap"
dns_log="$outdir/dns.log"
raw_log="$outdir/raw.log"
filtered_log="$outdir/filtered.log"
summary_log="$outdir/summary.log"

echo "Tracing: $*"
echo "Output: $outdir/"
echo ""
| 46 | + |
# Step 1: Start system-wide ktrace dump in background
ktrace dump "$raw_trace" &
ktrace_pid=$!

# Also capture DNS queries via packet capture. This is the only reliable
# way to get hostnames on macOS with SIP enabled.
tcpdump -i any -w "$dns_pcap" 'port 53' > /dev/null 2>&1 &
tcpdump_pid=$!

# Signal both tracers and reap them. Every step tolerates failure: a tracer
# that never started or has already exited must not abort the script.
stop_tracers() {
  kill "$ktrace_pid" "$tcpdump_pid" 2> /dev/null || true
  wait "$ktrace_pid" 2> /dev/null || true
  wait "$tcpdump_pid" 2> /dev/null || true
}
# Make sure the background tracers do not outlive the script if it exits
# before the normal shutdown below.
trap stop_tracers EXIT

# Give ktrace and tcpdump time to initialize
sleep 1

# Step 2: Run the command as the original user.
# The "|| cmd_exit=$?" form records a non-zero exit status instead of letting
# "set -e" terminate the script before the trace is stopped and processed.
cmd_exit=0
sudo -u "$SUDO_USER" -- "$@" || cmd_exit=$?

# Step 3: Stop ktrace and tcpdump (after a short grace period)
sleep 1
stop_tracers
trap - EXIT

# Decode DNS queries from the pcap; best effort, the log may end up empty.
tcpdump -r "$dns_pcap" -nn 'port 53' 2> /dev/null > "$dns_log" || true
| 72 | + |
echo ""
echo "Command exited with status: $cmd_exit"

# Step 4: Replay the raw trace through fs_usage to get resolved paths.
# Best effort: fs_usage's output (including its stderr) goes to the raw log
# and a failure here does not stop the script.
echo "Processing trace..."
fs_usage -w -R "$raw_trace" > "$raw_log" 2>&1 || true

# Step 5: Filter to just our command.
# -F matches the command name as a literal string (names such as "g++" or
# "python3.11" contain regex metacharacters) and "--" keeps a name starting
# with "-" from being parsed as an option. No match is not an error.
grep -iF -- "$cmd_basename" "$raw_log" > "$filtered_log" || true

echo ""
| 84 | + |
# Patterns for system read-only paths and relative path noise from fs_usage
sys_pattern='^/(usr|System|Library|dev|var|private/(var|etc)|etc|Applications|AppleInternal)'
noise_pattern='^/(\.\.|\.\./|Cellar|C\.|/)'

# Print $1 with every extended-regex metacharacter backslash-escaped, so the
# result matches the original text literally when embedded in a grep -E
# pattern.
escape_ere() {
  printf '%s' "$1" | sed 's/[][\\.^$*+?(){}|]/\\&/g'
}

# PATH search: any path ending in /<cmd_basename> is bash looking for the binary
path_search_pattern="/$(escape_ere "$cmd_basename")\$"

# Print a one-line, human-readable description of a single fs_usage line.
# Arguments: $1 - a line whose second whitespace-separated field is the
#                 operation name (socket, connect or socketpair)
# Outputs:   the description on stdout; nothing for any other operation
describe_net_event() {
  local line=$1
  local op desc errno
  op=$(awk '{print $2}' <<< "$line")
  case "$op" in
    socket)
      desc=$(grep -oE '<[^>]+>' <<< "$line" || true)
      # AF_INET6 must be tested before AF_INET: the glob *AF_INET* also
      # matches "AF_INET6", which would label IPv6 sockets as IPv4.
      case "$desc" in
        *AF_INET6*SOCK_STREAM*) echo " socket: TCP (IPv6)" ;;
        *AF_INET6*SOCK_DGRAM*) echo " socket: UDP (IPv6)" ;;
        *AF_INET*SOCK_STREAM*) echo " socket: TCP (IPv4)" ;;
        *AF_INET*SOCK_DGRAM*) echo " socket: UDP (IPv4)" ;;
        *AF_UNIX*) echo " socket: Unix domain" ;;
        *SOCK_DGRAM*) echo " socket: datagram (likely UDP)" ;;
        *SOCK_STREAM*) echo " socket: stream (likely TCP)" ;;
        *) echo " socket: $desc" ;;
      esac
      ;;
    connect)
      if [[ "$line" == *mDNSResponder* ]]; then
        echo " connect: /var/run/mDNSResponder (DNS resolution)"
      elif errno=$(grep -oE '\[ *[0-9]+\]' <<< "$line"); then
        # [errno] means in-progress (non-blocking) or failed
        echo " connect: TCP to remote host (see DNS lookups) $errno"
      elif grep -qE 'F=[0-9]+[[:space:]]*$' <<< "$line"; then
        echo " connect: completed"
      else
        echo " connect: (unknown target)"
      fi
      ;;
    socketpair)
      echo " socketpair: internal pipe between threads"
      ;;
  esac
}

# Generate summary (everything printed inside the group goes to the summary
# file).
{
  echo "=== Trace Summary ==="
  echo "Command: $*"
  echo "Exit status: $cmd_exit"
  echo "Timestamp: $(date)"
  echo "Raw events: $(wc -l < "$raw_log"), $(wc -l < "$filtered_log") for $cmd_basename"
  echo ""

  echo "=== DNS Lookups (review: may include background noise) ==="
  # Pull the queried name out of each "TYPE? name." fragment and drop the
  # trailing dot. The result is collected first so that "(none)" is printed
  # whenever nothing was found, independent of the pipefail setting.
  dns_names=""
  if [[ -s "$dns_log" ]]; then
    dns_names=$(grep -oE '[A-Z]+\? [^ ]+' "$dns_log" \
      | sed -e 's/^.*? //' -e 's/\.$//' \
      | sort -u \
      || true)
  fi
  if [[ -n "$dns_names" ]]; then
    printf '%s\n' "$dns_names"
  else
    echo "(none)"
  fi
  echo ""

  # Extract unique paths, filtering out:
  # - Relative path artifacts from fs_usage (../, /Cellar/, /C.UTF-8/, //)
  # - PATH search: any path ending in /<cmd_basename> (shell looking for binary)
  # - Our own trace artifacts (output dir, previous trace dirs)
  trace_dir_pattern="trace-${cmd_basename}-"

  # A grep stage that selects nothing exits non-zero; "|| true" keeps that
  # from being treated as a failure. The trace-dir filter is a literal (-F)
  # match because it contains the command name.
  all_paths=$(grep -oE '/[^ ]+' "$filtered_log" \
    | grep -vE "$noise_pattern" \
    | grep -vE "$path_search_pattern" \
    | grep -vF -- "$trace_dir_pattern" \
    | sort -u \
    || true)

  # Split into user/app paths vs system read-only
  writable_paths=$(grep -vE "$sys_pattern" <<< "$all_paths" || true)
  system_paths=$(grep -E "$sys_pattern" <<< "$all_paths" || true)

  echo "=== Paths Accessed (user/app, review for sandbox allowWrite) ==="
  if [[ -n "$writable_paths" ]]; then
    echo "$writable_paths"
  else
    echo "(none)"
  fi
  echo ""

  echo "=== Paths Accessed (system, read-only, likely no config needed) ==="
  if [[ -n "$system_paths" ]]; then
    echo "$system_paths"
  else
    echo "(none)"
  fi
  echo ""

  echo "=== Network Activity ==="
  # POSIX [[:space:]] is used instead of the non-standard \s escape.
  net_lines=$(grep -iE '^.*[[:space:]](socket|connect|socketpair)[[:space:]]' "$filtered_log" || true)
  if [[ -n "$net_lines" ]]; then
    while IFS= read -r line; do
      describe_net_event "$line"
    done <<< "$net_lines"
  else
    echo " (none)"
  fi

} > "$summary_log"
| 188 | + |
# Tell the user where each artifact was written (with line counts for the two
# logs), then show the summary itself on the terminal.
printf 'Raw log: %s (%s lines)\n' "$raw_log" "$(wc -l < "$raw_log")"
printf 'Filtered: %s (%s lines)\n' "$filtered_log" "$(wc -l < "$filtered_log")"
printf 'Summary: %s\n\n' "$summary_log"
cat "$summary_log"
0 commit comments