fix(growth): security hardening — bun -e interpolation, pkill race, input validation (#3294)

Closes a batch of real security findings filed against growth.sh and reddit-fetch.ts.

growth.sh:
- Switch all four `bun -e "...${VAR}..."` sites to env-var passing
  (_VAR="..." bun -e 'process.env._VAR'), per .claude/rules/shell-scripts.md.
  Closes #3188, #3221, #3223.
- Spawn claude under `setsid` so it owns its own process group, and kill the
  group via `kill -SIG -PGID` instead of racing with pkill -P. Adds a numeric
  guard on CLAUDE_PID. Closes #3193, #3205.
- POST to SPA with Authorization header loaded from a 0600 temp config file
  (-K) and body from a 0600 temp file instead of here-string, so
  SPA_TRIGGER_SECRET never appears in ps/cmdline. Closes #3224.
- Drop dead REDDIT_JSON=$(cat ...) line.
- Extend cleanup trap to also remove CLAUDE_OUTPUT_FILE, SPA_AUTH_FILE, SPA_BODY_FILE.

reddit-fetch.ts:
- Validate REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET don't contain ':' or CRLF
  (prevents Basic-auth corruption and header injection). Closes #3198.
- Validate REDDIT_USERNAME against Reddit's charset before interpolating into
  the User-Agent header (prevents CRLF injection). Closes #3207.
- Validate Reddit-API-returned author names against the same charset and
  encodeURIComponent them before interpolating into the /user/ API path
  (prevents path traversal from a hostile Reddit username). Closes #3202.
This commit is contained in:
Ahmed Abushagur 2026-04-13 17:44:31 -07:00 committed by GitHub
parent ace5aa94d1
commit 2d7a23a460
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 82 additions and 40 deletions

View file

@ -33,7 +33,8 @@ cleanup() {
local exit_code=$?
log "Running cleanup (exit_code=${exit_code})..."
rm -f "${PROMPT_FILE:-}" "${REDDIT_DATA_FILE:-}" "${CLAUDE_STREAM_FILE:-}" 2>/dev/null || true
rm -f "${PROMPT_FILE:-}" "${REDDIT_DATA_FILE:-}" "${CLAUDE_STREAM_FILE:-}" \
"${CLAUDE_OUTPUT_FILE:-}" "${SPA_AUTH_FILE:-}" "${SPA_BODY_FILE:-}" 2>/dev/null || true
if [[ -n "${CLAUDE_PID:-}" ]] && kill -0 "${CLAUDE_PID}" 2>/dev/null; then
kill -TERM "${CLAUDE_PID}" 2>/dev/null || true
fi
@ -64,7 +65,7 @@ if ! bun run "${SCRIPT_DIR}/reddit-fetch.ts" > "${REDDIT_DATA_FILE}" 2>> "${LOG_
exit 1
fi
POST_COUNT=$(bun -e "const d=JSON.parse(await Bun.file('${REDDIT_DATA_FILE}').text()); console.log(d.postsScanned ?? d.posts?.length ?? 0)")
POST_COUNT=$(_DATA_FILE="${REDDIT_DATA_FILE}" bun -e 'const d=JSON.parse(await Bun.file(process.env._DATA_FILE).text()); console.log(d.postsScanned ?? d.posts?.length ?? 0)')
log "Phase 1 done: ${POST_COUNT} posts fetched"
# --- Phase 2: Score with Claude ---
@ -79,40 +80,50 @@ if [[ ! -f "$PROMPT_TEMPLATE" ]]; then
exit 1
fi
# Inject Reddit data into prompt template
REDDIT_JSON=$(cat "${REDDIT_DATA_FILE}")
# Use bun for safe substitution to avoid sed escaping issues with JSON
# Inject Reddit data into prompt template.
# Paths are passed via env vars — never interpolated into the JS string — per
# .claude/rules/shell-scripts.md ("Pass data to bun via environment variables").
DECISIONS_FILE="${HOME}/.config/spawn/growth-decisions.md"
bun -e "
import { existsSync } from 'node:fs';
const template = await Bun.file('${PROMPT_TEMPLATE}').text();
const data = await Bun.file('${REDDIT_DATA_FILE}').text();
const decisionsPath = '${DECISIONS_FILE}';
const decisions = existsSync(decisionsPath) ? await Bun.file(decisionsPath).text() : 'No past decisions yet.';
_TEMPLATE="${PROMPT_TEMPLATE}" \
_DATA_FILE="${REDDIT_DATA_FILE}" \
_DECISIONS="${DECISIONS_FILE}" \
_OUT="${PROMPT_FILE}" \
bun -e '
import { existsSync } from "node:fs";
const template = await Bun.file(process.env._TEMPLATE).text();
const data = await Bun.file(process.env._DATA_FILE).text();
const decisionsPath = process.env._DECISIONS;
const decisions = existsSync(decisionsPath) ? await Bun.file(decisionsPath).text() : "No past decisions yet.";
const result = template
.replace('REDDIT_DATA_PLACEHOLDER', data.trim())
.replace('DECISIONS_PLACEHOLDER', decisions.trim());
await Bun.write('${PROMPT_FILE}', result);
"
.replace("REDDIT_DATA_PLACEHOLDER", data.trim())
.replace("DECISIONS_PLACEHOLDER", decisions.trim());
await Bun.write(process.env._OUT, result);
'
log "Hard timeout: ${HARD_TIMEOUT}s"
# Run claude with stream-json to capture text (plain -p stdout is empty with extended thinking)
CLAUDE_STREAM_FILE=$(mktemp /tmp/growth-stream-XXXXXX.jsonl)
CLAUDE_OUTPUT_FILE=$(mktemp /tmp/growth-output-XXXXXX.txt)
claude -p - --model sonnet --output-format stream-json --verbose < "${PROMPT_FILE}" > "${CLAUDE_STREAM_FILE}" 2>> "${LOG_FILE}" &
# Run claude in its own session/process group (setsid) so we can signal the
# whole tree atomically via `kill -SIG -PGID` instead of racing with pkill -P.
setsid claude -p - --model sonnet --output-format stream-json --verbose \
< "${PROMPT_FILE}" > "${CLAUDE_STREAM_FILE}" 2>> "${LOG_FILE}" &
CLAUDE_PID=$!
log "Claude started (pid=${CLAUDE_PID})"
log "Claude started (pid=${CLAUDE_PID}, pgid=${CLAUDE_PID})"
# Kill claude and its full process tree
# Kill claude and its full process tree by signalling the process group.
# Guards against empty/non-numeric CLAUDE_PID (defensive — should never happen).
kill_claude() {
if [[ -z "${CLAUDE_PID:-}" ]] || ! [[ "${CLAUDE_PID}" =~ ^[0-9]+$ ]]; then
log "kill_claude: CLAUDE_PID is unset or non-numeric, skipping"
return
fi
if kill -0 "${CLAUDE_PID}" 2>/dev/null; then
log "Killing claude (pid=${CLAUDE_PID}) and its process tree"
pkill -TERM -P "${CLAUDE_PID}" 2>/dev/null || true
kill -TERM "${CLAUDE_PID}" 2>/dev/null || true
log "Killing claude process group (pgid=${CLAUDE_PID})"
kill -TERM -"${CLAUDE_PID}" 2>/dev/null || true
sleep 5
pkill -KILL -P "${CLAUDE_PID}" 2>/dev/null || true
kill -KILL "${CLAUDE_PID}" 2>/dev/null || true
kill -KILL -"${CLAUDE_PID}" 2>/dev/null || true
fi
}
@ -133,22 +144,24 @@ done
wait "${CLAUDE_PID}" 2>/dev/null
CLAUDE_EXIT=$?
# Extract text content from stream-json into plain text output file
bun -e "
const lines = (await Bun.file('${CLAUDE_STREAM_FILE}').text()).split('\n').filter(Boolean);
# Extract text content from stream-json into plain text output file.
_STREAM="${CLAUDE_STREAM_FILE}" \
_OUT="${CLAUDE_OUTPUT_FILE}" \
bun -e '
const lines = (await Bun.file(process.env._STREAM).text()).split("\n").filter(Boolean);
const texts = [];
for (const line of lines) {
try {
const ev = JSON.parse(line);
if (ev.type === 'assistant' && Array.isArray(ev.message?.content)) {
if (ev.type === "assistant" && Array.isArray(ev.message?.content)) {
for (const block of ev.message.content) {
if (block.type === 'text' && block.text) texts.push(block.text);
if (block.type === "text" && block.text) texts.push(block.text);
}
}
} catch {}
}
await Bun.write('${CLAUDE_OUTPUT_FILE}', texts.join('\n'));
" 2>> "${LOG_FILE}" || true
await Bun.write(process.env._OUT, texts.join("\n"));
' 2>> "${LOG_FILE}" || true
# Append Claude output to log
cat "${CLAUDE_OUTPUT_FILE}" >> "${LOG_FILE}" 2>/dev/null || true
@ -164,15 +177,15 @@ CANDIDATE_JSON=""
# Extract the last valid json:candidate block from Claude's output
if [[ -f "${CLAUDE_OUTPUT_FILE}" ]]; then
CANDIDATE_JSON=$(bun -e "
const text = await Bun.file('${CLAUDE_OUTPUT_FILE}').text();
const blocks = [...text.matchAll(/\`\`\`json:candidate\n([\s\S]*?)\n\`\`\`/g)];
let result = '';
CANDIDATE_JSON=$(_OUT="${CLAUDE_OUTPUT_FILE}" bun -e '
const text = await Bun.file(process.env._OUT).text();
const blocks = [...text.matchAll(/```json:candidate\n([\s\S]*?)\n```/g)];
let result = "";
for (const block of blocks) {
try { result = JSON.stringify(JSON.parse(block[1].trim())); } catch {}
}
if (result) console.log(result);
" 2>/dev/null)
' 2>/dev/null)
fi
if [[ -z "${CANDIDATE_JSON}" ]]; then
@ -182,15 +195,23 @@ fi
log "Candidate JSON: ${CANDIDATE_JSON}"
# POST to SPA if SPA_TRIGGER_URL is configured
# POST to SPA if SPA_TRIGGER_URL is configured.
# Secret + body are written to 0600 temp files so SPA_TRIGGER_SECRET never
# appears on the curl command line (visible via ps / /proc/*/cmdline).
if [[ -n "${SPA_TRIGGER_URL:-}" && -n "${SPA_TRIGGER_SECRET:-}" ]]; then
log "Posting candidate to SPA at ${SPA_TRIGGER_URL}/candidate"
SPA_AUTH_FILE=$(mktemp /tmp/growth-auth-XXXXXX.conf)
SPA_BODY_FILE=$(mktemp /tmp/growth-body-XXXXXX.json)
chmod 0600 "${SPA_AUTH_FILE}" "${SPA_BODY_FILE}"
printf 'header = "Authorization: Bearer %s"\n' "${SPA_TRIGGER_SECRET}" > "${SPA_AUTH_FILE}"
printf '%s' "${CANDIDATE_JSON}" > "${SPA_BODY_FILE}"
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
-X POST "${SPA_TRIGGER_URL}/candidate" \
-H "Authorization: Bearer ${SPA_TRIGGER_SECRET}" \
-K "${SPA_AUTH_FILE}" \
-H "Content-Type: application/json" \
--data-binary @- <<< "${CANDIDATE_JSON}" \
--data-binary @"${SPA_BODY_FILE}" \
--max-time 30) || HTTP_STATUS="000"
rm -f "${SPA_AUTH_FILE}" "${SPA_BODY_FILE}"
log "SPA response: HTTP ${HTTP_STATUS}"
else
log "SPA_TRIGGER_URL or SPA_TRIGGER_SECRET not set, skipping Slack notification"

View file

@ -15,13 +15,29 @@ const CLIENT_ID = process.env.REDDIT_CLIENT_ID ?? "";
const CLIENT_SECRET = process.env.REDDIT_CLIENT_SECRET ?? "";
const USERNAME = process.env.REDDIT_USERNAME ?? "";
const PASSWORD = process.env.REDDIT_PASSWORD ?? "";
const USER_AGENT = `spawn-growth:v1.0.0 (by /u/${USERNAME})`;
if (!CLIENT_ID || !CLIENT_SECRET || !USERNAME || !PASSWORD) {
console.error("Missing Reddit credentials");
process.exit(1);
}
// Validate credential format to prevent Basic-auth corruption and header
// injection (colons split the user:pass pair; CR/LF splits HTTP headers).
if (/[:\r\n]/.test(CLIENT_ID) || /[:\r\n]/.test(CLIENT_SECRET)) {
console.error("Invalid REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET: must not contain ':' or newlines");
process.exit(1);
}
// Reddit usernames are [A-Za-z0-9_-], 320 chars. Reject anything else so the
// User-Agent header can't be CRLF-injected via a hostile env var.
const REDDIT_USERNAME_RE = /^[A-Za-z0-9_-]{1,64}$/;
if (!REDDIT_USERNAME_RE.test(USERNAME)) {
console.error("Invalid REDDIT_USERNAME format");
process.exit(1);
}
const USER_AGENT = `spawn-growth:v1.0.0 (by /u/${USERNAME})`;
// Subreddits — shuffled each run so we don't always hit the same ones first
const SUBREDDITS = shuffle([
"Vibecoding",
@ -199,7 +215,12 @@ function extractPosts(data: unknown): Map<string, RedditPost> {
/** Fetch a user's recent comments. */
async function fetchUserComments(token: string, username: string): Promise<string[]> {
if (!username || username === "[deleted]") return [];
const data = await redditGet(token, `/user/${username}/comments?limit=25&sort=new`);
// The author field comes from the Reddit API and is therefore untrusted.
// Reject anything outside Reddit's real username charset to prevent path
// traversal into other API endpoints, and encodeURIComponent as defense in
// depth.
if (!REDDIT_USERNAME_RE.test(username)) return [];
const data = await redditGet(token, `/user/${encodeURIComponent(username)}/comments?limit=25&sort=new`);
if (!data || typeof data !== "object") return [];
const listing = data as Record<string, unknown>;
const listingData = listing.data as Record<string, unknown> | undefined;