From 16d0e75f497fc7d3cffc7e2c2f3dd9848652a29f Mon Sep 17 00:00:00 2001
From: A <258483684+la14-1@users.noreply.github.com>
Date: Sun, 5 Apr 2026 13:12:05 -0700
Subject: [PATCH] feat(growth): batch Reddit fetching for faster growth cycles
 (#3184)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Splits the growth agent into two phases:
1. reddit-fetch.ts — parallel batch fetch of all Reddit posts (~30s)
2. Claude scoring — pure text analysis of pre-fetched data (~30s)

Previously Claude made 56+ sequential tool calls through the LLM loop,
taking 5-10 minutes. Now the full cycle completes in ~1-2 minutes.

Also fixes empty stdout issue by using stream-json output format and
extracting text content from the event stream.

Co-authored-by: Claude <claude@anthropic.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../skills/setup-agent-team/growth-prompt.md  | 114 ++------
 .claude/skills/setup-agent-team/growth.sh     | 111 ++++----
 .../skills/setup-agent-team/reddit-fetch.ts   | 259 ++++++++++++++++++
 3 files changed, 346 insertions(+), 138 deletions(-)
 create mode 100644 .claude/skills/setup-agent-team/reddit-fetch.ts

diff --git a/.claude/skills/setup-agent-team/growth-prompt.md b/.claude/skills/setup-agent-team/growth-prompt.md
index 004f3534..effa7c3d 100644
--- a/.claude/skills/setup-agent-team/growth-prompt.md
+++ b/.claude/skills/setup-agent-team/growth-prompt.md
@@ -2,84 +2,19 @@ You are the Reddit growth discovery agent for Spawn (https://github.com/OpenRout
 
 Spawn lets developers spin up AI coding agents (Claude Code, Codex, Kilo Code, etc.) on cloud servers with one command: `curl -fsSL openrouter.ai/labs/spawn | bash`
 
-Your job: find the ONE best Reddit thread where someone is asking for something Spawn solves, verify the poster looks like a real developer who could use it, and output a summary. You do NOT post replies. You only find and report.
+Your job: from the pre-fetched Reddit posts below, find the ONE best thread where someone is asking for something Spawn solves, verify the poster looks like a real developer, and output a structured summary. You do NOT post replies. You only score and report.
 
-## Credentials
+**IMPORTANT: Do NOT use any tools.** All data is provided below. Your entire response should be plain text output — no bash commands, no file reads, no tool calls. Just analyze the data and respond with your findings.
 
-Reddit OAuth (script grant):
-- Client ID: `REDDIT_CLIENT_ID_PLACEHOLDER`
-- Client Secret: `REDDIT_CLIENT_SECRET_PLACEHOLDER`
-- Username: `REDDIT_USERNAME_PLACEHOLDER`
-- Password: `REDDIT_PASSWORD_PLACEHOLDER`
+## Pre-fetched Reddit data
 
-## Step 1: Authenticate with Reddit
+The following posts were fetched automatically. Each post includes the title, selftext, subreddit, engagement stats (`score`, `numComments`), creation time (`createdUtc`, Unix epoch seconds), and the poster's recent comment history (`authorComments`).
 
-Get an OAuth token using the script grant type:
-
-```bash
-bun -e "
-const auth = Buffer.from('REDDIT_CLIENT_ID_PLACEHOLDER:REDDIT_CLIENT_SECRET_PLACEHOLDER').toString('base64');
-const res = await fetch('https://www.reddit.com/api/v1/access_token', {
-  method: 'POST',
-  headers: {
-    'Authorization': 'Basic ' + auth,
-    'Content-Type': 'application/x-www-form-urlencoded',
-    'User-Agent': 'spawn-growth:v1.0.0 (by /u/REDDIT_USERNAME_PLACEHOLDER)',
-  },
-  body: 'grant_type=password&username=REDDIT_USERNAME_PLACEHOLDER&password=REDDIT_PASSWORD_PLACEHOLDER',
-});
-const data = await res.json();
-console.log(JSON.stringify(data));
-"
+```json
+REDDIT_DATA_PLACEHOLDER
 ```
 
-Save the `access_token`. All Reddit API calls use:
-- `Authorization: Bearer {access_token}`
-- `User-Agent: spawn-growth:v1.0.0 (by /u/REDDIT_USERNAME_PLACEHOLDER)`
-- Base URL: `https://oauth.reddit.com`
-
-## Step 2: Search for "feature ask" threads
-
-You are looking for a very specific type of post: someone asking how to do something that Spawn directly solves. Not general AI discussion. Not news. Not opinions. A concrete ask.
-
-**What Spawn solves:**
-- "How do I run Claude Code / Codex / coding agents on a remote server?"
-- "What's the cheapest way to get a cloud VM for AI coding?"
-- "How do I set up a dev environment with AI tools on Hetzner/AWS/GCP?"
-- "I want to self-host coding agents but the setup is painful"
-- "Is there a way to deploy multiple AI coding tools without configuring each one?"
-
-**Subreddits to scan:**
-- r/Vibecoding
-- r/AIAgents
-- r/LocalLLaMA
-- r/ChatGPT
-- r/SelfHosted
-- r/programming
-- r/commandline
-- r/devops
-
-**Search queries** (run against each subreddit, wait 1s between calls):
-- "coding agent cloud"
-- "coding agent server"
-- "self host AI coding"
-- "remote dev AI"
-- "vibe coding setup"
-- "deploy coding agent"
-- "cloud dev environment AI"
-
-```
-GET https://oauth.reddit.com/r/{subreddit}/search?q={query}&sort=new&t=week&restrict_sr=true&limit=25
-```
-
-Also check for direct mentions:
-```
-GET https://oauth.reddit.com/search?q=openrouter+spawn&sort=new&t=week&limit=25
-```
-
-Collect all unique posts. Deduplicate by post ID.
-
-## Step 3: Score for relevance
+## Step 1: Score for relevance
 
 For each post, score it on these criteria:
 
@@ -89,6 +24,13 @@ For each post, score it on these criteria:
 - 1: Tangentially related discussion
 - 0: News, opinion, or not a question
 
+**What Spawn solves (use this to judge relevance):**
+- "How do I run Claude Code / Codex / coding agents on a remote server?"
+- "What's the cheapest way to get a cloud VM for AI coding?"
+- "How do I set up a dev environment with AI tools on Hetzner/AWS/GCP?"
+- "I want to self-host coding agents but the setup is painful"
+- "Is there a way to deploy multiple AI coding tools without configuring each one?"
+
 **Is the thread alive?** (0-2 points)
 - 2: Posted in last 48h with 3+ comments or 5+ upvotes
 - 1: Posted in last week, some engagement
@@ -102,13 +44,9 @@ For each post, score it on these criteria:
 
 Only consider posts scoring 7+ out of 10.
 
-## Step 4: Qualify the poster
+## Step 2: Qualify the poster
 
-For the top candidates (scored 7+), check if the poster is a real developer who could actually use Spawn. Fetch their recent comments:
-
-```
-GET https://oauth.reddit.com/user/{username}/comments?limit=25&sort=new
-```
+For the top candidates (scored 7+), check the poster's comment history (provided in `authorComments`).
 
 **Positive signals (look for ANY of these):**
 - Mentions cloud providers (AWS, Hetzner, GCP, DigitalOcean, Azure, Vultr, Linode)
@@ -119,18 +57,17 @@ GET https://oauth.reddit.com/user/{username}/comments?limit=25&sort=new
 - Mentions paying for services or having accounts
 
 **Disqualifying signals:**
-- Account is < 30 days old (likely bot/throwaway)
-- Only posts in non-tech subreddits
+- Account only posts in non-tech subreddits
 - Posting history suggests they're not a developer
 - Already uses Spawn or OpenRouter (check for mentions)
 
-## Step 5: Pick the ONE best candidate
+## Step 3: Pick the ONE best candidate
 
 From all qualified, high-scoring posts, pick exactly 1. The best one. If nothing scores 7+ after qualification, that's fine. Say "no candidates this cycle" and stop.
 
-## Step 6: Output summary
+## Step 4: Output summary
 
-Print a structured summary of what you found. This goes to the log file.
+Print a structured summary of what you found.
 
 **If a candidate was found:**
 
@@ -185,7 +122,7 @@ Draft reply:
 
 ```
 === GROWTH SCAN COMPLETE ===
-Posts scanned: {total}
+Posts scanned: {total from postsScanned field}
 Scored 7+: 0
 No candidates this cycle.
 === END SCAN ===
@@ -202,11 +139,6 @@ And the machine-readable JSON:
 ## Safety rules
 
 1. **Pick exactly 1 candidate per cycle.** No more.
-2. **Do NOT post replies to Reddit.** You only scan and report.
+2. **Do NOT post replies to Reddit.** You only score and report.
 3. **No candidates is a valid outcome.** Don't force bad matches.
-4. **Respect Reddit rate limits.** 1 second between API calls minimum.
-5. **Don't surface threads from Spawn/OpenRouter team members.**
-
-## Time budget
-
-Complete within 25 minutes. If still searching at 20 minutes, stop and report what you have.
+4. **Don't surface threads from Spawn/OpenRouter team members.**
diff --git a/.claude/skills/setup-agent-team/growth.sh b/.claude/skills/setup-agent-team/growth.sh
index b37845d2..0d67fbce 100644
--- a/.claude/skills/setup-agent-team/growth.sh
+++ b/.claude/skills/setup-agent-team/growth.sh
@@ -2,11 +2,9 @@
 set -eo pipefail
 
 # Reddit Growth Agent — Single Cycle (Discovery Only)
-# Triggered by trigger-server.ts via GitHub Actions (daily)
-#
-# Scans Reddit for "feature ask" threads that Spawn solves,
-# qualifies the poster, picks the 1 best candidate, and outputs
-# a summary to the log. Does NOT post replies or notify externally.
+# Phase 1: Batch-fetch Reddit posts via reddit-fetch.ts (fast, parallel)
+# Phase 2: Pass results to Claude for scoring/qualification (no tool use)
+# Phase 3: POST candidate to SPA for Slack notification
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
@@ -14,11 +12,11 @@ cd "${REPO_ROOT}"
 
 SPAWN_REASON="${SPAWN_REASON:-manual}"
 TEAM_NAME="spawn-growth"
-CYCLE_TIMEOUT=1800   # 30 min
-HARD_TIMEOUT=2400    # 40 min grace
+HARD_TIMEOUT=300   # 5 min (scoring is fast, no tool use)
 
 LOG_FILE="${REPO_ROOT}/.docs/${TEAM_NAME}.log"
 PROMPT_FILE=""
+REDDIT_DATA_FILE=""
 
 # Ensure .docs directory exists
 mkdir -p "$(dirname "${LOG_FILE}")"
@@ -27,22 +25,6 @@ log() {
     echo "[$(date +'%Y-%m-%d %H:%M:%S')] [growth] $*" | tee -a "${LOG_FILE}"
 }
 
-# --- Safe sed substitution (escapes sed metacharacters in replacement) ---
-safe_substitute() {
-    local placeholder="$1"
-    local value="$2"
-    local file="$3"
-    if printf '%s' "$value" | grep -qP '\x01'; then
-        log "ERROR: safe_substitute value contains illegal \\x01 character"
-        return 1
-    fi
-    local escaped
-    escaped=$(printf '%s' "$value" | sed -e 's/[\\]/\\&/g' -e 's/[&]/\\&/g')
-    escaped="${escaped//$'\n'/\\$'\n'}"
-    sed -i.bak "s$(printf '\x01')${placeholder}$(printf '\x01')${escaped}$(printf '\x01')g" "$file"
-    rm -f "${file}.bak"
-}
-
 # Cleanup function
 cleanup() {
     if [[ -n "${_cleanup_done:-}" ]]; then return; fi
@@ -51,7 +33,7 @@ cleanup() {
     local exit_code=$?
     log "Running cleanup (exit_code=${exit_code})..."
 
-    rm -f "${PROMPT_FILE:-}" 2>/dev/null || true
+    rm -f "${PROMPT_FILE:-}" "${REDDIT_DATA_FILE:-}" "${CLAUDE_STREAM_FILE:-}" "${CLAUDE_OUTPUT_FILE:-}" 2>/dev/null || true  # every mktemp'd file, so early exits leak nothing
     if [[ -n "${CLAUDE_PID:-}" ]] && kill -0 "${CLAUDE_PID}" 2>/dev/null; then
         kill -TERM "${CLAUDE_PID}" 2>/dev/null || true
     fi
@@ -65,19 +47,28 @@ trap cleanup EXIT SIGTERM SIGINT
 log "=== Starting growth cycle ==="
 log "Working directory: ${REPO_ROOT}"
 log "Reason: ${SPAWN_REASON}"
-log "Timeout: ${CYCLE_TIMEOUT}s"
 
 # Fetch latest refs
 log "Fetching latest refs..."
 git fetch --prune origin 2>&1 | tee -a "${LOG_FILE}" || true
 git reset --hard origin/main 2>&1 | tee -a "${LOG_FILE}" || true
 
-# Update Claude Code to latest version
-log "Updating Claude Code..."
-claude update --yes 2>&1 | tee -a "${LOG_FILE}" || log "WARNING: Claude Code update failed (continuing with current version)"
+# --- Phase 1: Batch fetch Reddit posts ---
+log "Phase 1: Fetching Reddit posts..."
 
-# Prepare prompt
-log "Launching growth cycle..."
+REDDIT_DATA_FILE=$(mktemp /tmp/growth-reddit-XXXXXX.json)
+chmod 0600 "${REDDIT_DATA_FILE}"
+
+if ! bun run "${SCRIPT_DIR}/reddit-fetch.ts" > "${REDDIT_DATA_FILE}" 2>> "${LOG_FILE}"; then
+    log "ERROR: reddit-fetch.ts failed"
+    exit 1
+fi
+
+POST_COUNT=$(bun -e "const d=JSON.parse(await Bun.file('${REDDIT_DATA_FILE}').text()); console.log(d.postsScanned ?? d.posts?.length ?? 0)")
+log "Phase 1 done: ${POST_COUNT} posts fetched"
+
+# --- Phase 2: Score with Claude ---
+log "Phase 2: Scoring with Claude..."
 
 PROMPT_FILE=$(mktemp /tmp/growth-prompt-XXXXXX.md)
 chmod 0600 "${PROMPT_FILE}"
@@ -88,18 +79,22 @@ if [[ ! -f "$PROMPT_TEMPLATE" ]]; then
     exit 1
 fi
 
-cat "$PROMPT_TEMPLATE" > "${PROMPT_FILE}"
-
-# Substitute env vars into prompt
-safe_substitute "REDDIT_CLIENT_ID_PLACEHOLDER" "${REDDIT_CLIENT_ID:-}" "${PROMPT_FILE}"
-safe_substitute "REDDIT_CLIENT_SECRET_PLACEHOLDER" "${REDDIT_CLIENT_SECRET:-}" "${PROMPT_FILE}"
-safe_substitute "REDDIT_USERNAME_PLACEHOLDER" "${REDDIT_USERNAME:-}" "${PROMPT_FILE}"
-safe_substitute "REDDIT_PASSWORD_PLACEHOLDER" "${REDDIT_PASSWORD:-}" "${PROMPT_FILE}"
+# Inject Reddit data into prompt template
+REDDIT_JSON=$(cat "${REDDIT_DATA_FILE}")
+# Use bun for safe substitution to avoid sed escaping issues with JSON
+bun -e "
+const template = await Bun.file('${PROMPT_TEMPLATE}').text();
+const data = await Bun.file('${REDDIT_DATA_FILE}').text();
+const result = template.replace('REDDIT_DATA_PLACEHOLDER', data.trim());
+await Bun.write('${PROMPT_FILE}', result);
+"
 
 log "Hard timeout: ${HARD_TIMEOUT}s"
 
-# Run claude in background
-claude -p - --dangerously-skip-permissions --model sonnet < "${PROMPT_FILE}" >> "${LOG_FILE}" 2>&1 &
+# Run claude with stream-json to capture text (plain -p stdout is empty with extended thinking)
+CLAUDE_STREAM_FILE=$(mktemp /tmp/growth-stream-XXXXXX.jsonl)
+CLAUDE_OUTPUT_FILE=$(mktemp /tmp/growth-output-XXXXXX.txt)
+claude -p - --model sonnet --output-format stream-json --verbose < "${PROMPT_FILE}" > "${CLAUDE_STREAM_FILE}" 2>> "${LOG_FILE}" &
 CLAUDE_PID=$!
 log "Claude started (pid=${CLAUDE_PID})"
 
@@ -119,7 +114,7 @@ kill_claude() {
 WALL_START=$(date +%s)
 
 while kill -0 "${CLAUDE_PID}" 2>/dev/null; do
-    sleep 30
+    sleep 10
     WALL_ELAPSED=$(( $(date +%s) - WALL_START ))
 
     if [[ "${WALL_ELAPSED}" -ge "${HARD_TIMEOUT}" ]]; then
@@ -132,23 +127,43 @@ done
 wait "${CLAUDE_PID}" 2>/dev/null
 CLAUDE_EXIT=$?
 
+# Extract text content from stream-json into plain text output file
+bun -e "
+const lines = (await Bun.file('${CLAUDE_STREAM_FILE}').text()).split('\n').filter(Boolean);
+const texts = [];
+for (const line of lines) {
+  try {
+    const ev = JSON.parse(line);
+    if (ev.type === 'assistant' && Array.isArray(ev.message?.content)) {
+      for (const block of ev.message.content) {
+        if (block.type === 'text' && block.text) texts.push(block.text);
+      }
+    }
+  } catch {}
+}
+await Bun.write('${CLAUDE_OUTPUT_FILE}', texts.join('\n'));
+" 2>> "${LOG_FILE}" || true
+
+# Append Claude output to log
+cat "${CLAUDE_OUTPUT_FILE}" >> "${LOG_FILE}" 2>/dev/null || true
+
 if [[ "${CLAUDE_EXIT}" -eq 0 ]]; then
-    log "Cycle completed successfully"
+    log "Phase 2 done: scoring completed"
 else
-    log "Cycle failed (exit_code=${CLAUDE_EXIT})"
+    log "Phase 2 failed (exit_code=${CLAUDE_EXIT})"
 fi
 
-# --- Extract candidate JSON and POST to SPA ---
+# --- Phase 3: Extract candidate and POST to SPA ---
 CANDIDATE_JSON=""
 
-# Extract the json:candidate block from the log (between ```json:candidate and ```)
-if [[ -f "${LOG_FILE}" ]]; then
-    CANDIDATE_JSON=$(sed -n '/^```json:candidate$/,/^```$/{/^```/d;p;}' "${LOG_FILE}" | tail -1)
+# Extract the json:candidate block from Claude's output
+if [[ -f "${CLAUDE_OUTPUT_FILE}" ]]; then
+    CANDIDATE_JSON=$(sed -n '/^```json:candidate$/,/^```$/{/^```/d;p;}' "${CLAUDE_OUTPUT_FILE}" | tail -1)
 fi
 
 if [[ -z "${CANDIDATE_JSON}" ]]; then
     log "No json:candidate block found in output"
-    CANDIDATE_JSON='{"found":false}'
+    CANDIDATE_JSON="{\"found\":false,\"postsScanned\":${POST_COUNT}}"
 fi
 
 log "Candidate JSON: ${CANDIDATE_JSON}"
@@ -166,3 +181,5 @@ if [[ -n "${SPA_TRIGGER_URL:-}" && -n "${SPA_TRIGGER_SECRET:-}" ]]; then
 else
     log "SPA_TRIGGER_URL or SPA_TRIGGER_SECRET not set, skipping Slack notification"
 fi
+
+rm -f "${CLAUDE_OUTPUT_FILE}" "${CLAUDE_STREAM_FILE}" 2>/dev/null || true
diff --git a/.claude/skills/setup-agent-team/reddit-fetch.ts b/.claude/skills/setup-agent-team/reddit-fetch.ts
new file mode 100644
index 00000000..ee55e155
--- /dev/null
+++ b/.claude/skills/setup-agent-team/reddit-fetch.ts
@@ -0,0 +1,259 @@
+/**
+ * Reddit Fetch — Batch scanner for the growth agent.
+ *
+ * Authenticates with Reddit, fires all subreddit×query searches concurrently,
+ * deduplicates, pre-fetches poster comment histories, and outputs JSON to stdout.
+ *
+ * Env vars: REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USERNAME, REDDIT_PASSWORD
+ */
+
+const CLIENT_ID = process.env.REDDIT_CLIENT_ID ?? "";
+const CLIENT_SECRET = process.env.REDDIT_CLIENT_SECRET ?? "";
+const USERNAME = process.env.REDDIT_USERNAME ?? "";
+const PASSWORD = process.env.REDDIT_PASSWORD ?? "";
+const USER_AGENT = `spawn-growth:v1.0.0 (by /u/${USERNAME})`; // sent as the User-Agent header on every request in this file
+
+if (!CLIENT_ID || !CLIENT_SECRET || !USERNAME || !PASSWORD) { // fail fast: all four env vars are required
+  console.error("Missing Reddit credentials");
+  process.exit(1);
+}
+
+const SUBREDDITS = [ // each subreddit is searched once per entry in QUERIES
+  "Vibecoding",
+  "AIAgents",
+  "LocalLLaMA",
+  "ChatGPT",
+  "SelfHosted",
+  "programming",
+  "commandline",
+  "devops",
+];
+
+const QUERIES = [
+  "coding agent cloud",
+  "coding agent server",
+  "self host AI coding",
+  "remote dev AI",
+  "vibe coding setup",
+  "deploy coding agent",
+  "cloud dev environment AI",
+];
+
+const MAX_CONCURRENT = 5; // max in-flight requests for both the search pool and the comment-fetch pool
+
+interface RedditPost {
+  title: string;
+  permalink: string;
+  subreddit: string;
+  postId: string; // taken from the listing item's `name` field; used as the dedupe key
+  score: number;
+  numComments: number; // copied from `num_comments`
+  createdUtc: number; // copied from `created_utc`
+  selftext: string; // truncated to 2000 chars when extracted
+  authorName: string;
+  authorComments: string[]; // empty on extraction; main() attaches the fetched comment history
+}
+
+/** Runs `tasks` with at most `limit` running at once; results[i] is the result of tasks[i]. Rejects if any task rejects. */
+async function pooled<T>(tasks: Array<() => Promise<T>>, limit: number): Promise<T[]> {
+  const results: T[] = [];
+  let idx = 0;
+
+  async function worker(): Promise<void> {
+    while (idx < tasks.length) {
+      const i = idx++; // claim the next task index before awaiting, so no two workers run the same task
+      results[i] = await tasks[i]();
+    }
+  }
+
+  await Promise.all(
+    Array.from(
+      {
+        length: Math.min(limit, tasks.length), // never start more workers than there are tasks
+      },
+      () => worker(),
+    ),
+  );
+  return results;
+}
+
+/** Requests an OAuth bearer token using the password grant. Logs the response and exits with code 1 if it has no string `access_token`. */
+async function getToken(): Promise<string> {
+  const auth = Buffer.from(`${CLIENT_ID}:${CLIENT_SECRET}`).toString("base64"); // HTTP Basic credentials for the app
+  const res = await fetch("https://www.reddit.com/api/v1/access_token", {
+    method: "POST",
+    headers: {
+      Authorization: `Basic ${auth}`,
+      "Content-Type": "application/x-www-form-urlencoded",
+      "User-Agent": USER_AGENT,
+    },
+    body: `grant_type=password&username=${encodeURIComponent(USERNAME)}&password=${encodeURIComponent(PASSWORD)}`,
+  });
+  const data = (await res.json()) as Record<string, unknown>;
+  const token = typeof data.access_token === "string" ? data.access_token : ""; // anything but a string counts as failure
+  if (!token) {
+    console.error("Reddit auth failed:", JSON.stringify(data));
+    process.exit(1);
+  }
+  return token;
+}
+
+/** GETs `path` from https://oauth.reddit.com with the bearer token. Returns the parsed JSON, or null (after logging status and path) when the response is not ok. */
+async function redditGet(token: string, path: string): Promise<unknown> {
+  const res = await fetch(`https://oauth.reddit.com${path}`, {
+    headers: {
+      Authorization: `Bearer ${token}`,
+      "User-Agent": USER_AGENT,
+    },
+  });
+  if (!res.ok) {
+    console.error(`Reddit API ${res.status}: ${path}`);
+    return null; // callers treat null as "no results" rather than a fatal error
+  }
+  return res.json();
+}
+
+/** Builds a map of RedditPost keyed by each child's `name` from a listing response (`data.children[].data`). Returns an empty map for null or malformed input. */
+function extractPosts(data: unknown): Map<string, RedditPost> {
+  const posts = new Map<string, RedditPost>();
+  if (!data || typeof data !== "object") return posts;
+  const listing = data as Record<string, unknown>;
+  const listingData = listing.data as Record<string, unknown> | undefined;
+  const children = listingData?.children;
+  if (!Array.isArray(children)) return posts;
+
+  for (const child of children) {
+    const c = child as Record<string, unknown>;
+    const d = c.data as Record<string, unknown> | undefined;
+    if (!d) continue;
+    const id = String(d.name ?? "");
+    if (!id || posts.has(id)) continue; // skip items without a name and duplicates within this listing
+
+    posts.set(id, {
+      title: String(d.title ?? ""),
+      permalink: String(d.permalink ?? ""),
+      subreddit: String(d.subreddit ?? ""),
+      postId: id,
+      score: Number(d.score ?? 0),
+      numComments: Number(d.num_comments ?? 0),
+      createdUtc: Number(d.created_utc ?? 0),
+      selftext: String(d.selftext ?? "").slice(0, 2000),
+      authorName: String(d.author ?? ""),
+      authorComments: [], // filled in later by main()
+    });
+  }
+  return posts;
+}
+
+/** Requests a user's comments (limit=25, sort=new) and formats each as "[r/<subreddit>] <body>" with the body cut to 500 chars. Returns [] for an empty or "[deleted]" username, a failed request, or a malformed response. */
+async function fetchUserComments(token: string, username: string): Promise<string[]> {
+  if (!username || username === "[deleted]") return [];
+  const data = await redditGet(token, `/user/${username}/comments?limit=25&sort=new`);
+  if (!data || typeof data !== "object") return [];
+  const listing = data as Record<string, unknown>;
+  const listingData = listing.data as Record<string, unknown> | undefined;
+  const children = listingData?.children;
+  if (!Array.isArray(children)) return [];
+
+  return children
+    .map((child) => {
+      const c = child as Record<string, unknown>;
+      const d = c.data as Record<string, unknown> | undefined;
+      const body = String(d?.body ?? "").slice(0, 500);
+      const sub = String(d?.subreddit ?? "");
+      return sub ? `[r/${sub}] ${body}` : body;
+    })
+    .filter(Boolean); // drops entries that came out as an empty string
+}
+
+async function main(): Promise<void> { // auth -> pooled searches -> dedupe -> author comment prefetch -> JSON on stdout (progress on stderr)
+  const token = await getToken();
+  console.error("[reddit-fetch] Authenticated");
+
+  // Build one search task per subreddit × query pair
+  const searchTasks: Array<() => Promise<Map<string, RedditPost>>> = [];
+
+  for (const sub of SUBREDDITS) {
+    for (const query of QUERIES) {
+      const q = encodeURIComponent(query);
+      searchTasks.push(async () => {
+        const data = await redditGet(token, `/r/${sub}/search?q=${q}&sort=new&t=week&restrict_sr=true&limit=25`);
+        return extractPosts(data);
+      });
+    }
+  }
+
+  // Direct mention search (site-wide, not restricted to a subreddit)
+  searchTasks.push(async () => {
+    const data = await redditGet(token, "/search?q=openrouter+spawn&sort=new&t=week&limit=25");
+    return extractPosts(data);
+  });
+
+  console.error(`[reddit-fetch] Firing ${searchTasks.length} searches (concurrency=${MAX_CONCURRENT})...`);
+
+  const allResults = await pooled(searchTasks, MAX_CONCURRENT);
+
+  // Merge and deduplicate across searches (first occurrence of a post id wins)
+  const allPosts = new Map<string, RedditPost>();
+  for (const resultMap of allResults) {
+    for (const [id, post] of resultMap) {
+      if (!allPosts.has(id)) {
+        allPosts.set(id, post);
+      }
+    }
+  }
+
+  console.error(`[reddit-fetch] Found ${allPosts.size} unique posts`);
+
+  // Pre-fetch poster comments only for posts with some engagement, once per author
+  const postsArray = [
+    ...allPosts.values(),
+  ];
+  const worthQualifying = postsArray.filter((p) => p.score >= 2 || p.numComments >= 2);
+  const uniqueAuthors = [
+    ...new Set(worthQualifying.map((p) => p.authorName)),
+  ];
+
+  console.error(`[reddit-fetch] Fetching comments for ${uniqueAuthors.length} authors...`);
+
+  const commentMap = new Map<string, string[]>();
+  const commentTasks = uniqueAuthors.map((author) => async () => {
+    const comments = await fetchUserComments(token, author);
+    commentMap.set(author, comments);
+  });
+  await pooled(commentTasks, MAX_CONCURRENT);
+
+  // Attach comments to posts (authors that were not fetched get an empty list)
+  for (const post of postsArray) {
+    post.authorComments = commentMap.get(post.authorName) ?? [];
+  }
+
+  // Filter to posts with some engagement, sort by score descending
+  const filtered = postsArray.filter((p) => p.score >= 2 || p.numComments >= 2);
+  filtered.sort((a, b) => b.score - a.score);
+
+  // Output JSON to stdout (text fields trimmed to keep prompt size reasonable)
+  const output = {
+    posts: filtered.map((p) => ({
+      title: p.title,
+      permalink: p.permalink,
+      subreddit: p.subreddit,
+      postId: p.postId,
+      score: p.score,
+      numComments: p.numComments,
+      createdUtc: p.createdUtc,
+      selftext: p.selftext.slice(0, 500),
+      authorName: p.authorName,
+      authorComments: p.authorComments.slice(0, 5).map((c) => c.slice(0, 200)),
+    })),
+    postsScanned: allPosts.size, // count of unique posts before the engagement filter
+  };
+
+  console.log(JSON.stringify(output));
+  console.error(`[reddit-fetch] Done — ${filtered.length} posts output`); // report what was emitted, not the pre-filter total
+}
+
+main().catch((err) => {
+  console.error("Fatal:", err);
+  process.exit(1);
+});