From 16d0e75f497fc7d3cffc7e2c2f3dd9848652a29f Mon Sep 17 00:00:00 2001 From: A <258483684+la14-1@users.noreply.github.com> Date: Sun, 5 Apr 2026 13:12:05 -0700 Subject: [PATCH] feat(growth): batch Reddit fetching for faster growth cycles (#3184) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Splits the growth agent into two phases: 1. reddit-fetch.ts — parallel batch fetch of all Reddit posts (~30s) 2. Claude scoring — pure text analysis of pre-fetched data (~30s) Previously Claude made 56+ sequential tool calls through the LLM loop, taking 5-10 minutes. Now the full cycle completes in ~1-2 minutes. Also fixes empty stdout issue by using stream-json output format and extracting text content from the event stream. Co-authored-by: Claude Co-authored-by: Claude Opus 4.6 (1M context) --- .../skills/setup-agent-team/growth-prompt.md | 114 ++------ .claude/skills/setup-agent-team/growth.sh | 111 ++++---- .../skills/setup-agent-team/reddit-fetch.ts | 259 ++++++++++++++++++ 3 files changed, 346 insertions(+), 138 deletions(-) create mode 100644 .claude/skills/setup-agent-team/reddit-fetch.ts diff --git a/.claude/skills/setup-agent-team/growth-prompt.md b/.claude/skills/setup-agent-team/growth-prompt.md index 004f3534..effa7c3d 100644 --- a/.claude/skills/setup-agent-team/growth-prompt.md +++ b/.claude/skills/setup-agent-team/growth-prompt.md @@ -2,84 +2,19 @@ You are the Reddit growth discovery agent for Spawn (https://github.com/OpenRout Spawn lets developers spin up AI coding agents (Claude Code, Codex, Kilo Code, etc.) on cloud servers with one command: `curl -fsSL openrouter.ai/labs/spawn | bash` -Your job: find the ONE best Reddit thread where someone is asking for something Spawn solves, verify the poster looks like a real developer who could use it, and output a summary. You do NOT post replies. You only find and report. 
+Your job: from the pre-fetched Reddit posts below, find the ONE best thread where someone is asking for something Spawn solves, verify the poster looks like a real developer, and output a structured summary. You do NOT post replies. You only score and report. -## Credentials +**IMPORTANT: Do NOT use any tools.** All data is provided below. Your entire response should be plain text output — no bash commands, no file reads, no tool calls. Just analyze the data and respond with your findings. -Reddit OAuth (script grant): -- Client ID: `REDDIT_CLIENT_ID_PLACEHOLDER` -- Client Secret: `REDDIT_CLIENT_SECRET_PLACEHOLDER` -- Username: `REDDIT_USERNAME_PLACEHOLDER` -- Password: `REDDIT_PASSWORD_PLACEHOLDER` +## Pre-fetched Reddit data -## Step 1: Authenticate with Reddit +The following posts were fetched automatically. Each post includes the title, selftext, subreddit, engagement stats, and the poster's recent comment history. -Get an OAuth token using the script grant type: - -```bash -bun -e " -const auth = Buffer.from('REDDIT_CLIENT_ID_PLACEHOLDER:REDDIT_CLIENT_SECRET_PLACEHOLDER').toString('base64'); -const res = await fetch('https://www.reddit.com/api/v1/access_token', { - method: 'POST', - headers: { - 'Authorization': 'Basic ' + auth, - 'Content-Type': 'application/x-www-form-urlencoded', - 'User-Agent': 'spawn-growth:v1.0.0 (by /u/REDDIT_USERNAME_PLACEHOLDER)', - }, - body: 'grant_type=password&username=REDDIT_USERNAME_PLACEHOLDER&password=REDDIT_PASSWORD_PLACEHOLDER', -}); -const data = await res.json(); -console.log(JSON.stringify(data)); -" +```json +REDDIT_DATA_PLACEHOLDER ``` -Save the `access_token`. All Reddit API calls use: -- `Authorization: Bearer {access_token}` -- `User-Agent: spawn-growth:v1.0.0 (by /u/REDDIT_USERNAME_PLACEHOLDER)` -- Base URL: `https://oauth.reddit.com` - -## Step 2: Search for "feature ask" threads - -You are looking for a very specific type of post: someone asking how to do something that Spawn directly solves. 
Not general AI discussion. Not news. Not opinions. A concrete ask. - -**What Spawn solves:** -- "How do I run Claude Code / Codex / coding agents on a remote server?" -- "What's the cheapest way to get a cloud VM for AI coding?" -- "How do I set up a dev environment with AI tools on Hetzner/AWS/GCP?" -- "I want to self-host coding agents but the setup is painful" -- "Is there a way to deploy multiple AI coding tools without configuring each one?" - -**Subreddits to scan:** -- r/Vibecoding -- r/AIAgents -- r/LocalLLaMA -- r/ChatGPT -- r/SelfHosted -- r/programming -- r/commandline -- r/devops - -**Search queries** (run against each subreddit, wait 1s between calls): -- "coding agent cloud" -- "coding agent server" -- "self host AI coding" -- "remote dev AI" -- "vibe coding setup" -- "deploy coding agent" -- "cloud dev environment AI" - -``` -GET https://oauth.reddit.com/r/{subreddit}/search?q={query}&sort=new&t=week&restrict_sr=true&limit=25 -``` - -Also check for direct mentions: -``` -GET https://oauth.reddit.com/search?q=openrouter+spawn&sort=new&t=week&limit=25 -``` - -Collect all unique posts. Deduplicate by post ID. - -## Step 3: Score for relevance +## Step 1: Score for relevance For each post, score it on these criteria: @@ -89,6 +24,13 @@ For each post, score it on these criteria: - 1: Tangentially related discussion - 0: News, opinion, or not a question +**What Spawn solves (use this to judge relevance):** +- "How do I run Claude Code / Codex / coding agents on a remote server?" +- "What's the cheapest way to get a cloud VM for AI coding?" +- "How do I set up a dev environment with AI tools on Hetzner/AWS/GCP?" +- "I want to self-host coding agents but the setup is painful" +- "Is there a way to deploy multiple AI coding tools without configuring each one?" 
+ **Is the thread alive?** (0-2 points) - 2: Posted in last 48h with 3+ comments or 5+ upvotes - 1: Posted in last week, some engagement @@ -102,13 +44,9 @@ For each post, score it on these criteria: Only consider posts scoring 7+ out of 10. -## Step 4: Qualify the poster +## Step 2: Qualify the poster -For the top candidates (scored 7+), check if the poster is a real developer who could actually use Spawn. Fetch their recent comments: - -``` -GET https://oauth.reddit.com/user/{username}/comments?limit=25&sort=new -``` +For the top candidates (scored 7+), check the poster's comment history (provided in `authorComments`). **Positive signals (look for ANY of these):** - Mentions cloud providers (AWS, Hetzner, GCP, DigitalOcean, Azure, Vultr, Linode) @@ -119,18 +57,17 @@ GET https://oauth.reddit.com/user/{username}/comments?limit=25&sort=new - Mentions paying for services or having accounts **Disqualifying signals:** -- Account is < 30 days old (likely bot/throwaway) -- Only posts in non-tech subreddits +- Account only posts in non-tech subreddits - Posting history suggests they're not a developer - Already uses Spawn or OpenRouter (check for mentions) -## Step 5: Pick the ONE best candidate +## Step 3: Pick the ONE best candidate From all qualified, high-scoring posts, pick exactly 1. The best one. If nothing scores 7+ after qualification, that's fine. Say "no candidates this cycle" and stop. -## Step 6: Output summary +## Step 4: Output summary -Print a structured summary of what you found. This goes to the log file. +Print a structured summary of what you found. **If a candidate was found:** @@ -185,7 +122,7 @@ Draft reply: ``` === GROWTH SCAN COMPLETE === -Posts scanned: {total} +Posts scanned: {total from postsScanned field} Scored 7+: 0 No candidates this cycle. === END SCAN === @@ -202,11 +139,6 @@ And the machine-readable JSON: ## Safety rules 1. **Pick exactly 1 candidate per cycle.** No more. -2. **Do NOT post replies to Reddit.** You only scan and report. 
+2. **Do NOT post replies to Reddit.** You only score and report. 3. **No candidates is a valid outcome.** Don't force bad matches. -4. **Respect Reddit rate limits.** 1 second between API calls minimum. -5. **Don't surface threads from Spawn/OpenRouter team members.** - -## Time budget - -Complete within 25 minutes. If still searching at 20 minutes, stop and report what you have. +4. **Don't surface threads from Spawn/OpenRouter team members.** diff --git a/.claude/skills/setup-agent-team/growth.sh b/.claude/skills/setup-agent-team/growth.sh index b37845d2..0d67fbce 100644 --- a/.claude/skills/setup-agent-team/growth.sh +++ b/.claude/skills/setup-agent-team/growth.sh @@ -2,11 +2,9 @@ set -eo pipefail # Reddit Growth Agent — Single Cycle (Discovery Only) -# Triggered by trigger-server.ts via GitHub Actions (daily) -# -# Scans Reddit for "feature ask" threads that Spawn solves, -# qualifies the poster, picks the 1 best candidate, and outputs -# a summary to the log. Does NOT post replies or notify externally. +# Phase 1: Batch-fetch Reddit posts via reddit-fetch.ts (fast, parallel) +# Phase 2: Pass results to Claude for scoring/qualification (no tool use) +# Phase 3: POST candidate to SPA for Slack notification SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." 
&& pwd)" @@ -14,11 +12,11 @@ cd "${REPO_ROOT}" SPAWN_REASON="${SPAWN_REASON:-manual}" TEAM_NAME="spawn-growth" -CYCLE_TIMEOUT=1800 # 30 min -HARD_TIMEOUT=2400 # 40 min grace +HARD_TIMEOUT=300 # 5 min (scoring is fast, no tool use) LOG_FILE="${REPO_ROOT}/.docs/${TEAM_NAME}.log" PROMPT_FILE="" +REDDIT_DATA_FILE="" # Ensure .docs directory exists mkdir -p "$(dirname "${LOG_FILE}")" @@ -27,22 +25,6 @@ log() { echo "[$(date +'%Y-%m-%d %H:%M:%S')] [growth] $*" | tee -a "${LOG_FILE}" } -# --- Safe sed substitution (escapes sed metacharacters in replacement) --- -safe_substitute() { - local placeholder="$1" - local value="$2" - local file="$3" - if printf '%s' "$value" | grep -qP '\x01'; then - log "ERROR: safe_substitute value contains illegal \\x01 character" - return 1 - fi - local escaped - escaped=$(printf '%s' "$value" | sed -e 's/[\\]/\\&/g' -e 's/[&]/\\&/g') - escaped="${escaped//$'\n'/\\$'\n'}" - sed -i.bak "s$(printf '\x01')${placeholder}$(printf '\x01')${escaped}$(printf '\x01')g" "$file" - rm -f "${file}.bak" -} - # Cleanup function cleanup() { if [[ -n "${_cleanup_done:-}" ]]; then return; fi @@ -51,7 +33,7 @@ cleanup() { local exit_code=$? log "Running cleanup (exit_code=${exit_code})..." - rm -f "${PROMPT_FILE:-}" 2>/dev/null || true + rm -f "${PROMPT_FILE:-}" "${REDDIT_DATA_FILE:-}" "${CLAUDE_STREAM_FILE:-}" "${CLAUDE_OUTPUT_FILE:-}" 2>/dev/null || true if [[ -n "${CLAUDE_PID:-}" ]] && kill -0 "${CLAUDE_PID}" 2>/dev/null; then kill -TERM "${CLAUDE_PID}" 2>/dev/null || true fi @@ -65,19 +47,28 @@ trap cleanup EXIT SIGTERM SIGINT log "=== Starting growth cycle ===" log "Working directory: ${REPO_ROOT}" log "Reason: ${SPAWN_REASON}" -log "Timeout: ${CYCLE_TIMEOUT}s" # Fetch latest refs log "Fetching latest refs..." git fetch --prune origin 2>&1 | tee -a "${LOG_FILE}" || true git reset --hard origin/main 2>&1 | tee -a "${LOG_FILE}" || true -# Update Claude Code to latest version -log "Updating Claude Code..."
-claude update --yes 2>&1 | tee -a "${LOG_FILE}" || log "WARNING: Claude Code update failed (continuing with current version)" +# --- Phase 1: Batch fetch Reddit posts --- +log "Phase 1: Fetching Reddit posts..." -# Prepare prompt -log "Launching growth cycle..." +REDDIT_DATA_FILE=$(mktemp /tmp/growth-reddit-XXXXXX.json) +chmod 0600 "${REDDIT_DATA_FILE}" + +if ! bun run "${SCRIPT_DIR}/reddit-fetch.ts" > "${REDDIT_DATA_FILE}" 2>> "${LOG_FILE}"; then + log "ERROR: reddit-fetch.ts failed" + exit 1 +fi + +POST_COUNT=$(bun -e "const d=JSON.parse(await Bun.file('${REDDIT_DATA_FILE}').text()); console.log(d.postsScanned ?? d.posts?.length ?? 0)") +log "Phase 1 done: ${POST_COUNT} posts fetched" + +# --- Phase 2: Score with Claude --- +log "Phase 2: Scoring with Claude..." PROMPT_FILE=$(mktemp /tmp/growth-prompt-XXXXXX.md) chmod 0600 "${PROMPT_FILE}" @@ -88,18 +79,22 @@ if [[ ! -f "$PROMPT_TEMPLATE" ]]; then exit 1 fi -cat "$PROMPT_TEMPLATE" > "${PROMPT_FILE}" - -# Substitute env vars into prompt -safe_substitute "REDDIT_CLIENT_ID_PLACEHOLDER" "${REDDIT_CLIENT_ID:-}" "${PROMPT_FILE}" -safe_substitute "REDDIT_CLIENT_SECRET_PLACEHOLDER" "${REDDIT_CLIENT_SECRET:-}" "${PROMPT_FILE}" -safe_substitute "REDDIT_USERNAME_PLACEHOLDER" "${REDDIT_USERNAME:-}" "${PROMPT_FILE}" -safe_substitute "REDDIT_PASSWORD_PLACEHOLDER" "${REDDIT_PASSWORD:-}" "${PROMPT_FILE}" +# Inject Reddit data into prompt template. bun reads the data file directly, so +# there is no sed escaping to get wrong; the replacer function inserts the JSON +# verbatim instead of letting String.replace expand "$&"-style patterns in it. +bun -e " +const template = await Bun.file('${PROMPT_TEMPLATE}').text(); +const data = await Bun.file('${REDDIT_DATA_FILE}').text(); +const result = template.replace('REDDIT_DATA_PLACEHOLDER', () => data.trim()); +await Bun.write('${PROMPT_FILE}', result); +" log "Hard timeout: ${HARD_TIMEOUT}s" -# Run claude in background -claude -p - --dangerously-skip-permissions --model sonnet < "${PROMPT_FILE}" >> "${LOG_FILE}" 2>&1 & +# Run claude with stream-json to capture text (plain -p
stdout is empty with extended thinking) +CLAUDE_STREAM_FILE=$(mktemp /tmp/growth-stream-XXXXXX.jsonl) +CLAUDE_OUTPUT_FILE=$(mktemp /tmp/growth-output-XXXXXX.txt) +claude -p - --model sonnet --output-format stream-json --verbose < "${PROMPT_FILE}" > "${CLAUDE_STREAM_FILE}" 2>> "${LOG_FILE}" & CLAUDE_PID=$! log "Claude started (pid=${CLAUDE_PID})" @@ -119,7 +114,7 @@ kill_claude() { WALL_START=$(date +%s) while kill -0 "${CLAUDE_PID}" 2>/dev/null; do - sleep 30 + sleep 10 WALL_ELAPSED=$(( $(date +%s) - WALL_START )) if [[ "${WALL_ELAPSED}" -ge "${HARD_TIMEOUT}" ]]; then @@ -132,23 +127,43 @@ done wait "${CLAUDE_PID}" 2>/dev/null CLAUDE_EXIT=$? +# Extract text content from stream-json into plain text output file +bun -e " +const lines = (await Bun.file('${CLAUDE_STREAM_FILE}').text()).split('\n').filter(Boolean); +const texts = []; +for (const line of lines) { + try { + const ev = JSON.parse(line); + if (ev.type === 'assistant' && Array.isArray(ev.message?.content)) { + for (const block of ev.message.content) { + if (block.type === 'text' && block.text) texts.push(block.text); + } + } + } catch {} +} +await Bun.write('${CLAUDE_OUTPUT_FILE}', texts.join('\n')); +" 2>> "${LOG_FILE}" || true + +# Append Claude output to log +cat "${CLAUDE_OUTPUT_FILE}" >> "${LOG_FILE}" 2>/dev/null || true + if [[ "${CLAUDE_EXIT}" -eq 0 ]]; then - log "Cycle completed successfully" + log "Phase 2 done: scoring completed" else - log "Cycle failed (exit_code=${CLAUDE_EXIT})" + log "Phase 2 failed (exit_code=${CLAUDE_EXIT})" fi -# --- Extract candidate JSON and POST to SPA --- +# --- Phase 3: Extract candidate and POST to SPA --- CANDIDATE_JSON="" -# Extract the json:candidate block from the log (between ```json:candidate and ```) -if [[ -f "${LOG_FILE}" ]]; then - CANDIDATE_JSON=$(sed -n '/^```json:candidate$/,/^```$/{/^```/d;p;}' "${LOG_FILE}" | tail -1) +# Extract the json:candidate block from Claude's output +if [[ -f "${CLAUDE_OUTPUT_FILE}" ]]; then + CANDIDATE_JSON=$(sed -n 
'/^```json:candidate$/,/^```$/{/^```/d;p;}' "${CLAUDE_OUTPUT_FILE}" | tail -1) fi if [[ -z "${CANDIDATE_JSON}" ]]; then log "No json:candidate block found in output" - CANDIDATE_JSON='{"found":false}' + CANDIDATE_JSON="{\"found\":false,\"postsScanned\":${POST_COUNT}}" fi log "Candidate JSON: ${CANDIDATE_JSON}" @@ -166,3 +181,5 @@ if [[ -n "${SPA_TRIGGER_URL:-}" && -n "${SPA_TRIGGER_SECRET:-}" ]]; then else log "SPA_TRIGGER_URL or SPA_TRIGGER_SECRET not set, skipping Slack notification" fi + +rm -f "${CLAUDE_OUTPUT_FILE}" "${CLAUDE_STREAM_FILE}" 2>/dev/null || true diff --git a/.claude/skills/setup-agent-team/reddit-fetch.ts b/.claude/skills/setup-agent-team/reddit-fetch.ts new file mode 100644 index 00000000..ee55e155 --- /dev/null +++ b/.claude/skills/setup-agent-team/reddit-fetch.ts @@ -0,0 +1,259 @@ +/** + * Reddit Fetch — Batch scanner for the growth agent. + * + * Authenticates with Reddit, fires all subreddit×query searches concurrently, + * deduplicates, pre-fetches poster comment histories, and outputs JSON to stdout. + * + * Env vars: REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USERNAME, REDDIT_PASSWORD + */ + +const CLIENT_ID = process.env.REDDIT_CLIENT_ID ?? ""; +const CLIENT_SECRET = process.env.REDDIT_CLIENT_SECRET ?? ""; +const USERNAME = process.env.REDDIT_USERNAME ?? ""; +const PASSWORD = process.env.REDDIT_PASSWORD ?? 
""; +const USER_AGENT = `spawn-growth:v1.0.0 (by /u/${USERNAME})`; + +if (!CLIENT_ID || !CLIENT_SECRET || !USERNAME || !PASSWORD) { + console.error("Missing Reddit credentials"); + process.exit(1); +} + +const SUBREDDITS = [ + "Vibecoding", + "AIAgents", + "LocalLLaMA", + "ChatGPT", + "SelfHosted", + "programming", + "commandline", + "devops", +]; + +const QUERIES = [ + "coding agent cloud", + "coding agent server", + "self host AI coding", + "remote dev AI", + "vibe coding setup", + "deploy coding agent", + "cloud dev environment AI", +]; + +const MAX_CONCURRENT = 5; + +interface RedditPost { + title: string; + permalink: string; + subreddit: string; + postId: string; + score: number; + numComments: number; + createdUtc: number; + selftext: string; + authorName: string; + authorComments: string[]; +} + +/** Simple concurrency limiter. */ +async function pooled(tasks: Array<() => Promise>, limit: number): Promise { + const results: T[] = []; + let idx = 0; + + async function worker(): Promise { + while (idx < tasks.length) { + const i = idx++; + results[i] = await tasks[i](); + } + } + + await Promise.all( + Array.from( + { + length: Math.min(limit, tasks.length), + }, + () => worker(), + ), + ); + return results; +} + +/** Authenticate and get bearer token. */ +async function getToken(): Promise { + const auth = Buffer.from(`${CLIENT_ID}:${CLIENT_SECRET}`).toString("base64"); + const res = await fetch("https://www.reddit.com/api/v1/access_token", { + method: "POST", + headers: { + Authorization: `Basic ${auth}`, + "Content-Type": "application/x-www-form-urlencoded", + "User-Agent": USER_AGENT, + }, + body: `grant_type=password&username=${encodeURIComponent(USERNAME)}&password=${encodeURIComponent(PASSWORD)}`, + }); + const data = (await res.json()) as Record; + const token = typeof data.access_token === "string" ? 
data.access_token : ""; + if (!token) { + console.error("Reddit auth failed:", JSON.stringify(data)); + process.exit(1); + } + return token; +} + +/** Fetch a Reddit API endpoint with auth. */ +async function redditGet(token: string, path: string): Promise { + const res = await fetch(`https://oauth.reddit.com${path}`, { + headers: { + Authorization: `Bearer ${token}`, + "User-Agent": USER_AGENT, + }, + }); + if (!res.ok) { + console.error(`Reddit API ${res.status}: ${path}`); + return null; + } + return res.json(); +} + +/** Extract posts from a Reddit listing response. */ +function extractPosts(data: unknown): Map { + const posts = new Map(); + if (!data || typeof data !== "object") return posts; + const listing = data as Record; + const listingData = listing.data as Record | undefined; + const children = listingData?.children; + if (!Array.isArray(children)) return posts; + + for (const child of children) { + const c = child as Record; + const d = c.data as Record | undefined; + if (!d) continue; + const id = String(d.name ?? ""); + if (!id || posts.has(id)) continue; + + posts.set(id, { + title: String(d.title ?? ""), + permalink: String(d.permalink ?? ""), + subreddit: String(d.subreddit ?? ""), + postId: id, + score: Number(d.score ?? 0), + numComments: Number(d.num_comments ?? 0), + createdUtc: Number(d.created_utc ?? 0), + selftext: String(d.selftext ?? "").slice(0, 2000), + authorName: String(d.author ?? ""), + authorComments: [], + }); + } + return posts; +} + +/** Fetch a user's recent comments. 
*/ +async function fetchUserComments(token: string, username: string): Promise { + if (!username || username === "[deleted]") return []; + const data = await redditGet(token, `/user/${encodeURIComponent(username)}/comments?limit=25&sort=new`); + if (!data || typeof data !== "object") return []; + const listing = data as Record; + const listingData = listing.data as Record | undefined; + const children = listingData?.children; + if (!Array.isArray(children)) return []; + + return children + .map((child) => { + const c = child as Record; + const d = c.data as Record | undefined; + const body = String(d?.body ?? "").slice(0, 500); + const sub = String(d?.subreddit ?? ""); + return body && sub ? `[r/${sub}] ${body}` : body; + }) + .filter(Boolean); +} + +async function main(): Promise { + const token = await getToken(); + console.error("[reddit-fetch] Authenticated"); + + // Build all search tasks + const searchTasks: Array<() => Promise>> = []; + + for (const sub of SUBREDDITS) { + for (const query of QUERIES) { + const q = encodeURIComponent(query); + searchTasks.push(async () => { + const data = await redditGet(token, `/r/${sub}/search?q=${q}&sort=new&t=week&restrict_sr=true&limit=25`); + return extractPosts(data); + }); + } + } + + // Direct mention search + searchTasks.push(async () => { + const data = await redditGet(token, "/search?q=openrouter+spawn&sort=new&t=week&limit=25"); + return extractPosts(data); + }); + + console.error(`[reddit-fetch] Firing ${searchTasks.length} searches (concurrency=${MAX_CONCURRENT})...`); + + const allResults = await pooled(searchTasks, MAX_CONCURRENT); + + // Merge and deduplicate + const allPosts = new Map(); + for (const resultMap of allResults) { + for (const [id, post] of resultMap) { + if (!allPosts.has(id)) { + allPosts.set(id, post); + } + } + } + + console.error(`[reddit-fetch] Found ${allPosts.size} unique posts`); + + // Pre-fetch poster comments for posts with some engagement + const postsArray = [ + ...allPosts.values(), + ]; + const
worthQualifying = postsArray.filter((p) => p.score >= 2 || p.numComments >= 2); + const uniqueAuthors = [ + ...new Set(worthQualifying.map((p) => p.authorName)), + ]; + + console.error(`[reddit-fetch] Fetching comments for ${uniqueAuthors.length} authors...`); + + const commentMap = new Map(); + const commentTasks = uniqueAuthors.map((author) => async () => { + const comments = await fetchUserComments(token, author); + commentMap.set(author, comments); + }); + await pooled(commentTasks, MAX_CONCURRENT); + + // Attach comments to posts + for (const post of postsArray) { + post.authorComments = commentMap.get(post.authorName) ?? []; + } + + // Reuse the engagement-filtered posts (same objects, comments now attached); copy, then sort by score descending + const filtered = [...worthQualifying]; + filtered.sort((a, b) => b.score - a.score); + + // Output JSON to stdout (trimmed to keep prompt size reasonable) + const output = { + posts: filtered.map((p) => ({ + title: p.title, + permalink: p.permalink, + subreddit: p.subreddit, + postId: p.postId, + score: p.score, + numComments: p.numComments, + createdUtc: p.createdUtc, + selftext: p.selftext.slice(0, 500), + authorName: p.authorName, + authorComments: p.authorComments.slice(0, 5).map((c) => c.slice(0, 200)), + })), + postsScanned: allPosts.size, + }; + + console.log(JSON.stringify(output)); + console.error(`[reddit-fetch] Done — ${filtered.length} posts output`); +} + +main().catch((err) => { + console.error("Fatal:", err); + process.exit(1); +});