diff --git a/packages/cli/package.json b/packages/cli/package.json index ede748d6..38f81cd1 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "@openrouter/spawn", - "version": "1.0.22", + "version": "1.0.23", "type": "module", "bin": { "spawn": "cli.js" diff --git a/packages/cli/src/__tests__/feature-flags.test.ts b/packages/cli/src/__tests__/feature-flags.test.ts new file mode 100644 index 00000000..36890682 --- /dev/null +++ b/packages/cli/src/__tests__/feature-flags.test.ts @@ -0,0 +1,228 @@ +// Unit tests for shared/feature-flags.ts — fetch, cache, exposure events. + +import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"; +import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { + _awaitBackgroundRefreshForTest, + _resetFeatureFlagsForTest, + getFeatureFlag, + initFeatureFlags, +} from "../shared/feature-flags.js"; +import { _resetInstallIdCache } from "../shared/install-id.js"; +import { getSpawnDir } from "../shared/paths.js"; + +const cachePath = (): string => join(getSpawnDir(), "feature-flags-cache.json"); + +function writeCache(flags: Record, ageMs = 0): void { + const path = cachePath(); + if (!existsSync(dirname(path))) { + mkdirSync(dirname(path), { + recursive: true, + }); + } + writeFileSync( + path, + JSON.stringify({ + fetchedAt: Date.now() - ageMs, + flags, + }), + ); +} + +describe("feature flags", () => { + const originalFetch = global.fetch; + const originalSpawnHome = process.env.SPAWN_HOME; + const originalDisabled = process.env.SPAWN_FEATURE_FLAGS_DISABLED; + let testHome: string; + + beforeEach(() => { + // Pin SPAWN_HOME to a fresh dir under the sandboxed HOME — other tests in + // the suite mutate it and don't always restore. We need a known-empty dir + // for the cache tests. 
SPAWN_HOME is required to live inside HOME so we + // mkdtemp inside the preload-provided test HOME, not the system tmp. + testHome = mkdtempSync(join(process.env.HOME ?? "", "spawn-ff-test-")); + process.env.SPAWN_HOME = testHome; + _resetFeatureFlagsForTest(); + _resetInstallIdCache(); + delete process.env.SPAWN_FEATURE_FLAGS_DISABLED; + }); + + afterEach(() => { + global.fetch = originalFetch; + if (originalSpawnHome === undefined) { + delete process.env.SPAWN_HOME; + } else { + process.env.SPAWN_HOME = originalSpawnHome; + } + if (originalDisabled === undefined) { + delete process.env.SPAWN_FEATURE_FLAGS_DISABLED; + } else { + process.env.SPAWN_FEATURE_FLAGS_DISABLED = originalDisabled; + } + rmSync(testHome, { + recursive: true, + force: true, + }); + }); + + describe("initFeatureFlags", () => { + it("populates flags from a successful /decide response", async () => { + global.fetch = mock(() => + Promise.resolve( + new Response( + JSON.stringify({ + featureFlags: { + fast_provision: "test", + other: true, + }, + }), + ), + ), + ); + await initFeatureFlags(); + expect(getFeatureFlag("fast_provision", "control")).toBe("test"); + expect(getFeatureFlag("other", false)).toBe(true); + }); + + it("falls open on a network error — getFeatureFlag returns the fallback", async () => { + global.fetch = mock(() => Promise.reject(new Error("network down"))); + await initFeatureFlags(); + expect(getFeatureFlag("fast_provision", "control")).toBe("control"); + }); + + it("falls open on HTTP 500", async () => { + global.fetch = mock(() => + Promise.resolve( + new Response("oops", { + status: 500, + }), + ), + ); + await initFeatureFlags(); + expect(getFeatureFlag("fast_provision", "control")).toBe("control"); + }); + + it("falls open on malformed JSON", async () => { + global.fetch = mock(() => Promise.resolve(new Response("not json"))); + await initFeatureFlags(); + expect(getFeatureFlag("fast_provision", "control")).toBe("control"); + }); + + it("serves stale cache (>1h old) 
immediately and refreshes in the background", async () => { + writeCache( + { + fast_provision: "stale", + }, + 2 * 60 * 60 * 1000, + ); + global.fetch = mock(() => + Promise.resolve( + new Response( + JSON.stringify({ + featureFlags: { + fast_provision: "fresh", + }, + }), + ), + ), + ); + await initFeatureFlags(); + // Stale value is served immediately — this is the point of SWR. + expect(getFeatureFlag("fast_provision", "control")).toBe("stale"); + // After the background refresh settles, the fresh value takes over. + await _awaitBackgroundRefreshForTest(); + _resetFeatureFlagsForTest(); + await initFeatureFlags(); + expect(getFeatureFlag("fast_provision", "control")).toBe("fresh"); + }); + + it("does NOT fetch when cache is fresh (<1h old)", async () => { + writeCache( + { + fast_provision: "cached", + }, + 5 * 60 * 1000, + ); + let fetched = false; + global.fetch = mock(() => { + fetched = true; + return Promise.resolve(new Response("{}")); + }); + await initFeatureFlags(); + expect(fetched).toBe(false); + expect(getFeatureFlag("fast_provision", "control")).toBe("cached"); + }); + + it("writes the response to the cache file", async () => { + global.fetch = mock(() => + Promise.resolve( + new Response( + JSON.stringify({ + featureFlags: { + fast_provision: "test", + }, + }), + ), + ), + ); + await initFeatureFlags(); + expect(existsSync(cachePath())).toBe(true); + }); + + it("short-circuits when SPAWN_FEATURE_FLAGS_DISABLED=1 is set", async () => { + process.env.SPAWN_FEATURE_FLAGS_DISABLED = "1"; + let fetched = false; + global.fetch = mock(() => { + fetched = true; + return Promise.resolve(new Response("{}")); + }); + await initFeatureFlags(); + expect(fetched).toBe(false); + expect(getFeatureFlag("fast_provision", "control")).toBe("control"); + }); + + it("is idempotent — second call does not refetch", async () => { + let count = 0; + global.fetch = mock(() => { + count++; + return Promise.resolve( + new Response( + JSON.stringify({ + featureFlags: { + 
fast_provision: "test", + }, + }), + ), + ); + }); + await initFeatureFlags(); + await initFeatureFlags(); + expect(count).toBe(1); + }); + }); + + describe("getFeatureFlag", () => { + it("returns fallback when flags were never initialized", () => { + expect(getFeatureFlag("missing", "default")).toBe("default"); + expect(getFeatureFlag("missing-bool", false)).toBe(false); + }); + + it("returns fallback for unknown keys when flags are loaded", async () => { + global.fetch = mock(() => + Promise.resolve( + new Response( + JSON.stringify({ + featureFlags: { + known: "yes", + }, + }), + ), + ), + ); + await initFeatureFlags(); + expect(getFeatureFlag("known", "default")).toBe("yes"); + expect(getFeatureFlag("unknown", "default")).toBe("default"); + }); + }); +}); diff --git a/packages/cli/src/__tests__/install-id.test.ts b/packages/cli/src/__tests__/install-id.test.ts new file mode 100644 index 00000000..ff8df4a9 --- /dev/null +++ b/packages/cli/src/__tests__/install-id.test.ts @@ -0,0 +1,48 @@ +// Unit tests for shared/install-id.ts — persistent UUID generation and read. 
+
+import { afterEach, beforeEach, describe, expect, it } from "bun:test";
+import { existsSync, readFileSync, rmSync, writeFileSync } from "node:fs";
+import { _resetInstallIdCache, getInstallId } from "../shared/install-id.js";
+import { getInstallIdPath } from "../shared/paths.js";
+
+describe("getInstallId", () => {
+  beforeEach(() => {
+    _resetInstallIdCache();
+    rmSync(getInstallIdPath(), {
+      force: true, // a missing file is fine — every test starts with no persisted ID
+    });
+  });
+
+  afterEach(() => {
+    _resetInstallIdCache();
+  });
+
+  it("creates a UUID on first call and persists it", () => {
+    const id = getInstallId();
+    expect(id).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/);
+    expect(existsSync(getInstallIdPath())).toBe(true);
+    expect(readFileSync(getInstallIdPath(), "utf8").trim()).toBe(id);
+  });
+
+  it("returns the same value on subsequent calls (in-memory cache)", () => {
+    const a = getInstallId();
+    const b = getInstallId();
+    expect(a).toBe(b);
+  });
+
+  it("reads from disk on a fresh module state", () => {
+    const a = getInstallId();
+    _resetInstallIdCache();
+    const b = getInstallId();
+    expect(a).toBe(b);
+  });
+
+  it("regenerates if the persisted file is malformed", () => {
+    getInstallId(); // creates the parent dir, so this test also passes when run alone
+    writeFileSync(getInstallIdPath(), "not-a-uuid");
+    _resetInstallIdCache();
+    const id = getInstallId();
+    expect(id).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/);
+    expect(id).not.toBe("not-a-uuid");
+  });
+});
diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts
index a5e2a443..9db9f46d 100644
--- a/packages/cli/src/index.ts
+++ b/packages/cli/src/index.ts
@@ -39,6 +39,7 @@ import {
 } from "./commands/index.js";
 import { expandEqualsFlags, findUnknownFlag } from "./flags.js";
 import { agentKeys, cloudKeys, getCacheAge, loadManifest } from "./manifest.js";
+import { getFeatureFlag, initFeatureFlags } from "./shared/feature-flags.js";
 import { getInstallRefPath } from "./shared/paths.js";
 import { asyncTryCatch, asyncTryCatchIf, isFileError, isNetworkError, tryCatch, tryCatchIf }
from "./shared/result.js"; import { captureError, initTelemetry, setTelemetryContext } from "./shared/telemetry.js"; @@ -848,6 +849,8 @@ async function main(): Promise { // ── `spawn pick` — bypass all flag parsing; used by bash scripts ────────── // Must be handled before expandEqualsFlags / resolvePrompt so that pick's // own --prompt flag is not mistakenly consumed by the top-level prompt logic. + // Runs before initFeatureFlags() — this is a hot path called by shell + // scripts and must stay fast; it has no code paths that gate on a flag. if (rawArgs[0] === "pick") { const pickResult = await asyncTryCatch(() => cmdPick(expandEqualsFlags(rawArgs.slice(1)))); if (!pickResult.ok) { @@ -857,11 +860,18 @@ async function main(): Promise { } // ── `spawn feedback` — bypass flag parsing; rest of args are the message ─── + // Also runs before initFeatureFlags() for the same reason as `pick`. if (rawArgs[0] === "feedback") { await cmdFeedback(rawArgs.slice(1)); return; } + // Fetch feature flags (1.5s timeout, fail-open). Must run before any code + // path that gates on a flag — currently the SPAWN_BETA composition for the + // `fast_provision` experiment. Placed AFTER the pick/feedback bypasses so + // those fast paths never pay the flag-fetch cost. 
+ await initFeatureFlags(); + const args = expandEqualsFlags(rawArgs); // Pre-scan for --output json before checkForUpdates() so install script @@ -927,6 +937,7 @@ async function main(): Promise { "skills", ]); const betaFeatures = extractAllFlagValues(filteredArgs, "--beta", "spawn --beta parallel"); + const userOptedIntoBeta = betaFeatures.length > 0 || process.env.SPAWN_FAST === "1"; for (const flag of betaFeatures) { if (!VALID_BETA_FEATURES.has(flag)) { console.error(pc.red(`Unknown beta feature: ${pc.bold(flag)}`)); @@ -945,6 +956,18 @@ async function main(): Promise { if (process.env.SPAWN_FAST === "1") { betaFeatures.push("tarball", "images", "parallel", "docker"); } + + // fast_provision experiment: if the user did NOT pass --beta or --fast, + // bucket them on the PostHog `fast_provision` flag. The `test` variant + // turns on tarball + images by default; control behaves as before. + // Exposure is captured for both variants so PostHog can compute conversion. + if (!userOptedIntoBeta) { + const variant = getFeatureFlag("fast_provision", "control"); + if (variant === "test") { + betaFeatures.push("tarball", "images"); + } + } + if (betaFeatures.length > 0) { process.env.SPAWN_BETA = [ ...new Set(betaFeatures), diff --git a/packages/cli/src/shared/feature-flags.ts b/packages/cli/src/shared/feature-flags.ts new file mode 100644 index 00000000..48435743 --- /dev/null +++ b/packages/cli/src/shared/feature-flags.ts @@ -0,0 +1,216 @@ +// shared/feature-flags.ts — PostHog feature-flag evaluation for the CLI. +// +// We do NOT use the PostHog Node SDK; we hand-roll a single POST to /decide, +// same project as telemetry.ts. Bucketing key is the install ID (stable per +// machine), not the per-run session UUID. 
+// +// Behavior: +// - 1.5s timeout, fail-open (variants treated as missing — control wins) +// - On-disk cache at $SPAWN_HOME/feature-flags-cache.json with 1h TTL +// - Stale-while-revalidate: +// • fresh cache (; +type CacheEntry = { + flags: FlagMap; + fetchedAt: number; +}; + +let _flags: FlagMap | null = null; +let _initialized = false; +let _backgroundRefresh: Promise | null = null; +const _exposed = new Set(); + +function getCachePath(): string { + return join(getSpawnDir(), "feature-flags-cache.json"); +} + +function isDisabled(): boolean { + return process.env.SPAWN_FEATURE_FLAGS_DISABLED === "1"; +} + +/** Read the cache file. Returns the entry (including fetchedAt) or null if + * the file is missing/corrupt. Does NOT filter by TTL — callers decide + * whether the entry is fresh enough. */ +function readCache(): CacheEntry | null { + const readResult = tryCatch(() => readFileSync(getCachePath(), "utf8")); + if (!readResult.ok) { + return null; + } + const parsed = parseJsonWith(readResult.data, CacheFileSchema); + if (!parsed) { + return null; + } + return { + flags: parsed.flags, + fetchedAt: parsed.fetchedAt, + }; +} + +function isFresh(entry: CacheEntry): boolean { + return Date.now() - entry.fetchedAt <= CACHE_TTL_MS; +} + +function writeCache(flags: FlagMap): void { + const path = getCachePath(); + const payload = JSON.stringify({ + fetchedAt: Date.now(), + flags, + }); + tryCatch(() => { + const dir = dirname(path); + if (!existsSync(dir)) { + mkdirSync(dir, { + recursive: true, + }); + } + writeFileSync(path, payload, { + mode: 0o600, + }); + }); +} + +async function fetchFlags(): Promise { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); + const result = await asyncTryCatch(async () => { + const res = await fetch(DECIDE_URL, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + api_key: POSTHOG_TOKEN, + distinct_id: getInstallId(), 
+      }),
+      signal: controller.signal,
+    });
+    if (!res.ok) {
+      return null;
+    }
+    return await res.text();
+  });
+  clearTimeout(timer);
+  if (!result.ok || !result.data) {
+    return null;
+  }
+  const parsed = parseJsonWith(result.data, DecideResponseSchema);
+  if (!parsed) {
+    return null;
+  }
+  return parsed.featureFlags ?? {};
+}
+
+/** Background refresh: fetch and, on success, replace the in-memory flags and
+ * rewrite the on-disk cache. A failed fetch (null) leaves both untouched.
+ * Fire-and-forget by callers; the returned promise lets tests await completion. */
+function startBackgroundRefresh(): Promise<void> {
+  return fetchFlags().then((fresh) => {
+    if (fresh) {
+      _flags = fresh;
+      writeCache(fresh);
+    }
+  });
+}
+
+/**
+ * Initialize feature flags. Implements stale-while-revalidate against the
+ * on-disk cache:
+ *   - fresh cache (age within CACHE_TTL_MS): use it, no network call
+ *   - stale cache: serve it immediately and refresh in the background
+ *   - no cache: await one fetch (bounded by FETCH_TIMEOUT_MS), fail-open
+ *
+ * Only the first call does any work; later calls return immediately. When
+ * SPAWN_FEATURE_FLAGS_DISABLED=1 nothing is read or fetched, so lookups
+ * return their fallback.
+ */
+export async function initFeatureFlags(): Promise<void> {
+  if (_initialized || isDisabled()) {
+    _initialized = true;
+    return;
+  }
+  _initialized = true;
+
+  const cached = readCache();
+  if (cached) {
+    // Use the cached value immediately so this call is ~instant.
+    _flags = cached.flags;
+    if (!isFresh(cached)) {
+      // Stale — refresh in the background. The refresh runs fire-and-forget;
+      // if the process exits before it completes, the next run will refresh.
+      _backgroundRefresh = startBackgroundRefresh();
+    }
+    return;
+  }
+
+  // No cache at all — await a sync fetch so the first run still gets a
+  // variant. Bounded by FETCH_TIMEOUT_MS; fail-open on timeout/error.
+  const fresh = await fetchFlags();
+  if (fresh) {
+    _flags = fresh;
+    writeCache(fresh);
+  }
+}
+
+/**
+ * Look up a feature flag variant. Returns `fallback` if flags weren't fetched
+ * (timeout, disabled, network error) or the key is unknown.
+ *
+ * Captures a $feature_flag_called event the first time each key is read in
+ * this process — required for PostHog to attribute conversions to the variant.
+ * No event is captured while SPAWN_FEATURE_FLAGS_DISABLED=1.
+ */
+export function getFeatureFlag<T extends string | boolean>(key: string, fallback: T): string | boolean {
+  // Own-property check rather than `in`: `in` also matches inherited names
+  // such as "constructor" or "toString", which would hand back a function
+  // instead of the fallback.
+  const value = _flags && Object.prototype.hasOwnProperty.call(_flags, key) ? _flags[key] : fallback;
+  if (!_exposed.has(key) && !isDisabled()) {
+    _exposed.add(key);
+    captureEvent("$feature_flag_called", {
+      $feature_flag: key,
+      $feature_flag_response: value,
+    });
+  }
+  return value;
+}
+
+/** Test-only: reset module state between tests. */
+export function _resetFeatureFlagsForTest(): void {
+  _flags = null;
+  _initialized = false;
+  _backgroundRefresh = null;
+  _exposed.clear();
+}
+
+/** Test-only: await the in-flight background refresh (if any). Returns
+ * immediately when there is no refresh pending. */
+export function _awaitBackgroundRefreshForTest(): Promise<void> {
+  return _backgroundRefresh ?? Promise.resolve();
+}
diff --git a/packages/cli/src/shared/install-id.ts b/packages/cli/src/shared/install-id.ts
new file mode 100644
index 00000000..62596b11
--- /dev/null
+++ b/packages/cli/src/shared/install-id.ts
@@ -0,0 +1,61 @@
+// shared/install-id.ts — Stable per-machine identifier for PostHog bucketing.
+//
+// Generated lazily on first call and persisted at getInstallIdPath().
+// Used as the PostHog `distinct_id` for telemetry events and feature-flag
+// evaluation, so the same machine reliably gets the same flag variant across
+// runs (per-run session UUIDs would re-bucket every invocation).
+
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { dirname } from "node:path";
+import { getInstallIdPath } from "./paths.js";
+import { tryCatch } from "./result.js";
+
+const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/;
+
+let _cached: string | null = null;
+
+/**
+ * Return the persistent install ID, creating it on first call.
+ * Falls back to an ephemeral UUID if the disk write fails (read-only home,
+ * permission errors). Never throws.
+ */
+export function getInstallId(): string {
+  if (_cached) {
+    return _cached;
+  }
+  const path = getInstallIdPath();
+
+  // Reuse the persisted ID only if it is a well-formed UUID. A missing,
+  // unreadable, or malformed file falls through to regeneration below.
+  const readResult = tryCatch(() => readFileSync(path, "utf8").trim());
+  if (readResult.ok && UUID_RE.test(readResult.data)) {
+    _cached = readResult.data;
+    return _cached;
+  }
+
+  // Generate a new ID and persist it (best-effort).
+  const id = crypto.randomUUID();
+  tryCatch(() => {
+    const dir = dirname(path);
+    if (!existsSync(dir)) {
+      mkdirSync(dir, {
+        recursive: true,
+      });
+    }
+    writeFileSync(path, id, {
+      mode: 0o600,
+    });
+  });
+
+  // The write result is deliberately ignored: on a disk-write failure
+  // (read-only home, permission errors) we still return a UUID so flag
+  // evaluation works for this run. The user gets re-bucketed next time,
+  // but nothing breaks.
+  _cached = id;
+  return _cached;
+}
+
+/** Test-only: reset the in-memory cache so a fresh getInstallId() reads disk. */
+export function _resetInstallIdCache(): void {
+  _cached = null;
+}
diff --git a/packages/cli/src/shared/paths.ts b/packages/cli/src/shared/paths.ts
index 52d1781e..e2a511c5 100644
--- a/packages/cli/src/shared/paths.ts
+++ b/packages/cli/src/shared/paths.ts
@@ -63,6 +63,16 @@ export function getInstallRefPath(): string {
   return join(getUserHome(), ".config", "spawn", ".ref");
 }
 
+/**
+ * Return the path to the persistent install ID file.
+ * Stable per machine across `spawn` invocations — used as PostHog `distinct_id`
+ * for telemetry events and feature-flag bucketing. Path matches the legacy
+ * telemetry-id location so existing users keep their identity.
+ */
+export function getInstallIdPath(): string {
+  return join(getUserHome(), ".config", "spawn", ".telemetry-id");
+}
+
 /** Return the cache directory for spawn, respecting XDG_CACHE_HOME.
*/ export function getCacheDir(): string { return join(process.env.XDG_CACHE_HOME || join(getUserHome(), ".cache"), "spawn"); diff --git a/packages/cli/src/shared/telemetry.ts b/packages/cli/src/shared/telemetry.ts index 72cd5791..f9d45dde 100644 --- a/packages/cli/src/shared/telemetry.ts +++ b/packages/cli/src/shared/telemetry.ts @@ -4,10 +4,9 @@ // Never sends command args, file paths, or user prompt content. // Events are sent immediately — no batching, no lost events on process.exit(). -import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; -import { dirname, join } from "node:path"; import { isString } from "@openrouter/spawn-shared"; -import { asyncTryCatch, tryCatch } from "./result.js"; +import { getInstallId } from "./install-id.js"; +import { asyncTryCatch } from "./result.js"; // Same PostHog project as feedback.ts const POSTHOG_TOKEN = "phc_7ToS2jDeWBlMu4n2JoNzoA1FnArdKwFMFoHVnAqQ6O1"; @@ -117,37 +116,8 @@ let _userId = ""; let _sessionId = ""; let _context: Record = {}; -// ── Persistent User ID ───────────────────────────────────────────────────── - -function getTelemetryIdPath(): string { - return join(process.env.HOME ?? "/tmp", ".config", "spawn", ".telemetry-id"); -} - -function loadOrCreateUserId(): string { - const idPath = getTelemetryIdPath(); - const loadResult = tryCatch(() => { - if (existsSync(idPath)) { - const id = readFileSync(idPath, "utf-8").trim(); - if (id.length > 0) { - return id; - } - } - return null; - }); - if (loadResult.ok && loadResult.data) { - return loadResult.data; - } - const id = crypto.randomUUID(); - tryCatch(() => { - mkdirSync(dirname(idPath), { - recursive: true, - }); - writeFileSync(idPath, id, { - mode: 0o600, - }); - }); - return id; -} +// Persistent user ID is provided by shared/install-id.ts so feature flags and +// telemetry share the same PostHog identity. 
// ── Public API ────────────────────────────────────────────────────────────── @@ -163,8 +133,8 @@ export function initTelemetry(version: string): void { return; } - // Persistent user ID — same across all runs - _userId = loadOrCreateUserId(); + // Persistent user ID — same across all runs (shared with feature flags) + _userId = getInstallId(); // Session ID — shared between parent and child processes within one spawn run _sessionId = process.env.SPAWN_TELEMETRY_SESSION || crypto.randomUUID();