qwen-code/scripts/benchmark-api-latency.mjs
jinye 3b0b6c052b
feat(cli): add API preconnect to reduce first-call latency (#3318)
Fire a fire-and-forget HEAD request early in startup to warm the TCP+TLS connection. Subsequent SDK calls share an undici dispatcher with preconnect, reusing the warmed connection to save 100-200ms on the first request.

Skip conditions:
- NODE_EXTRA_CA_CERTS set (enterprise TLS inspection)
- Sandbox mode (process-restart context)
- Non-default baseUrl (mTLS / private deployment)
- Non-Node runtimes (Bun)

Disable via QWEN_CODE_DISABLE_PRECONNECT=1.

Closes #3223
2026-04-27 06:54:55 +08:00

167 lines
5.1 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* API Preconnect Latency Benchmark
*
* Measures the real TCP+TLS connection reuse benefit of preconnect by using
* undici (the same library as apiPreconnect.ts) within a single process.
*
* Unlike the previous curl-based approach, this correctly measures connection
* pool reuse: the same dispatcher instance is shared between the preconnect
* HEAD request and the subsequent measured request, just like in production.
*
* Usage:
* node scripts/benchmark-api-latency.mjs
*
* Environment variables:
* ITERATIONS=3 Number of cold/warm pairs per endpoint (default: 3)
* REQUEST_TIMEOUT_MS=5000 Per-request timeout in ms (default: 5000)
* BENCHMARK_URLS Space-separated extra URLs to benchmark
*/
import { createRequire } from 'module';
import { performance } from 'perf_hooks';
// Resolve undici from the core package (same version used by preconnect)
const require = createRequire(import.meta.url);
const { Agent } = require('../packages/core/node_modules/undici/index.js');
// Benchmark configuration, overridable via environment variables.
const ITERATIONS = Number.parseInt(process.env['ITERATIONS'] ?? '3', 10);
const REQUEST_TIMEOUT_MS = Number.parseInt(
  process.env['REQUEST_TIMEOUT_MS'] ?? '5000',
  10,
);
// Endpoints that are always benchmarked, regardless of BENCHMARK_URLS.
const DEFAULT_ENDPOINTS = [
  { url: 'https://api.openai.com', label: 'OpenAI' },
  { url: 'https://api.anthropic.com', label: 'Anthropic' },
  { url: 'https://dashscope.aliyuncs.com/compatible-mode/v1', label: 'DashScope (openai-compatible)' },
];
// Optional user-supplied endpoints: space-separated URLs, labelled by URL.
const rawExtra = process.env['BENCHMARK_URLS'] ?? '';
const extraUrls = rawExtra
  .split(' ')
  .filter(Boolean)
  .map((url) => ({ url, label: url }));
const ENDPOINTS = [...DEFAULT_ENDPOINTS, ...extraUrls];
// ---------------------------------------------------------------------------
/**
 * Build a fresh undici Agent configured like apiPreconnect.ts:
 * header/body timeouts disabled, 60s keep-alive window.
 *
 * @returns {import('undici').Agent} A brand-new connection pool.
 */
function newDispatcher() {
  const options = {
    headersTimeout: 0,
    bodyTimeout: 0,
    keepAliveTimeout: 60_000,
  };
  return new Agent(options);
}
/**
 * Issue one request on the given dispatcher and return elapsed wall-clock ms.
 *
 * This never rejects: timeouts and HTTP error statuses are expected — we
 * only care about connection-establishment timing. Previously the catch
 * block had a dead branch (the TimeoutError early return was identical to
 * the fall-through) and silently swallowed every other failure, so an
 * unreachable endpoint (DNS failure, refused connection, TLS error) was
 * reported as a plausible latency number. Non-timeout failures are now
 * logged so bad data points are visible in the output.
 *
 * @param {string} url - Target URL.
 * @param {import('undici').Dispatcher} dispatcher - Connection pool to use.
 * @param {string} [method='HEAD'] - HTTP method.
 * @returns {Promise<number>} Elapsed time in milliseconds.
 */
async function fetchOnce(url, dispatcher, method = 'HEAD') {
  const start = performance.now();
  try {
    await fetch(url, {
      method,
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
      headers: { 'User-Agent': 'QwenCode-Benchmark/1.0' },
      dispatcher,
    });
  } catch (err) {
    // Timeouts still record the time spent; anything else likely means the
    // endpoint is unreachable and would skew the averages, so surface it.
    if (err?.name !== 'TimeoutError') {
      console.warn(`warn: ${method} ${url} failed: ${err?.message ?? err}`);
    }
  }
  return performance.now() - start;
}
/**
 * Cold-path measurement: the request runs on a brand-new dispatcher and
 * therefore pays the full TCP+TLS setup cost.
 *
 * @param {string} url - Endpoint to measure.
 * @returns {Promise<number>} Elapsed ms of the measured request.
 */
async function measureCold(url) {
  const coldPool = newDispatcher();
  return await fetchOnce(url, coldPool, 'HEAD');
}
/**
 * Warm-path measurement: one dispatcher serves both the preconnect HEAD
 * and the measured request, so the second request can reuse the warmed
 * connection. Only the second request's time is returned.
 *
 * @param {string} url - Endpoint to measure.
 * @returns {Promise<number>} Elapsed ms of the measured request only.
 */
async function measureWarm(url) {
  const pool = newDispatcher();
  // Warm the pool first — mirrors apiPreconnect.ts behaviour; failures
  // are deliberately ignored.
  await fetchOnce(url, pool, 'HEAD').catch(() => {});
  // Measured request rides the connection the warm-up established.
  return await fetchOnce(url, pool, 'HEAD');
}
// ---------------------------------------------------------------------------
/** Render a millisecond value with one decimal place, e.g. "12.3ms". */
const fmt = (ms) => `${ms.toFixed(1)}ms`;
/**
 * Arithmetic mean of a numeric array.
 *
 * Returns 0 for an empty array instead of NaN — reachable in practice when
 * the user sets ITERATIONS=0, which previously propagated NaN through the
 * whole summary table.
 *
 * @param {number[]} arr - Sample values.
 * @returns {number} Mean of the samples, or 0 when there are none.
 */
function avg(arr) {
  if (arr.length === 0) return 0;
  return arr.reduce((a, b) => a + b, 0) / arr.length;
}
/**
 * Run ITERATIONS cold/warm measurement pairs against one endpoint,
 * printing per-run numbers as they arrive.
 *
 * @param {{url: string, label: string}} endpoint - Endpoint under test.
 * @returns {Promise<{label: string, url: string, avgCold: number,
 *   avgWarm: number, saved: number, pct: number}>} Aggregated stats.
 */
async function benchmarkEndpoint({ url, label }) {
  console.log(`\n ${label}`);
  console.log(` ${url}`);
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const coldTimes = [];
  const warmTimes = [];
  for (let run = 0; run < ITERATIONS; run++) {
    const cold = await measureCold(url);
    coldTimes.push(cold);
    // Give the OS a moment to release the cold connection before warming.
    await pause(500);
    const warm = await measureWarm(url);
    warmTimes.push(warm);
    console.log(` run ${run + 1}: cold=${fmt(cold)} warm=${fmt(warm)}`);
    await pause(500);
  }
  const avgCold = avg(coldTimes);
  const avgWarm = avg(warmTimes);
  const saved = avgCold - avgWarm;
  // Guard against divide-by-zero when every cold request failed instantly.
  const pct = avgCold > 0 ? (saved / avgCold) * 100 : 0;
  return { label, url, avgCold, avgWarm, saved, pct };
}
// ---------------------------------------------------------------------------
// Main: benchmark every endpoint sequentially, then print a summary table.
console.log('=== Qwen Code API Preconnect Latency Benchmark ===');
console.log(`Iterations per endpoint : ${ITERATIONS}`);
console.log(`Request timeout : ${REQUEST_TIMEOUT_MS}ms`);
console.log('\nRunning...');
const results = [];
for (const endpoint of ENDPOINTS) {
  // Sequential on purpose: concurrent runs would share bandwidth and
  // contend for sockets, skewing the timings.
  const result = await benchmarkEndpoint(endpoint);
  results.push(result);
}
// Summary table
console.log('\n\n=== Results ===\n');
console.log(
  'Endpoint'.padEnd(36) +
    'Cold (avg)'.padStart(12) +
    'Warm (avg)'.padStart(12) +
    'Saved'.padStart(10) +
    'Improvement'.padStart(13),
);
console.log('─'.repeat(83));
for (const r of results) {
  // ✓ clear win, ~ marginal, ✗ negligible improvement.
  const status = r.pct >= 30 ? '✓' : r.pct >= 10 ? '~' : '✗';
  console.log(
    r.label.slice(0, 35).padEnd(36) +
      fmt(r.avgCold).padStart(12) +
      fmt(r.avgWarm).padStart(12) +
      fmt(r.saved).padStart(10) +
      `${r.pct.toFixed(1)}% ${status}`.padStart(13),
  );
}
// Fix: the legend previously printed "1030%" — the en dash in the middle
// bucket was lost in encoding; it must read 10–30% to match the ✓/~/✗
// thresholds above (>=30, >=10, <10).
console.log('\nLegend: ✓ ≥30% improvement ~ 10–30% ✗ <10%');