mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-02 05:31:02 +00:00
Fire a fire-and-forget HEAD request early in startup to warm the TCP+TLS connection. Subsequent SDK calls share an undici dispatcher with preconnect, reusing the warmed connection to save 100-200ms on the first request. Skip conditions: - NODE_EXTRA_CA_CERTS set (enterprise TLS inspection) - Sandbox mode (process-restart context) - Non-default baseUrl (mTLS / private deployment) - Non-Node runtimes (Bun) Disable via QWEN_CODE_DISABLE_PRECONNECT=1. Closes #3223
167 lines
5.1 KiB
JavaScript
167 lines
5.1 KiB
JavaScript
#!/usr/bin/env node
|
||
/**
|
||
* API Preconnect Latency Benchmark
|
||
*
|
||
* Measures the real TCP+TLS connection reuse benefit of preconnect by using
|
||
* undici (the same library as apiPreconnect.ts) within a single process.
|
||
*
|
||
* Unlike the previous curl-based approach, this correctly measures connection
|
||
* pool reuse: the same dispatcher instance is shared between the preconnect
|
||
* HEAD request and the subsequent measured request, just like in production.
|
||
*
|
||
* Usage:
|
||
* node scripts/benchmark-api-latency.mjs
|
||
*
|
||
* Environment variables:
|
||
* ITERATIONS=3 Number of cold/warm pairs per endpoint (default: 3)
|
||
* REQUEST_TIMEOUT_MS=5000 Per-request timeout in ms (default: 5000)
|
||
* BENCHMARK_URLS Space-separated extra URLs to benchmark
|
||
*/
|
||
|
||
import { createRequire } from 'module';
|
||
import { performance } from 'perf_hooks';
|
||
|
||
// Resolve undici from the core package (same version used by preconnect)
|
||
const require = createRequire(import.meta.url);
|
||
const { Agent } = require('../packages/core/node_modules/undici/index.js');
|
||
|
||
/**
 * Reads a strictly positive integer from an environment variable.
 *
 * An unset or empty variable yields `fallback` silently. A value that does
 * not parse to a positive safe integer (for example `abc`, `0` or `-4`) also
 * yields `fallback`, but a warning is printed so that a typo does not
 * silently produce a benchmark with zero iterations or an invalid timeout.
 *
 * @param {string} name - Name of the environment variable to read.
 * @param {number} fallback - Value returned when the variable is unusable.
 * @returns {number} The parsed positive integer, or `fallback`.
 */
function readPositiveIntEnv(name, fallback) {
  const raw = process.env[name];
  if (raw === undefined || raw === '') {
    return fallback;
  }
  const parsed = Number.parseInt(raw, 10);
  if (Number.isSafeInteger(parsed) && parsed > 0) {
    return parsed;
  }
  console.warn(
    `Ignoring invalid ${name}=${JSON.stringify(raw)}; using default ${fallback}.`,
  );
  return fallback;
}

// Number of cold/warm measurement pairs taken per endpoint.
const ITERATIONS = readPositiveIntEnv('ITERATIONS', 3);
// Per-request timeout in milliseconds, handed to AbortSignal.timeout().
const REQUEST_TIMEOUT_MS = readPositiveIntEnv('REQUEST_TIMEOUT_MS', 5000);
|
||
|
||
// Endpoints that are always benchmarked.
const DEFAULT_ENDPOINTS = [
  { url: 'https://api.openai.com', label: 'OpenAI' },
  { url: 'https://api.anthropic.com', label: 'Anthropic' },
  { url: 'https://dashscope.aliyuncs.com/compatible-mode/v1', label: 'DashScope (openai-compatible)' },
];

/**
 * Converts the raw BENCHMARK_URLS value into endpoint descriptors.
 *
 * Entries may be separated by any run of whitespace (spaces, tabs, newlines).
 * Entries that are not parseable http(s) URLs are skipped with a warning:
 * passing them on to `fetch` would only make it reject immediately, and that
 * rejection would be recorded as a meaningless near-zero timing.
 *
 * @param {string | undefined} raw - Raw environment value, possibly unset.
 * @returns {{ url: string, label: string }[]} One descriptor per accepted URL,
 *   using the URL text itself as the label.
 */
function parseExtraEndpoints(raw) {
  if (!raw) {
    return [];
  }
  const endpoints = [];
  for (const candidate of raw.split(/\s+/).filter(Boolean)) {
    let protocol = null;
    try {
      ({ protocol } = new URL(candidate));
    } catch {
      // Not a parseable absolute URL; reported below.
    }
    if (protocol === 'http:' || protocol === 'https:') {
      endpoints.push({ url: candidate, label: candidate });
    } else {
      console.warn(`Skipping invalid BENCHMARK_URLS entry: ${candidate}`);
    }
  }
  return endpoints;
}

const extraUrls = parseExtraEndpoints(process.env['BENCHMARK_URLS']);

// Full list to benchmark: the defaults first, then any user-supplied extras.
const ENDPOINTS = [...DEFAULT_ENDPOINTS, ...extraUrls];
|
||
|
||
// ---------------------------------------------------------------------------
|
||
|
||
/**
 * Creates a newly constructed undici `Agent` to act as a fetch dispatcher.
 *
 * Header and body timeouts are set to 0 (undici documents 0 as "disabled"),
 * so the only deadline in play is the one the caller applies per request.
 * Idle connections are kept alive for 60 seconds.
 *
 * @returns {Agent} The new dispatcher instance.
 */
function newDispatcher() {
  const agentOptions = {
    headersTimeout: 0,
    bodyTimeout: 0,
    keepAliveTimeout: 60_000,
  };
  return new Agent(agentOptions);
}
|
||
|
||
/**
 * Issues a single request through `dispatcher` and reports how long it took.
 *
 * The function never rejects: the elapsed time is returned whether the
 * request succeeded, timed out after REQUEST_TIMEOUT_MS, or failed outright.
 * `fetch` resolves for any HTTP status, so non-2xx responses count as
 * successful measurements. A timeout is an expected outcome and stays quiet;
 * any other failure (DNS, refused connection, invalid URL, ...) is reported
 * on stderr so that a bogus near-zero timing is not mistaken for a real one.
 *
 * @param {string} url - Absolute URL to request.
 * @param {object} dispatcher - undici dispatcher whose pool serves the request.
 * @param {string} [method='HEAD'] - HTTP method to use.
 * @returns {Promise<number>} Milliseconds from just before the request was
 *   issued until `fetch` settled.
 */
async function fetchOnce(url, dispatcher, method = 'HEAD') {
  const start = performance.now();
  let response;
  try {
    response = await fetch(url, {
      method,
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
      headers: { 'User-Agent': 'QwenCode-Benchmark/1.0' },
      dispatcher,
    });
  } catch (err) {
    if (err?.name !== 'TimeoutError') {
      const reason =
        err?.cause?.code ?? err?.cause?.message ?? err?.message ?? String(err);
      console.warn(`warning: ${method} ${url} failed (${reason})`);
    }
  }
  // Capture the timing before any cleanup so cleanup cost is not measured.
  const elapsed = performance.now() - start;

  try {
    // An unread body keeps its connection busy; cancelling it hands the
    // socket back to the pool so a follow-up request can reuse it.
    await response?.body?.cancel();
  } catch {
    // Cleanup is best-effort and must not affect the reported timing.
  }
  return elapsed;
}
|
||
|
||
/**
|
||
* Cold measurement: brand-new dispatcher, no preconnect.
|
||
* Returns elapsed ms of the measured request.
|
||
*/
|
||
async function measureCold(url) {
|
||
const dispatcher = newDispatcher();
|
||
return fetchOnce(url, dispatcher, 'HEAD');
|
||
}
|
||
|
||
/**
 * Warm measurement: sends a preconnect HEAD request and then the measured
 * HEAD request through the same dispatcher, so the second request can reuse
 * the connection opened by the first.
 *
 * The dispatcher is destroyed once the measurement is taken. Without this its
 * keep-alive socket would stay open after the function returns.
 *
 * @param {string} url - Endpoint to measure.
 * @returns {Promise<number>} Elapsed milliseconds of the measured request
 *   only; the preconnect time is not included.
 */
async function measureWarm(url) {
  const dispatcher = newDispatcher();
  try {
    // Preconnect mirrors apiPreconnect.ts behaviour: its outcome is
    // irrelevant, it only exists to open the connection.
    await fetchOnce(url, dispatcher, 'HEAD').catch(() => {});
    // Measured request goes through the same pool as the preconnect.
    return await fetchOnce(url, dispatcher, 'HEAD');
  } finally {
    // Teardown is best-effort: a failure here must not discard the timing.
    await dispatcher.destroy().catch(() => {});
  }
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
|
||
/**
 * Renders a duration for display.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The value with one decimal place followed by `ms`.
 */
function fmt(ms) {
  const oneDecimal = ms.toFixed(1);
  return oneDecimal.concat('ms');
}
|
||
|
||
/**
 * Computes the arithmetic mean of a list of numbers.
 *
 * @param {number[]} arr - Values to average.
 * @returns {number} Sum of the values divided by `arr.length`; NaN when the
 *   array is empty (0 / 0).
 */
function avg(arr) {
  let total = 0;
  arr.forEach((value) => {
    total += value;
  });
  return total / arr.length;
}
|
||
|
||
/**
 * Benchmarks one endpoint by alternating cold and warm measurements.
 *
 * Runs ITERATIONS cold/warm pairs, pausing 500 ms after every measurement,
 * logs each pair as it completes and returns the aggregated figures.
 *
 * @param {{ url: string, label: string }} endpoint - Endpoint to benchmark.
 * @returns {Promise<{ label: string, url: string, avgCold: number,
 *   avgWarm: number, saved: number, pct: number }>} Average cold and warm
 *   times in ms, their difference, and that difference as a percentage of
 *   the cold average (0 when the cold average is not positive).
 */
async function benchmarkEndpoint({ url, label }) {
  const pause = () => new Promise((resolve) => setTimeout(resolve, 500));

  console.log(`\n ${label}`);
  console.log(` ${url}`);

  const coldTimes = [];
  const warmTimes = [];

  let run = 0;
  while (run < ITERATIONS) {
    const coldMs = await measureCold(url);
    coldTimes.push(coldMs);

    // Brief pause so the OS can release the cold connection
    await pause();

    const warmMs = await measureWarm(url);
    warmTimes.push(warmMs);

    run += 1;
    console.log(` run ${run}: cold=${fmt(coldMs)} warm=${fmt(warmMs)}`);

    await pause();
  }

  const avgCold = avg(coldTimes);
  const avgWarm = avg(warmTimes);
  const saved = avgCold - avgWarm;

  let pct = 0;
  if (avgCold > 0) {
    pct = (saved / avgCold) * 100;
  }

  return { label, url, avgCold, avgWarm, saved, pct };
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
|
||
// Announce the effective settings before any network activity starts.
const banner = [
  '=== Qwen Code API Preconnect Latency Benchmark ===',
  `Iterations per endpoint : ${ITERATIONS}`,
  `Request timeout : ${REQUEST_TIMEOUT_MS}ms`,
  '\nRunning...',
];
banner.forEach((line) => {
  console.log(line);
});

// Endpoints are benchmarked strictly one after another.
const results = [];
for (const endpoint of ENDPOINTS) {
  results.push(await benchmarkEndpoint(endpoint));
}

// Summary table
console.log('\n\n=== Results ===\n');

const headerCells = [
  'Endpoint'.padEnd(36),
  'Cold (avg)'.padStart(12),
  'Warm (avg)'.padStart(12),
  'Saved'.padStart(10),
  'Improvement'.padStart(13),
];
console.log(headerCells.join(''));
// The rule is as wide as the five columns combined: 36 + 12 + 12 + 10 + 13.
console.log('─'.repeat(83));

for (const { label, avgCold, avgWarm, saved, pct } of results) {
  // Classify the improvement; the thresholds match the legend printed below.
  let status = '✗';
  if (pct >= 30) {
    status = '✓';
  } else if (pct >= 10) {
    status = '~';
  }

  const rowCells = [
    // Labels are cut to 35 characters so they fit the 36-wide first column.
    label.slice(0, 35).padEnd(36),
    fmt(avgCold).padStart(12),
    fmt(avgWarm).padStart(12),
    fmt(saved).padStart(10),
    `${pct.toFixed(1)}% ${status}`.padStart(13),
  ];
  console.log(rowCells.join(''));
}

console.log('\nLegend: ✓ ≥30% improvement ~ 10–30% ✗ <10%');
|