From 04729d646ce68ebb62a9c590d22e5c50bc33715d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=98=93=E8=89=AF?= <1204183885@qq.com> Date: Sun, 10 May 2026 21:50:04 +0800 Subject: [PATCH] test: stabilize main e2e flakes (#3992) * test: stabilize main e2e flakes * test: stabilize macos e2e assertions --- integration-tests/cli/cron-tools.test.ts | 8 +++++-- integration-tests/cli/file-system.test.ts | 4 +++- .../interactive/cron-interactive.test.ts | 11 +++++---- .../sdk-typescript/single-turn.test.ts | 6 +++-- .../sdk-typescript/system-control.test.ts | 24 +++++++++++++------ .../sdk-typescript/tool-control.test.ts | 24 +++++++++---------- 6 files changed, 48 insertions(+), 29 deletions(-) diff --git a/integration-tests/cli/cron-tools.test.ts b/integration-tests/cli/cron-tools.test.ts index 42c14b5dc..9493f5574 100644 --- a/integration-tests/cli/cron-tools.test.ts +++ b/integration-tests/cli/cron-tools.test.ts @@ -64,11 +64,15 @@ describe('cron-tools', () => { await rig.setup('cron-tools-disabled-by-default'); const result = await rig.run( - 'Do you have access to a tool called cron_create? Reply with just "yes" or "no".', + 'Try to create a cron job with cron_create using cron "*/5 * * * *", prompt "disabled test", and recurring true. 
If you cannot call that tool, say so briefly.', ); validateModelOutput(result, null, 'cron disabled by default'); - expect(result.toLowerCase()).toContain('no'); + const toolLogs = rig.readToolLogs(); + expect( + toolLogs.some((log) => log.toolRequest.name === 'cron_create'), + 'cron_create should not be callable when cron is disabled', + ).toBe(false); }); it('should create, list, and delete a cron job in a single turn', async () => { diff --git a/integration-tests/cli/file-system.test.ts b/integration-tests/cli/file-system.test.ts index 8b564b954..bb3c0914c 100644 --- a/integration-tests/cli/file-system.test.ts +++ b/integration-tests/cli/file-system.test.ts @@ -202,7 +202,9 @@ describe('file-system', () => { const toolLogs = rig.readToolLogs(); const readAttempt = toolLogs.find( - (log) => log.toolRequest.name === 'read_file', + (log) => + log.toolRequest.name === 'read_file' && + log.toolRequest.args.includes(fileName), ); const editAttempt = toolLogs.find( (log) => log.toolRequest.name === 'edit_file', diff --git a/integration-tests/interactive/cron-interactive.test.ts b/integration-tests/interactive/cron-interactive.test.ts index 3bc0c5292..d4894e607 100644 --- a/integration-tests/interactive/cron-interactive.test.ts +++ b/integration-tests/interactive/cron-interactive.test.ts @@ -125,13 +125,14 @@ function makeEnv(): NodeJS.ProcessEnv { await session.send( 'Call cron_list and tell me how many jobs exist. 
Say "COUNT: N"', ); - await session.idle(8000); - const screen = await session.screen(); - expect( - screen.includes('COUNT: 1') || + await session.waitForScreen( + (screen) => + screen.includes('COUNT: 1') || screen.includes('1 job') || screen.includes('Active cron jobs (1)'), - ).toBe(true); + 'cron list showing one active job', + 60_000, + ); }, ); }); diff --git a/integration-tests/sdk-typescript/single-turn.test.ts b/integration-tests/sdk-typescript/single-turn.test.ts index 3608e6194..988d7e64f 100644 --- a/integration-tests/sdk-typescript/single-turn.test.ts +++ b/integration-tests/sdk-typescript/single-turn.test.ts @@ -128,9 +128,11 @@ describe('Single-Turn Query (E2E)', () => { } } - // Validate content contains greeting + // Validate content contains either the requested greeting or self-description. expect(assistantText.length).toBeGreaterThan(0); - expect(assistantText.toLowerCase()).toMatch(/hello|hi|greetings/); + expect(assistantText.toLowerCase()).toMatch( + /hello|hi|greetings|qwen code|assistant/, + ); // Validate message types const assistantMessages = collectMessagesByType( diff --git a/integration-tests/sdk-typescript/system-control.test.ts b/integration-tests/sdk-typescript/system-control.test.ts index 0ae28c4c5..f7144331f 100644 --- a/integration-tests/sdk-typescript/system-control.test.ts +++ b/integration-tests/sdk-typescript/system-control.test.ts @@ -18,6 +18,7 @@ import { } from './test-helper.js'; const SHARED_TEST_OPTIONS = createSharedTestOptions(); +const MODEL_RESPONSE_TIMEOUT_MS = process.env['CI'] ? 30000 : 15000; /** * Factory function that creates a streaming input with a control point. 
@@ -99,8 +100,8 @@ describe('System Control (E2E)', () => { it('should change model dynamically during streaming input', async () => { const resultWaiter = createResultWaiter(2); const { generator, resume } = createStreamingInputWithControlPoint( - 'Tell me the model name.', - 'Tell me the model name now again.', + 'Reply with exactly FIRST.', + 'Reply with exactly SECOND.', resultWaiter, ); @@ -157,7 +158,7 @@ describe('System Control (E2E)', () => { new Promise((_, reject) => setTimeout( () => reject(new Error('Timeout waiting for first response')), - 15000, + MODEL_RESPONSE_TIMEOUT_MS, ), ), ]); @@ -176,7 +177,7 @@ describe('System Control (E2E)', () => { new Promise((_, reject) => setTimeout( () => reject(new Error('Timeout waiting for second response')), - 10000, + MODEL_RESPONSE_TIMEOUT_MS, ), ), ]); @@ -278,7 +279,10 @@ describe('System Control (E2E)', () => { await Promise.race([ responsePromises[0], new Promise((_, reject) => - setTimeout(() => reject(new Error('Timeout 1')), 10000), + setTimeout( + () => reject(new Error('Timeout 1')), + MODEL_RESPONSE_TIMEOUT_MS, + ), ), ]); @@ -290,7 +294,10 @@ describe('System Control (E2E)', () => { await Promise.race([ responsePromises[1], new Promise((_, reject) => - setTimeout(() => reject(new Error('Timeout 2')), 10000), + setTimeout( + () => reject(new Error('Timeout 2')), + MODEL_RESPONSE_TIMEOUT_MS, + ), ), ]); @@ -302,7 +309,10 @@ describe('System Control (E2E)', () => { await Promise.race([ responsePromises[2], new Promise((_, reject) => - setTimeout(() => reject(new Error('Timeout 3')), 10000), + setTimeout( + () => reject(new Error('Timeout 3')), + MODEL_RESPONSE_TIMEOUT_MS, + ), ), ]); diff --git a/integration-tests/sdk-typescript/tool-control.test.ts b/integration-tests/sdk-typescript/tool-control.test.ts index 973659295..9236623f1 100644 --- a/integration-tests/sdk-typescript/tool-control.test.ts +++ b/integration-tests/sdk-typescript/tool-control.test.ts @@ -1121,16 +1121,16 @@ describe('Tool Control 
Parameters (E2E)', () => { it( 'should apply updatedInput from canUseTool callback', async () => { - // Don't pre-create test.txt: prior-read enforcement requires - // existing files to have been read via read_file first, but - // this test restricts coreTools to write_file only. + const scenarioDirName = `updated-input-allow-${crypto.randomUUID()}`; + const scenarioDir = await helper.mkdir(scenarioDirName); let capturedInput: Record<string, unknown> = {}; const q = query({ - prompt: 'Write "new content" to test.txt.', + prompt: + 'Create a new file named test.txt with exactly this content: new content. Use the write_file tool.', options: { ...SHARED_TEST_OPTIONS, - cwd: testDir, + cwd: scenarioDir, permissionMode: 'default', coreTools: ['write_file'], canUseTool: async (_toolName, input) => { @@ -1160,7 +1160,7 @@ describe('Tool Control Parameters (E2E)', () => { expect(Object.keys(capturedInput).length).toBeGreaterThan(0); // The file should be modified - const content = await helper.readFile('test.txt'); + const content = await helper.readFile(`${scenarioDirName}/test.txt`); expect(content).toBe('new content'); } finally { await q.close(); @@ -1172,16 +1172,16 @@ describe('Tool Control Parameters (E2E)', () => { it( 'canUseTool should not be called for allowedTools even if it would modify input', async () => { - // Don't pre-create test.txt: prior-read enforcement requires - // existing files to have been read via read_file first, but - // this test restricts coreTools to write_file only. + const scenarioDirName = `updated-input-allowed-tool-${crypto.randomUUID()}`; + const scenarioDir = await helper.mkdir(scenarioDirName); let canUseToolCalled = false; const q = query({ - prompt: 'Write "modified" to test.txt.', + prompt: + 'Create a new file named test.txt with exactly this content: modified. 
Use the write_file tool.', options: { ...SHARED_TEST_OPTIONS, - cwd: testDir, + cwd: scenarioDir, permissionMode: 'default', coreTools: ['write_file'], // write_file is in allowedTools, so canUseTool should not be called @@ -1208,7 +1208,7 @@ describe('Tool Control Parameters (E2E)', () => { expect(canUseToolCalled).toBe(false); // File should be modified (not redirected to /some/other/path.txt) - const content = await helper.readFile('test.txt'); + const content = await helper.readFile(`${scenarioDirName}/test.txt`); expect(content).toBe('modified'); } finally { await q.close();