mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-19 16:28:28 +00:00
test: stabilize main e2e flakes (#3992)
* test: stabilize main e2e flakes * test: stabilize macos e2e assertions
This commit is contained in:
parent
1777b20e93
commit
04729d646c
6 changed files with 48 additions and 29 deletions
|
|
@ -64,11 +64,15 @@ describe('cron-tools', () => {
|
|||
await rig.setup('cron-tools-disabled-by-default');
|
||||
|
||||
const result = await rig.run(
|
||||
'Do you have access to a tool called cron_create? Reply with just "yes" or "no".',
|
||||
'Try to create a cron job with cron_create using cron "*/5 * * * *", prompt "disabled test", and recurring true. If you cannot call that tool, say so briefly.',
|
||||
);
|
||||
|
||||
validateModelOutput(result, null, 'cron disabled by default');
|
||||
expect(result.toLowerCase()).toContain('no');
|
||||
const toolLogs = rig.readToolLogs();
|
||||
expect(
|
||||
toolLogs.some((log) => log.toolRequest.name === 'cron_create'),
|
||||
'cron_create should not be callable when cron is disabled',
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it('should create, list, and delete a cron job in a single turn', async () => {
|
||||
|
|
|
|||
|
|
@ -202,7 +202,9 @@ describe('file-system', () => {
|
|||
const toolLogs = rig.readToolLogs();
|
||||
|
||||
const readAttempt = toolLogs.find(
|
||||
(log) => log.toolRequest.name === 'read_file',
|
||||
(log) =>
|
||||
log.toolRequest.name === 'read_file' &&
|
||||
log.toolRequest.args.includes(fileName),
|
||||
);
|
||||
const editAttempt = toolLogs.find(
|
||||
(log) => log.toolRequest.name === 'edit_file',
|
||||
|
|
|
|||
|
|
@ -125,13 +125,14 @@ function makeEnv(): NodeJS.ProcessEnv {
|
|||
await session.send(
|
||||
'Call cron_list and tell me how many jobs exist. Say "COUNT: N"',
|
||||
);
|
||||
await session.idle(8000);
|
||||
const screen = await session.screen();
|
||||
expect(
|
||||
screen.includes('COUNT: 1') ||
|
||||
await session.waitForScreen(
|
||||
(screen) =>
|
||||
screen.includes('COUNT: 1') ||
|
||||
screen.includes('1 job') ||
|
||||
screen.includes('Active cron jobs (1)'),
|
||||
).toBe(true);
|
||||
'cron list showing one active job',
|
||||
60_000,
|
||||
);
|
||||
},
|
||||
);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -128,9 +128,11 @@ describe('Single-Turn Query (E2E)', () => {
|
|||
}
|
||||
}
|
||||
|
||||
// Validate content contains greeting
|
||||
// Validate content contains either the requested greeting or self-description.
|
||||
expect(assistantText.length).toBeGreaterThan(0);
|
||||
expect(assistantText.toLowerCase()).toMatch(/hello|hi|greetings/);
|
||||
expect(assistantText.toLowerCase()).toMatch(
|
||||
/hello|hi|greetings|qwen code|assistant/,
|
||||
);
|
||||
|
||||
// Validate message types
|
||||
const assistantMessages = collectMessagesByType(
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import {
|
|||
} from './test-helper.js';
|
||||
|
||||
const SHARED_TEST_OPTIONS = createSharedTestOptions();
|
||||
const MODEL_RESPONSE_TIMEOUT_MS = process.env['CI'] ? 30000 : 15000;
|
||||
|
||||
/**
|
||||
* Factory function that creates a streaming input with a control point.
|
||||
|
|
@ -99,8 +100,8 @@ describe('System Control (E2E)', () => {
|
|||
it('should change model dynamically during streaming input', async () => {
|
||||
const resultWaiter = createResultWaiter(2);
|
||||
const { generator, resume } = createStreamingInputWithControlPoint(
|
||||
'Tell me the model name.',
|
||||
'Tell me the model name now again.',
|
||||
'Reply with exactly FIRST.',
|
||||
'Reply with exactly SECOND.',
|
||||
resultWaiter,
|
||||
);
|
||||
|
||||
|
|
@ -157,7 +158,7 @@ describe('System Control (E2E)', () => {
|
|||
new Promise((_, reject) =>
|
||||
setTimeout(
|
||||
() => reject(new Error('Timeout waiting for first response')),
|
||||
15000,
|
||||
MODEL_RESPONSE_TIMEOUT_MS,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
|
@ -176,7 +177,7 @@ describe('System Control (E2E)', () => {
|
|||
new Promise((_, reject) =>
|
||||
setTimeout(
|
||||
() => reject(new Error('Timeout waiting for second response')),
|
||||
10000,
|
||||
MODEL_RESPONSE_TIMEOUT_MS,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
|
@ -278,7 +279,10 @@ describe('System Control (E2E)', () => {
|
|||
await Promise.race([
|
||||
responsePromises[0],
|
||||
new Promise((_, reject) =>
|
||||
setTimeout(() => reject(new Error('Timeout 1')), 10000),
|
||||
setTimeout(
|
||||
() => reject(new Error('Timeout 1')),
|
||||
MODEL_RESPONSE_TIMEOUT_MS,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
|
|
@ -290,7 +294,10 @@ describe('System Control (E2E)', () => {
|
|||
await Promise.race([
|
||||
responsePromises[1],
|
||||
new Promise((_, reject) =>
|
||||
setTimeout(() => reject(new Error('Timeout 2')), 10000),
|
||||
setTimeout(
|
||||
() => reject(new Error('Timeout 2')),
|
||||
MODEL_RESPONSE_TIMEOUT_MS,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
|
|
@ -302,7 +309,10 @@ describe('System Control (E2E)', () => {
|
|||
await Promise.race([
|
||||
responsePromises[2],
|
||||
new Promise((_, reject) =>
|
||||
setTimeout(() => reject(new Error('Timeout 3')), 10000),
|
||||
setTimeout(
|
||||
() => reject(new Error('Timeout 3')),
|
||||
MODEL_RESPONSE_TIMEOUT_MS,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
|
|
|
|||
|
|
@ -1121,16 +1121,16 @@ describe('Tool Control Parameters (E2E)', () => {
|
|||
it(
|
||||
'should apply updatedInput from canUseTool callback',
|
||||
async () => {
|
||||
// Don't pre-create test.txt: prior-read enforcement requires
|
||||
// existing files to have been read via read_file first, but
|
||||
// this test restricts coreTools to write_file only.
|
||||
const scenarioDirName = `updated-input-allow-${crypto.randomUUID()}`;
|
||||
const scenarioDir = await helper.mkdir(scenarioDirName);
|
||||
let capturedInput: Record<string, unknown> = {};
|
||||
|
||||
const q = query({
|
||||
prompt: 'Write "new content" to test.txt.',
|
||||
prompt:
|
||||
'Create a new file named test.txt with exactly this content: new content. Use the write_file tool.',
|
||||
options: {
|
||||
...SHARED_TEST_OPTIONS,
|
||||
cwd: testDir,
|
||||
cwd: scenarioDir,
|
||||
permissionMode: 'default',
|
||||
coreTools: ['write_file'],
|
||||
canUseTool: async (_toolName, input) => {
|
||||
|
|
@ -1160,7 +1160,7 @@ describe('Tool Control Parameters (E2E)', () => {
|
|||
expect(Object.keys(capturedInput).length).toBeGreaterThan(0);
|
||||
|
||||
// The file should be modified
|
||||
const content = await helper.readFile('test.txt');
|
||||
const content = await helper.readFile(`${scenarioDirName}/test.txt`);
|
||||
expect(content).toBe('new content');
|
||||
} finally {
|
||||
await q.close();
|
||||
|
|
@ -1172,16 +1172,16 @@ describe('Tool Control Parameters (E2E)', () => {
|
|||
it(
|
||||
'canUseTool should not be called for allowedTools even if it would modify input',
|
||||
async () => {
|
||||
// Don't pre-create test.txt: prior-read enforcement requires
|
||||
// existing files to have been read via read_file first, but
|
||||
// this test restricts coreTools to write_file only.
|
||||
const scenarioDirName = `updated-input-allowed-tool-${crypto.randomUUID()}`;
|
||||
const scenarioDir = await helper.mkdir(scenarioDirName);
|
||||
let canUseToolCalled = false;
|
||||
|
||||
const q = query({
|
||||
prompt: 'Write "modified" to test.txt.',
|
||||
prompt:
|
||||
'Create a new file named test.txt with exactly this content: modified. Use the write_file tool.',
|
||||
options: {
|
||||
...SHARED_TEST_OPTIONS,
|
||||
cwd: testDir,
|
||||
cwd: scenarioDir,
|
||||
permissionMode: 'default',
|
||||
coreTools: ['write_file'],
|
||||
// write_file is in allowedTools, so canUseTool should not be called
|
||||
|
|
@ -1208,7 +1208,7 @@ describe('Tool Control Parameters (E2E)', () => {
|
|||
expect(canUseToolCalled).toBe(false);
|
||||
|
||||
// File should be modified (not redirected to /some/other/path.txt)
|
||||
const content = await helper.readFile('test.txt');
|
||||
const content = await helper.readFile(`${scenarioDirName}/test.txt`);
|
||||
expect(content).toBe('modified');
|
||||
} finally {
|
||||
await q.close();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue