mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-04-28 11:41:04 +00:00
refactor(tests): reorganize integration tests by execution mode
Move non-interactive tests to cli/, interactive tests to interactive/. Add cron-interactive.test.ts wrapping terminal-capture E2E in vitest. Update npm scripts and release workflow for new directory layout.
This commit is contained in:
parent
707b06ca48
commit
ded89618ec
28 changed files with 261 additions and 327 deletions
251
integration-tests/cli/file-system.test.ts
Normal file
251
integration-tests/cli/file-system.test.ts
Normal file
|
|
@@ -0,0 +1,251 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
TestRig,
|
||||
printDebugInfo,
|
||||
validateModelOutput,
|
||||
} from '../test-helper.js';
|
||||
|
||||
describe('file-system', () => {
|
||||
it('should be able to read a file', async () => {
  // Arrange: a fresh rig whose workspace contains one file with known text.
  const rig = new TestRig();
  await rig.setup('should be able to read a file');
  rig.createFile('test.txt', 'hello world');

  // Act: prompt the model, then look for the read_file tool call.
  const output = await rig.run(
    `read the file test.txt and show me its contents`,
  );
  const sawReadFile = await rig.waitForToolCall('read_file');

  // Dump diagnostics when either the tool call or the expected text is
  // missing, so a failing run can be investigated from the logs.
  const echoesContent = output.includes('hello world');
  if (!(sawReadFile && echoesContent)) {
    printDebugInfo(rig, output, {
      'Found tool call': sawReadFile,
      'Contains hello world': echoesContent,
    });
  }

  // Assert: the tool must have been invoked.
  expect(sawReadFile, 'Expected to find a read_file tool call').toBeTruthy();

  // Per the helper's contract noted in this file: throws when there is no
  // output, and only warns when the expected content is absent.
  validateModelOutput(output, 'hello world', 'File read test');
});
|
||||
|
||||
it('should be able to write a file', async () => {
  // Arrange: start from an empty file the model is asked to fill in.
  const rig = new TestRig();
  await rig.setup('should be able to write a file');
  rig.createFile('test.txt', '');

  const result = await rig.run(`edit test.txt to have a hello world message`);

  // Accept multiple valid tools for editing files. The same list drives the
  // failure diagnostics below so they cover every tool this test accepts.
  const acceptedTools = ['write_file', 'edit'];
  const foundToolCall = await rig.waitForAnyToolCall(acceptedTools);

  // Add debugging information
  if (!foundToolCall) {
    printDebugInfo(rig, result);
  }

  expect(
    foundToolCall,
    'Expected to find a write_file or edit tool call',
  ).toBeTruthy();

  // Validate model output - will throw if no output
  validateModelOutput(result, null, 'File write test');

  const fileContent = rig.readFile('test.txt');
  const containsHello = fileContent.toLowerCase().includes('hello');

  // Add debugging for file content
  if (!containsHello) {
    // Previously only write_file calls were listed, which hid the relevant
    // call whenever the model used the edit tool instead.
    const writeCalls = rig
      .readToolLogs()
      .filter((t) => acceptedTools.includes(t.toolRequest.name))
      .map((t) => ({ tool: t.toolRequest.name, args: t.toolRequest.args }));

    printDebugInfo(rig, result, {
      'File content mismatch': true,
      'Expected to contain': 'hello',
      'Actual content': fileContent,
      'Write tool calls': JSON.stringify(writeCalls),
    });
  }

  expect(containsHello, 'Expected file to contain hello').toBeTruthy();

  // Log success info if verbose
  if (process.env['VERBOSE'] === 'true') {
    console.log('File written successfully with hello message.');
  }
});
|
||||
|
||||
it('should correctly handle file paths with spaces', async () => {
  // The target name deliberately contains spaces; the file does not exist
  // before the run, so the model has to create it.
  const spacedName = 'my test file.txt';
  const rig = new TestRig();
  await rig.setup('should correctly handle file paths with spaces');

  const output = await rig.run(`write "hello" to "${spacedName}"`);

  const sawWriteFile = await rig.waitForToolCall('write_file');
  if (!sawWriteFile) {
    // Emit diagnostics before the assertion below fails.
    printDebugInfo(rig, output);
  }
  expect(sawWriteFile, 'Expected to find a write_file tool call').toBeTruthy();

  // The file must hold exactly the requested text, nothing more.
  const written = rig.readFile(spacedName);
  expect(written).toBe('hello');
});
|
||||
|
||||
it('should perform a read-then-write sequence', async () => {
  // Arrange: a version file the model must read and then bump.
  const rig = new TestRig();
  await rig.setup('should perform a read-then-write sequence');
  const fileName = 'version.txt';
  rig.createFile(fileName, '1.0.0');

  const prompt = `Read the version from ${fileName} and write the next version 1.0.1 back to the file.`;
  const result = await rig.run(prompt);

  // Tool logs are read only after the rig signals telemetry readiness.
  await rig.waitForTelemetryReady();
  const toolLogs = rig.readToolLogs();

  // Any of these tools is a legitimate way to modify the file. `edit` is
  // included for consistency with the other write tests in this suite; without
  // it a model that edits in place fails this test despite doing the right
  // thing.
  const modifyingTools = ['write_file', 'edit', 'replace'];

  const readCall = toolLogs.find(
    (log) => log.toolRequest.name === 'read_file',
  );
  const writeCall = toolLogs.find((log) =>
    modifyingTools.includes(log.toolRequest.name),
  );

  if (!readCall || !writeCall) {
    printDebugInfo(rig, result, { readCall, writeCall });
  }

  expect(readCall, 'Expected to find a read_file tool call').toBeDefined();
  expect(
    writeCall,
    'Expected to find a write_file, edit, or replace tool call',
  ).toBeDefined();

  // The end state is what matters: the bumped version must be on disk.
  const newFileContent = rig.readFile(fileName);
  expect(newFileContent).toContain('1.0.1');
});
|
||||
|
||||
it.skip('should replace multiple instances of a string', async () => {
  // Arrange: a file where the target string occurs twice, so a single-match
  // replacement would leave the content different from expectedContent.
  const rig = new TestRig();
  await rig.setup('should replace multiple instances of a string');
  const fileName = 'ambiguous.txt';
  const fileContent = 'Hey there, \ntest line\ntest line';
  const expectedContent = 'Hey there, \nnew line\nnew line';
  rig.createFile(fileName, fileContent);

  const result = await rig.run(
    `replace "test line" with "new line" in ${fileName}`,
  );

  // Accept every tool that can modify the file. `edit` is included for
  // consistency with the other write tests in this suite.
  const modifyingTools = ['replace', 'edit', 'write_file'];

  const foundToolCall = await rig.waitForAnyToolCall(modifyingTools);
  if (!foundToolCall) {
    printDebugInfo(rig, result);
  }
  expect(
    foundToolCall,
    'Expected to find a replace, edit, or write_file tool call',
  ).toBeTruthy();

  // At least one of the modifying calls must be marked successful in the logs.
  const toolLogs = rig.readToolLogs();
  const successfulEdit = toolLogs.some(
    (log) =>
      modifyingTools.includes(log.toolRequest.name) &&
      log.toolRequest.success,
  );
  if (!successfulEdit) {
    console.error(
      'Expected a successful edit tool call, but none was found.',
    );
    printDebugInfo(rig, result);
  }
  expect(successfulEdit, 'Expected a successful edit tool call').toBeTruthy();

  // Both occurrences must have been replaced, with nothing else changed.
  const newFileContent = rig.readFile(fileName);
  expect(newFileContent).toBe(expectedContent);
});
|
||||
|
||||
it('should fail safely when trying to edit a non-existent file', async () => {
  // Arrange: no file is created, so every operation on fileName targets a
  // path that does not exist in the rig's workspace.
  const rig = new TestRig();
  await rig.setup(
    'should fail safely when trying to edit a non-existent file',
  );
  const fileName = 'non_existent.txt';

  const result = await rig.run(`In ${fileName}, replace "a" with "b"`);

  // Tool logs are read only after the rig signals telemetry readiness.
  await rig.waitForTelemetryReady();
  const toolLogs = rig.readToolLogs();

  const readAttempt = toolLogs.find(
    (log) => log.toolRequest.name === 'read_file',
  );
  const editAttempt = toolLogs.find(
    (log) => log.toolRequest.name === 'edit_file',
  );
  // In-place edits may be logged as `replace` or `edit` (the latter is the
  // name the other write tests in this suite accept). Checking only `replace`
  // would let a successful `edit` go unnoticed.
  const inPlaceEditTools = ['replace', 'edit'];
  const successfulReplace = toolLogs.find(
    (log) =>
      inPlaceEditTools.includes(log.toolRequest.name) &&
      log.toolRequest.success,
  );

  // The model can either investigate (and fail) or do nothing.
  // If it chose to investigate by reading, that read must have failed.
  if (readAttempt) {
    if (readAttempt.toolRequest.success) {
      console.error(
        'A read_file attempt succeeded for a non-existent file when it should have failed.',
      );
      printDebugInfo(rig, result);
    }
    expect(
      readAttempt.toolRequest.success,
      'If model tries to read the file, that attempt must fail',
    ).toBe(false);
  }

  // CRITICAL: Verify that no matter what the model did, it never successfully
  // wrote or replaced anything.
  if (editAttempt) {
    console.error(
      'A edit_file attempt was made when no file should be written.',
    );
    printDebugInfo(rig, result);
  }
  expect(
    editAttempt,
    'edit_file should not have been called',
  ).toBeUndefined();

  if (successfulReplace) {
    console.error('A successful replace occurred when it should not have.');
    printDebugInfo(rig, result);
  }
  expect(
    successfulReplace,
    'A successful replace should not have occurred',
  ).toBeUndefined();
});
|
||||
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue