refactor(tests): reorganize integration tests by execution mode

Move non-interactive tests to cli/, interactive tests to interactive/. Add cron-interactive.test.ts wrapping terminal-capture E2E in vitest. Update npm scripts and release workflow for new directory layout.
2026-04-28 11:41:04 +00:00 · 2026-03-29 05:46:37 +00:00 · 2026-03-29 05:46:37 +00:00 · ded89618ec
commit ded89618ec
parent 707b06ca48
28 changed files with 261 additions and 327 deletions
--- a/integration-tests/cli/file-system.test.ts
+++ b/integration-tests/cli/file-system.test.ts
@ -0,0 +1,251 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import {
+  TestRig,
+  printDebugInfo,
+  validateModelOutput,
+} from '../test-helper.js';
+
+describe('file-system', () => {
+  // Asks the model to show a file's contents, then checks that a read_file
+  // tool call was recorded and validates the model output against the text.
+  it('should be able to read a file', async () => {
+    const expectedText = 'hello world';
+    const rig = new TestRig();
+    await rig.setup('should be able to read a file');
+    rig.createFile('test.txt', expectedText);
+
+    const output = await rig.run(
+      `read the file test.txt and show me its contents`,
+    );
+    const sawReadFile = await rig.waitForToolCall('read_file');
+    const outputHasText = output.includes(expectedText);
+
+    // Dump diagnostics when the tool call or the expected text is missing.
+    if (!(sawReadFile && outputHasText)) {
+      printDebugInfo(rig, output, {
+        'Found tool call': sawReadFile,
+        'Contains hello world': outputHasText,
+      });
+    }
+
+    expect(
+      sawReadFile,
+      'Expected to find a read_file tool call',
+    ).toBeTruthy();
+
+    // Validate model output - will throw if no output, warn if missing expected content
+    validateModelOutput(output, expectedText, 'File read test');
+  });
+
+  // Asks the model to put a hello-world message into an empty file, then
+  // checks that an editing tool was called and the file now contains "hello".
+  it('should be able to write a file', async () => {
+    const rig = new TestRig();
+    await rig.setup('should be able to write a file');
+    rig.createFile('test.txt', '');
+
+    const result = await rig.run(`edit test.txt to have a hello world message`);
+
+    // Accept multiple valid tools for editing files
+    const foundToolCall = await rig.waitForAnyToolCall(['write_file', 'edit']);
+
+    // Add debugging information
+    if (!foundToolCall) {
+      printDebugInfo(rig, result);
+    }
+
+    expect(
+      foundToolCall,
+      'Expected to find a write_file or edit tool call',
+    ).toBeTruthy();
+
+    // Validate model output - will throw if no output
+    validateModelOutput(result, null, 'File write test');
+
+    const fileContent = rig.readFile('test.txt');
+    // Case-insensitive check, computed once and reused by the debug branch
+    // and the assertion below.
+    const containsHello = fileContent.toLowerCase().includes('hello');
+
+    // Add debugging for file content
+    if (!containsHello) {
+      // Collect calls from every tool this test accepts, so the diagnostics
+      // are not empty when the model chose `edit` instead of `write_file`.
+      const writeCalls = rig
+        .readToolLogs()
+        .filter(
+          (t) =>
+            t.toolRequest.name === 'write_file' ||
+            t.toolRequest.name === 'edit',
+        )
+        .map((t) => t.toolRequest.args);
+
+      printDebugInfo(rig, result, {
+        'File content mismatch': true,
+        'Expected to contain': 'hello',
+        'Actual content': fileContent,
+        'Write tool calls': JSON.stringify(writeCalls),
+      });
+    }
+
+    expect(containsHello, 'Expected file to contain hello').toBeTruthy();
+
+    // Log success info if verbose
+    if (process.env['VERBOSE'] === 'true') {
+      console.log('File written successfully with hello message.');
+    }
+  });
+
+  // Checks that a file whose name contains spaces is written through
+  // write_file and ends up with exactly the requested content.
+  it('should correctly handle file paths with spaces', async () => {
+    const spacedName = 'my test file.txt';
+    const rig = new TestRig();
+    await rig.setup('should correctly handle file paths with spaces');
+
+    const output = await rig.run(`write "hello" to "${spacedName}"`);
+    const sawWriteFile = await rig.waitForToolCall('write_file');
+
+    // Print diagnostics before the assertion below fails.
+    if (!sawWriteFile) {
+      printDebugInfo(rig, output);
+    }
+    expect(
+      sawWriteFile,
+      'Expected to find a write_file tool call',
+    ).toBeTruthy();
+
+    // The file content must match exactly, with nothing added around it.
+    expect(rig.readFile(spacedName)).toBe('hello');
+  });
+
+  // Asks the model to read a version file and write the next version back,
+  // then checks that both a read and a write/edit tool call were logged and
+  // that the file now contains the new version.
+  it('should perform a read-then-write sequence', async () => {
+    const rig = new TestRig();
+    await rig.setup('should perform a read-then-write sequence');
+    const fileName = 'version.txt';
+    rig.createFile(fileName, '1.0.0');
+
+    const prompt = `Read the version from ${fileName} and write the next version 1.0.1 back to the file.`;
+    const result = await rig.run(prompt);
+
+    // Tool logs are read from telemetry, so wait until it is ready.
+    await rig.waitForTelemetryReady();
+    const toolLogs = rig.readToolLogs();
+
+    const readCall = toolLogs.find(
+      (log) => log.toolRequest.name === 'read_file',
+    );
+    // Any file-modifying tool is acceptable. `edit` is included because the
+    // write test in this file already treats it as a valid editing tool.
+    const writeCall = toolLogs.find(
+      (log) =>
+        log.toolRequest.name === 'write_file' ||
+        log.toolRequest.name === 'replace' ||
+        log.toolRequest.name === 'edit',
+    );
+
+    if (!readCall || !writeCall) {
+      printDebugInfo(rig, result, { readCall, writeCall });
+    }
+
+    expect(readCall, 'Expected to find a read_file tool call').toBeDefined();
+    expect(
+      writeCall,
+      'Expected to find a write_file, replace or edit tool call',
+    ).toBeDefined();
+
+    const newFileContent = rig.readFile(fileName);
+    expect(newFileContent).toContain('1.0.1');
+  });
+
+  // Currently skipped. Asks the model to replace every occurrence of a line
+  // in a file and checks both the tool logs and the resulting file content.
+  it.skip('should replace multiple instances of a string', async () => {
+    const targetFile = 'ambiguous.txt';
+    const before = 'Hey there, \ntest line\ntest line';
+    const after = 'Hey there, \nnew line\nnew line';
+
+    const rig = new TestRig();
+    await rig.setup('should replace multiple instances of a string');
+    rig.createFile(targetFile, before);
+
+    const output = await rig.run(
+      `replace "test line" with "new line" in ${targetFile}`,
+    );
+
+    const sawEditTool = await rig.waitForAnyToolCall([
+      'replace',
+      'write_file',
+    ]);
+    if (!sawEditTool) {
+      printDebugInfo(rig, output);
+    }
+    expect(
+      sawEditTool,
+      'Expected to find a replace or write_file tool call',
+    ).toBeTruthy();
+
+    // At least one of the editing calls must be marked successful in the logs.
+    const successfulEdit = rig.readToolLogs().some(({ toolRequest }) => {
+      const isEditTool =
+        toolRequest.name === 'replace' || toolRequest.name === 'write_file';
+      return isEditTool && toolRequest.success;
+    });
+    if (!successfulEdit) {
+      console.error(
+        'Expected a successful edit tool call, but none was found.',
+      );
+      printDebugInfo(rig, output);
+    }
+    expect(successfulEdit, 'Expected a successful edit tool call').toBeTruthy();
+
+    // Both occurrences must have been replaced, and nothing else changed.
+    expect(rig.readFile(targetFile)).toBe(after);
+  });
+
+  // Asks the model to edit a file that does not exist and checks that no
+  // read of it succeeded and that no edit/replace call succeeded.
+  it('should fail safely when trying to edit a non-existent file', async () => {
+    const rig = new TestRig();
+    await rig.setup(
+      'should fail safely when trying to edit a non-existent file',
+    );
+    const fileName = 'non_existent.txt';
+
+    const result = await rig.run(`In ${fileName}, replace "a" with "b"`);
+
+    // Tool logs are read from telemetry, so wait until it is ready.
+    await rig.waitForTelemetryReady();
+    const toolLogs = rig.readToolLogs();
+
+    const readAttempt = toolLogs.find(
+      (log) => log.toolRequest.name === 'read_file',
+    );
+    const editAttempt = toolLogs.find(
+      (log) => log.toolRequest.name === 'edit_file',
+    );
+    // `edit` is treated as a valid editing tool by the write test in this
+    // file, so a successful call to it must be caught here as well as a
+    // successful `replace`.
+    const successfulReplace = toolLogs.find(
+      (log) =>
+        (log.toolRequest.name === 'replace' ||
+          log.toolRequest.name === 'edit') &&
+        log.toolRequest.success,
+    );
+
+    // The model can either investigate (and fail) or do nothing.
+    // If it chose to investigate by reading, that read must have failed.
+    if (readAttempt && readAttempt.toolRequest.success) {
+      console.error(
+        'A read_file attempt succeeded for a non-existent file when it should have failed.',
+      );
+      printDebugInfo(rig, result);
+    }
+    if (readAttempt) {
+      expect(
+        readAttempt.toolRequest.success,
+        'If model tries to read the file, that attempt must fail',
+      ).toBe(false);
+    }
+
+    // CRITICAL: Verify that no matter what the model did, it never successfully
+    // edited or replaced anything.
+    if (editAttempt) {
+      console.error(
+        'A edit_file attempt was made when no file should be written.',
+      );
+      printDebugInfo(rig, result);
+    }
+    expect(
+      editAttempt,
+      'edit_file should not have been called',
+    ).toBeUndefined();
+
+    if (successfulReplace) {
+      console.error('A successful replace occurred when it should not have.');
+      printDebugInfo(rig, result);
+    }
+    expect(
+      successfulReplace,
+      'A successful replace should not have occurred',
+    ).toBeUndefined();
+  });
+});