diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md index 2ce94c38b..a7d40ee54 100644 --- a/docs/users/configuration/settings.md +++ b/docs/users/configuration/settings.md @@ -51,14 +51,15 @@ Settings are organized into categories. All settings should be placed within the #### general -| Setting | Type | Description | Default | -| ------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------- | ----------- | -| `general.preferredEditor` | string | The preferred editor to open files in. | `undefined` | -| `general.vimMode` | boolean | Enable Vim keybindings. | `false` | -| `general.disableAutoUpdate` | boolean | Disable automatic updates. | `false` | -| `general.disableUpdateNag` | boolean | Disable update notification prompts. | `false` | -| `general.gitCoAuthor` | boolean | Automatically add a Co-authored-by trailer to git commit messages when commits are made through Qwen Code. | `true` | -| `general.checkpointing.enabled` | boolean | Enable session checkpointing for recovery. | `false` | +| Setting | Type | Description | Default | +| ------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | +| `general.preferredEditor` | string | The preferred editor to open files in. | `undefined` | +| `general.vimMode` | boolean | Enable Vim keybindings. | `false` | +| `general.disableAutoUpdate` | boolean | Disable automatic updates. | `false` | +| `general.disableUpdateNag` | boolean | Disable update notification prompts. | `false` | +| `general.gitCoAuthor` | boolean | Automatically add a Co-authored-by trailer to git commit messages when commits are made through Qwen Code. | `true` | +| `general.checkpointing.enabled` | boolean | Enable session checkpointing for recovery. | `false` | +| `general.defaultFileEncoding` | string | Default encoding for new files. Use `"utf-8"` (default) for UTF-8 without BOM, or `"utf-8-bom"` for UTF-8 with BOM. Only change this if your project specifically requires BOM. | `"utf-8"` | #### output diff --git a/integration-tests/utf-bom-encoding.test.ts b/integration-tests/utf-bom-encoding.test.ts index bb682de1e..31dd41522 100644 --- a/integration-tests/utf-bom-encoding.test.ts +++ b/integration-tests/utf-bom-encoding.test.ts @@ -5,7 +5,7 @@ */ import { describe, it, expect, beforeAll, afterAll } from 'vitest'; -import { writeFileSync } from 'node:fs'; +import { writeFileSync, readFileSync } from 'node:fs'; import { join } from 'node:path'; import { TestRig } from './test-helper.js'; @@ -121,4 +121,98 @@ d('BOM end-to-end integration', () => { 'BOM_OK UTF-32BE', ); }); + + it('should preserve UTF-8 BOM when editing existing file', async () => { + // Create a file with UTF-8 BOM and Chinese content + const originalContent = + '// 这是一个测试文件\n// 包含中文注释\nfunction test() {\n return "hello";\n}\n'; + const fileWithBOM = Buffer.concat([ + Buffer.from([0xef, 0xbb, 0xbf]), + Buffer.from(originalContent, 'utf8'), + ]); + + const filename = 'bom-test.js'; + writeFileSync(join(dir, filename), fileWithBOM); + + // Ask Qwen Code to edit the file + const prompt = `edit the file ${filename} to change the return value from "hello" to "world"`; + await rig.run(prompt); + await rig.waitForToolCall('edit_file'); + + // Read the modified file as raw bytes + const modifiedBuffer = readFileSync(join(dir, filename)); + + // Verify BOM is preserved (first 3 bytes should be EF BB BF) + expect(modifiedBuffer[0]).toBe(0xef); + expect(modifiedBuffer[1]).toBe(0xbb); + expect(modifiedBuffer[2]).toBe(0xbf); + + // Verify the content was actually changed to include 'world' + const modifiedContent = modifiedBuffer.toString('utf8'); + expect(modifiedContent).toContain('world'); + }); + + it('should preserve UTF-8 BOM when overwriting file with write_file', async () => { + // Create a file with UTF-8 BOM + const originalContent = '// Original BOM file\nconst x = 1;\n'; + const fileWithBOM = Buffer.concat([ + Buffer.from([0xef, 0xbb, 0xbf]), + Buffer.from(originalContent, 'utf8'), + ]); + + const filename = 'bom-overwrite.js'; + writeFileSync(join(dir, filename), fileWithBOM); + + // Ask Qwen Code to overwrite the file with new content + const prompt = `overwrite the file ${filename} with: const y = 2;\n// new content`; + await rig.run(prompt); + await rig.waitForToolCall('write_file'); + + // Read the modified file as raw bytes + const modifiedBuffer = readFileSync(join(dir, filename)); + + // Verify BOM is preserved (first 3 bytes should be EF BB BF) + expect(modifiedBuffer[0]).toBe(0xef); + expect(modifiedBuffer[1]).toBe(0xbb); + expect(modifiedBuffer[2]).toBe(0xbf); + + // Verify the new content includes 'const y = 2' + const modifiedContent = modifiedBuffer.toString('utf8'); + expect(modifiedContent).toContain('const y = 2'); + }); +}); + +describe('BOM with defaultFileEncoding configuration', () => { + it('should create new file with BOM when defaultFileEncoding is utf-8-bom', async () => { + const rigWithBOM = new TestRig(); + await rigWithBOM.setup('bom-default-encoding', { + settings: { + general: { + defaultFileEncoding: 'utf-8-bom', + }, + }, + }); + + const filename = 'new-file-with-bom.js'; + + // Ask Qwen Code to create a new file + const prompt = `create a new file called ${filename} with content: const greeting = "hello";`; + await rigWithBOM.run(prompt); + await rigWithBOM.waitForToolCall('write_file'); + + // Read the created file as raw bytes + const filePath = join(rigWithBOM.testDir!, filename); + const fileBuffer = readFileSync(filePath); + + // Verify BOM is present (first 3 bytes should be EF BB BF) + expect(fileBuffer[0]).toBe(0xef); + expect(fileBuffer[1]).toBe(0xbb); + expect(fileBuffer[2]).toBe(0xbf); + + // Verify the content includes the expected string + const fileContent = fileBuffer.toString('utf8'); + expect(fileContent).toContain('const greeting'); + + await rigWithBOM.cleanup(); + }); }); diff --git a/packages/cli/src/acp-integration/service/filesystem.test.ts b/packages/cli/src/acp-integration/service/filesystem.test.ts index bc1f56e81..a2ae0939e 100644 --- a/packages/cli/src/acp-integration/service/filesystem.test.ts +++ b/packages/cli/src/acp-integration/service/filesystem.test.ts @@ -12,6 +12,7 @@ import { ACP_ERROR_CODES } from '../errorCodes.js'; const createFallback = (): FileSystemService => ({ readTextFile: vi.fn(), writeTextFile: vi.fn(), + detectFileBOM: vi.fn().mockResolvedValue(false), findFiles: vi.fn().mockReturnValue([]), }); diff --git a/packages/cli/src/acp-integration/service/filesystem.ts b/packages/cli/src/acp-integration/service/filesystem.ts index 18aef1bec..fc86ee5bb 100644 --- a/packages/cli/src/acp-integration/service/filesystem.ts +++ b/packages/cli/src/acp-integration/service/filesystem.ts @@ -54,17 +54,30 @@ export class AcpFileSystemService implements FileSystemService { return response.content; } - async writeTextFile(filePath: string, content: string): Promise { + async writeTextFile( + filePath: string, + content: string, + options?: { bom?: boolean }, + ): Promise { if (!this.capabilities.writeTextFile) { - return this.fallback.writeTextFile(filePath, content); + return this.fallback.writeTextFile(filePath, content, options); } + // Prepend BOM character if requested + const finalContent = options?.bom ? '\uFEFF' + content : content; + await this.client.writeTextFile({ path: filePath, - content, + content: finalContent, sessionId: this.sessionId, }); } + + async detectFileBOM(filePath: string): Promise { + // Always use fallback for BOM detection + return this.fallback.detectFileBOM(filePath); + } + findFiles(fileName: string, searchPaths: readonly string[]): string[] { return this.fallback.findFiles(fileName, searchPaths); } diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index d4752d4be..f8bd784a3 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -10,6 +10,7 @@ import { Config, DEFAULT_QWEN_EMBEDDING_MODEL, FileDiscoveryService, + FileEncoding, getCurrentGeminiMdFilename, loadServerHierarchicalMemory, setGeminiMdFilename as setServerGeminiMdFilename, @@ -1030,6 +1031,8 @@ export async function loadCliConfig( // always be true and the settings file can never disable recording. chatRecording: argv.chatRecording ?? settings.general?.chatRecording ?? true, + defaultFileEncoding: + settings.general?.defaultFileEncoding ?? FileEncoding.UTF8, lsp: { enabled: lspEnabled, }, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 44340b81e..943045608 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -244,6 +244,20 @@ const SETTINGS_SCHEMA = { 'Enable saving chat history to disk. Disabling this will also prevent --continue and --resume from working.', showInDialog: false, }, + defaultFileEncoding: { + type: 'enum', + label: 'Default File Encoding', + category: 'General', + requiresRestart: false, + default: 'utf-8', + description: + 'Default encoding for new files. Use "utf-8" (default) for UTF-8 without BOM, or "utf-8-bom" for UTF-8 with BOM. Only change this if your project specifically requires BOM.', + showInDialog: false, + options: [ + { value: 'utf-8', label: 'UTF-8 (without BOM)' }, + { value: 'utf-8-bom', label: 'UTF-8 with BOM' }, + ], + }, }, }, output: { diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index af2d28555..7169e0be9 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -36,6 +36,8 @@ import { FileDiscoveryService } from '../services/fileDiscoveryService.js'; import { type FileSystemService, StandardFileSystemService, + type FileEncodingType, + FileEncoding, } from '../services/fileSystemService.js'; import { GitService } from '../services/gitService.js'; @@ -350,6 +352,7 @@ export interface ConfigParameters { chatCompression?: ChatCompressionSettings; interactive?: boolean; trustedFolder?: boolean; + defaultFileEncoding?: FileEncodingType; useRipgrep?: boolean; useBuiltinRipgrep?: boolean; shouldUseNodePtyShell?: boolean; @@ -512,6 +515,7 @@ export class Config { private readonly eventEmitter?: EventEmitter; private readonly useSmartEdit: boolean; private readonly channel: string | undefined; + private readonly defaultFileEncoding: FileEncodingType; constructor(params: ConfigParameters) { this.sessionId = params.sessionId ?? randomUUID(); @@ -625,6 +629,7 @@ export class Config { this.enableToolOutputTruncation = params.enableToolOutputTruncation ?? true; this.useSmartEdit = params.useSmartEdit ?? false; this.channel = params.channel; + this.defaultFileEncoding = params.defaultFileEncoding ?? FileEncoding.UTF8; this.storage = new Storage(this.targetDir); this.vlmSwitchMode = params.vlmSwitchMode; this.inputFormat = params.inputFormat ?? InputFormat.TEXT; @@ -1432,6 +1437,14 @@ export class Config { return this.channel; } + /** + * Get the default file encoding for new files. + * @returns FileEncodingType + */ + getDefaultFileEncoding(): FileEncodingType { + return this.defaultFileEncoding; + } + /** * Get the current FileSystemService */ diff --git a/packages/core/src/services/fileSystemService.test.ts b/packages/core/src/services/fileSystemService.test.ts index 4ca5c3329..64219c2dc 100644 --- a/packages/core/src/services/fileSystemService.test.ts +++ b/packages/core/src/services/fileSystemService.test.ts @@ -55,5 +55,98 @@ describe('StandardFileSystemService', () => { 'utf-8', ); }); + + it('should write file with BOM when bom option is true', async () => { + vi.mocked(fs.writeFile).mockResolvedValue(); + + await fileSystem.writeTextFile('/test/file.txt', 'Hello, World!', { + bom: true, + }); + + // Verify that fs.writeFile was called with a Buffer that starts with BOM + const writeCall = vi.mocked(fs.writeFile).mock.calls[0]; + expect(writeCall[0]).toBe('/test/file.txt'); + expect(writeCall[1]).toBeInstanceOf(Buffer); + const buffer = writeCall[1] as Buffer; + expect(buffer[0]).toBe(0xef); + expect(buffer[1]).toBe(0xbb); + expect(buffer[2]).toBe(0xbf); + }); + + it('should write file without BOM when bom option is false', async () => { + vi.mocked(fs.writeFile).mockResolvedValue(); + + await fileSystem.writeTextFile('/test/file.txt', 'Hello, World!', { + bom: false, + }); + + expect(fs.writeFile).toHaveBeenCalledWith( + '/test/file.txt', + 'Hello, World!', + 'utf-8', + ); + }); + }); + + describe('detectFileBOM', () => { + it('should return true for file with UTF-8 BOM', async () => { + // Create a buffer with BOM + const bomBuffer = Buffer.from([0xef, 0xbb, 0xbf]); + + // Mock fs.open to return a file descriptor that fills buffer with BOM + vi.mocked(fs.open).mockImplementation( + async () => + ({ + read: async (buffer: Buffer, offset: number) => { + // Copy BOM bytes to the buffer + bomBuffer.copy(buffer, offset); + return { bytesRead: 3 }; + }, + close: async () => {}, + }) as unknown as fs.FileHandle, + ); + + const result = await fileSystem.detectFileBOM('/test/file.txt'); + expect(result).toBe(true); + }); + + it('should return false for file without BOM', async () => { + // Mock file without BOM (starts with plain text) + vi.mocked(fs.open).mockImplementation( + async () => + ({ + read: async (buffer: Buffer, offset: number) => { + // Copy plain text bytes ("// ") + const plainText = Buffer.from([0x2f, 0x2f, 0x20]); + plainText.copy(buffer, offset); + return { bytesRead: 3 }; + }, + close: async () => {}, + }) as unknown as fs.FileHandle, + ); + + const result = await fileSystem.detectFileBOM('/test/file.txt'); + expect(result).toBe(false); + }); + + it('should return false for non-existent file', async () => { + vi.mocked(fs.open).mockRejectedValue(new Error('ENOENT')); + + const result = await fileSystem.detectFileBOM('/test/nonexistent.txt'); + expect(result).toBe(false); + }); + + it('should return false for empty file', async () => { + vi.mocked(fs.open).mockImplementation( + async () => + ({ + read: async () => ({ bytesRead: 0 }), + close: async () => {}, + }) as unknown as fs.FileHandle, + ); + + const result = await fileSystem.detectFileBOM('/test/empty.txt'); + expect(result).toBe(false); + }); }); }); diff --git a/packages/core/src/services/fileSystemService.ts b/packages/core/src/services/fileSystemService.ts index 67c910611..97a4d30b1 100644 --- a/packages/core/src/services/fileSystemService.ts +++ b/packages/core/src/services/fileSystemService.ts @@ -8,6 +8,19 @@ import fs from 'node:fs/promises'; import * as path from 'node:path'; import { globSync } from 'glob'; +/** + * Supported file encodings for new files. + */ +export const FileEncoding = { + UTF8: 'utf-8', + UTF8_BOM: 'utf-8-bom', +} as const; + +/** + * Type for file encoding values. + */ +export type FileEncodingType = (typeof FileEncoding)[keyof typeof FileEncoding]; + /** * Interface for file system operations that may be delegated to different implementations */ @@ -25,8 +38,21 @@ export interface FileSystemService { * * @param filePath - The path to the file to write * @param content - The content to write + * @param options - Optional write options including whether to add BOM */ - writeTextFile(filePath: string, content: string): Promise; + writeTextFile( + filePath: string, + content: string, + options?: WriteTextFileOptions, + ): Promise; + + /** + * Detects if a file has UTF-8 BOM (Byte Order Mark). + * + * @param filePath - The path to the file to check + * @returns True if the file has BOM, false otherwise + */ + detectFileBOM(filePath: string): Promise; /** * Finds files with a given name within specified search paths. @@ -38,6 +64,34 @@ export interface FileSystemService { findFiles(fileName: string, searchPaths: readonly string[]): string[]; } +/** + * Options for writing text files + */ +export interface WriteTextFileOptions { + /** + * Whether to write the file with UTF-8 BOM. + * If true, EF BB BF will be prepended to the content. + * @default false + */ + bom?: boolean; +} + +/** + * Detects if a buffer has UTF-8 BOM (Byte Order Mark). + * UTF-8 BOM is the byte sequence EF BB BF. + * + * @param buffer - The buffer to check + * @returns True if the buffer starts with UTF-8 BOM + */ +function hasUTF8BOM(buffer: Buffer): boolean { + return ( + buffer.length >= 3 && + buffer[0] === 0xef && + buffer[1] === 0xbb && + buffer[2] === 0xbf + ); +} + /** * Standard file system implementation */ @@ -46,8 +100,40 @@ export class StandardFileSystemService implements FileSystemService { return fs.readFile(filePath, 'utf-8'); } - async writeTextFile(filePath: string, content: string): Promise { - await fs.writeFile(filePath, content, 'utf-8'); + async writeTextFile( + filePath: string, + content: string, + options?: WriteTextFileOptions, + ): Promise { + const bom = options?.bom ?? false; + + if (bom) { + // Prepend UTF-8 BOM (EF BB BF) + const bomBuffer = Buffer.from([0xef, 0xbb, 0xbf]); + const contentBuffer = Buffer.from(content, 'utf-8'); + await fs.writeFile(filePath, Buffer.concat([bomBuffer, contentBuffer])); + } else { + await fs.writeFile(filePath, content, 'utf-8'); + } + } + + async detectFileBOM(filePath: string): Promise { + try { + // Read only the first 3 bytes to check for BOM + const fd = await fs.open(filePath, 'r'); + const buffer = Buffer.alloc(3); + const { bytesRead } = await fd.read(buffer, 0, 3, 0); + await fd.close(); + + if (bytesRead < 3) { + return false; + } + + return hasUTF8BOM(buffer); + } catch { + // File doesn't exist or can't be read - treat as no BOM + return false; + } } findFiles(fileName: string, searchPaths: readonly string[]): string[] { diff --git a/packages/core/src/tools/write-file.test.ts b/packages/core/src/tools/write-file.test.ts index 6b60c42b3..b0d7a2b0d 100644 --- a/packages/core/src/tools/write-file.test.ts +++ b/packages/core/src/tools/write-file.test.ts @@ -81,6 +81,7 @@ const mockConfigInternal = { registerTool: vi.fn(), discoverTools: vi.fn(), }) as unknown as ToolRegistry, + getDefaultFileEncoding: () => 'utf-8', }; const mockConfig = mockConfigInternal as unknown as Config; @@ -730,4 +731,129 @@ describe('WriteFileTool', () => { ); }); }); + + describe('BOM preservation (Issue #1672)', () => { + const abortSignal = new AbortController().signal; + + it('should preserve BOM when overwriting existing file with BOM', async () => { + const filePath = path.join(rootDir, 'bom_file.txt'); + const originalContent = 'original content'; + const newContent = 'new content'; + + // Create file with BOM + fs.writeFileSync( + filePath, + Buffer.concat([ + Buffer.from([0xef, 0xbb, 0xbf]), + Buffer.from(originalContent, 'utf-8'), + ]), + ); + + // Spy on writeTextFile to verify BOM option + const writeSpy = vi.spyOn(fsService, 'writeTextFile'); + + const params = { file_path: filePath, content: newContent }; + const invocation = tool.build(params); + await invocation.execute(abortSignal); + + // Verify writeTextFile was called with bom: true + expect(writeSpy).toHaveBeenCalledWith(filePath, newContent, { + bom: true, + }); + + // Cleanup + if (fs.existsSync(filePath)) { + fs.unlinkSync(filePath); + } + }); + + it('should not add BOM when overwriting existing file without BOM', async () => { + const filePath = path.join(rootDir, 'no_bom_file.txt'); + const originalContent = 'original content'; + const newContent = 'new content'; + + // Create file without BOM + fs.writeFileSync(filePath, originalContent, 'utf-8'); + + // Spy on writeTextFile to verify BOM option + const writeSpy = vi.spyOn(fsService, 'writeTextFile'); + + const params = { file_path: filePath, content: newContent }; + const invocation = tool.build(params); + await invocation.execute(abortSignal); + + // Verify writeTextFile was called with bom: false + expect(writeSpy).toHaveBeenCalledWith(filePath, newContent, { + bom: false, + }); + + // Cleanup + if (fs.existsSync(filePath)) { + fs.unlinkSync(filePath); + } + }); + + it('should use default encoding for new files', async () => { + const filePath = path.join(rootDir, 'new_file.txt'); + const newContent = 'new content'; + + // Ensure file does not exist + if (fs.existsSync(filePath)) { + fs.unlinkSync(filePath); + } + + // Spy on writeTextFile to verify BOM option + const writeSpy = vi.spyOn(fsService, 'writeTextFile'); + + const params = { file_path: filePath, content: newContent }; + const invocation = tool.build(params); + await invocation.execute(abortSignal); + + // Verify writeTextFile was called with bom: false (default is utf-8) + expect(writeSpy).toHaveBeenCalledWith(filePath, newContent, { + bom: false, + }); + + // Cleanup + if (fs.existsSync(filePath)) { + fs.unlinkSync(filePath); + } + }); + + it('should use BOM for new files when defaultFileEncoding is utf-8-bom', async () => { + const filePath = path.join(rootDir, 'new_file_bom.txt'); + const newContent = 'new content'; + + // Ensure file does not exist + if (fs.existsSync(filePath)) { + fs.unlinkSync(filePath); + } + + // Mock config to return utf-8-bom + const originalGetDefaultFileEncoding = + mockConfigInternal.getDefaultFileEncoding; + mockConfigInternal.getDefaultFileEncoding = () => 'utf-8-bom'; + + // Spy on writeTextFile to verify BOM option + const writeSpy = vi.spyOn(fsService, 'writeTextFile'); + + const params = { file_path: filePath, content: newContent }; + const invocation = tool.build(params); + await invocation.execute(abortSignal); + + // Verify writeTextFile was called with bom: true + expect(writeSpy).toHaveBeenCalledWith(filePath, newContent, { + bom: true, + }); + + // Restore mock + mockConfigInternal.getDefaultFileEncoding = + originalGetDefaultFileEncoding; + + // Cleanup + if (fs.existsSync(filePath)) { + fs.unlinkSync(filePath); + } + }); + }); }); diff --git a/packages/core/src/tools/write-file.ts b/packages/core/src/tools/write-file.ts index c13c95539..b3d524a92 100644 --- a/packages/core/src/tools/write-file.ts +++ b/packages/core/src/tools/write-file.ts @@ -24,6 +24,7 @@ import { ToolConfirmationOutcome, } from './tools.js'; import { ToolErrorType } from './tool-error.js'; +import { FileEncoding } from '../services/fileSystemService.js'; import { makeRelative, shortenPath } from '../utils/paths.js'; import { getErrorMessage, isNodeError } from '../utils/errors.js'; import { DEFAULT_DIFF_OPTIONS, getDiffStat } from './diffOptions.js'; @@ -235,9 +236,20 @@ class WriteFileToolInvocation extends BaseToolInvocation< fs.mkdirSync(dirName, { recursive: true }); } + // Check if file exists and has BOM to preserve encoding + // For new files, use the configured default encoding + let useBOM = false; + if (!isNewFile) { + useBOM = await this.config + .getFileSystemService() + .detectFileBOM(file_path); + } else { + useBOM = this.config.getDefaultFileEncoding() === FileEncoding.UTF8_BOM; + } + await this.config .getFileSystemService() - .writeTextFile(file_path, fileContent); + .writeTextFile(file_path, fileContent, { bom: useBOM }); // Generate diff for display result const fileName = path.basename(file_path);