diff --git a/docs/developers/tools/file-system.md b/docs/developers/tools/file-system.md index bfa6de8d0..bf449b44e 100644 --- a/docs/developers/tools/file-system.md +++ b/docs/developers/tools/file-system.md @@ -24,7 +24,7 @@ Qwen Code provides a comprehensive suite of tools for interacting with the local ## 2. `read_file` (ReadFile) -`read_file` reads and returns the content of a specified file. This tool handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges. Other binary file types are generally skipped. +`read_file` reads and returns the content of a specified file. This tool handles text and images (PNG, JPG, GIF, WEBP, SVG, BMP). For text files, it can read specific line ranges. PDF files are not supported directly - extract text externally first. Other binary file types are generally skipped. - **Tool name:** `read_file` - **Display name:** ReadFile @@ -35,11 +35,13 @@ Qwen Code provides a comprehensive suite of tools for interacting with the local - `limit` (number, optional): For text files, the maximum number of lines to read. If omitted, reads a default maximum (e.g., 2000 lines) or the entire file if feasible. - **Behavior:** - For text files: Returns the content. If `offset` and `limit` are used, returns only that slice of lines. Indicates if content was truncated due to line limits or line length limits. - - For image and PDF files: Returns the file content as a base64-encoded data structure suitable for model consumption. + - For image files: Returns the file content as a base64-encoded `inlineData` object suitable for model consumption. + - For PDF files: Returns an error message directing users to extract text externally. - For other binary files: Attempts to identify and skip them, returning a message indicating it's a generic binary file. - **Output:** (`llmContent`): - For text files: The file content, potentially prefixed with a truncation message (e.g., `[File content truncated: showing lines 1-100 of 500 total lines...]\nActual file content...`). - - For image/PDF files: An object containing `inlineData` with `mimeType` and base64 `data` (e.g., `{ inlineData: { mimeType: 'image/png', data: 'base64encodedstring' } }`). + - For image files: An object containing `inlineData` with `mimeType` and base64 `data` (e.g., `{ inlineData: { mimeType: 'image/png', data: 'base64encodedstring' } }`). + - For PDF files: An error message string explaining that PDFs are not supported. - For other binary files: A message like `Cannot display content of binary file: /path/to/data.bin`. - **Confirmation:** No. diff --git a/packages/core/src/utils/fileUtils.test.ts b/packages/core/src/utils/fileUtils.test.ts index da9f257fd..d695642b2 100644 --- a/packages/core/src/utils/fileUtils.test.ts +++ b/packages/core/src/utils/fileUtils.test.ts @@ -738,7 +738,7 @@ describe('fileUtils', () => { expect(result.returnDisplay).toContain('Read image file: image.png'); }); - it('should process a PDF file', async () => { + it('should reject PDF files with error message', async () => { const fakePdfData = Buffer.from('fake pdf data'); actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData); mockMimeGetType.mockReturnValue('application/pdf'); @@ -746,21 +746,10 @@ describe('fileUtils', () => { testPdfFilePath, mockConfig, ); - expect( - (result.llmContent as { inlineData: unknown }).inlineData, - ).toBeDefined(); - expect( - (result.llmContent as { inlineData: { mimeType: string } }).inlineData - .mimeType, - ).toBe('application/pdf'); - expect( - (result.llmContent as { inlineData: { data: string } }).inlineData.data, - ).toBe(fakePdfData.toString('base64')); - expect( - (result.llmContent as { inlineData: { displayName?: string } }) - .inlineData.displayName, - ).toBe('document.pdf'); - expect(result.returnDisplay).toContain('Read pdf file: document.pdf'); + expect(typeof result.llmContent).toBe('string'); + expect(result.llmContent).toContain('PDF files cannot be read directly'); + expect(result.returnDisplay).toContain('Skipped PDF file'); + expect(result.error).toContain('PDF files are not supported'); }); it('should read an SVG file as text when under 1MB', async () => { diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index 3e4124d18..5e42bc5f4 100644 --- a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -461,8 +461,7 @@ export async function processSingleFileContent( } case 'image': case 'audio': - case 'video': - case 'pdf': { + case 'video': { const contentBuffer = await fs.promises.readFile(filePath); const base64Data = contentBuffer.toString('base64'); return { @@ -476,6 +475,13 @@ export async function processSingleFileContent( returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`, }; } + case 'pdf': { + return { + llmContent: `PDF files cannot be read directly. Use an external tool to extract text from: ${relativePathForDisplay}`, + returnDisplay: `Skipped PDF file: ${relativePathForDisplay}`, + error: `PDF files are not supported. Extract text externally and paste it instead.`, + }; + } default: { // Should not happen with current detectFileType logic const exhaustiveCheck: never = fileType;