fix(core): cap recursive file crawler at 100k entries to prevent OOM (#3138)

When the @ autocomplete triggers RecursiveFileSearch, the crawler
materialises the entire project tree into memory with no upper bound.
For very large workspaces (missing .gitignore, huge node_modules, home
directory as cwd) this pushes Node.js past its heap limit and crashes.

- Add a `maxFiles` option to CrawlOptions; use fdir's withMaxFiles() to
  stop traversal early instead of post-hoc truncation (see the sketch
  after this list)
- Apply file-level ignore patterns during the crawl via fdir's filter()
  so ignored files don't consume the maxFiles budget
- Include maxFiles in the crawl cache key, so a result truncated at one
  cap is never served to a caller with a different (or no) cap
- Set MAX_CRAWL_FILES = 100_000 in RecursiveFileSearch, which caps peak
  memory for the file list at roughly 50 MB
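
A minimal sketch of that wiring, assuming fdir's builder API as named
above; the CrawlOptions shape, the shouldIgnoreFile helper, and the
cache-key construction are illustrative, not the repo's actual code:

    import { fdir } from 'fdir';

    interface CrawlOptions {
      crawlDirectory: string;
      maxFiles?: number; // new: hard cap on entries collected during the crawl
      // ...remaining options elided
    }

    // Hypothetical stand-in for the crawler's file-level ignore check.
    declare function shouldIgnoreFile(relativePath: string): boolean;

    async function crawlCapped(opts: CrawlOptions): Promise<string[]> {
      let builder = new fdir()
        .withRelativePaths()
        // filter() runs during traversal, so ignored files are dropped
        // before they can consume the maxFiles budget.
        .filter((path, isDirectory) => isDirectory || !shouldIgnoreFile(path));
      if (opts.maxFiles !== undefined) {
        // withMaxFiles() stops the walk once the cap is hit, instead of
        // materialising the whole tree and truncating afterwards.
        builder = builder.withMaxFiles(opts.maxFiles);
      }
      return builder.crawl(opts.crawlDirectory).withPromise();
    }

    // The cache key must distinguish capped from uncapped crawls, or a
    // result truncated at maxFiles could be served to a caller that asked
    // for everything. Key shape here is illustrative.
    const cacheKey = (opts: CrawlOptions) =>
      JSON.stringify({ dir: opts.crawlDirectory, maxFiles: opts.maxFiles ?? null });

The 100_000 cap in the last bullet lines up with the ~50 MB figure at
roughly 0.5 KB per stored path string.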

Fixes #3130
tanzhenxin 2026-04-11 16:44:02 +08:00 committed by GitHub
parent 61ad9db9c1
commit e216ab35fc
4 changed files with 137 additions and 3 deletions

@@ -178,8 +178,10 @@ describe('crawler', () => {
     });
     expect(results).toEqual(
-      expect.arrayContaining(['.', '.gitignore', 'Foo.mk', 'bar.mk']),
+      expect.arrayContaining(['.', '.gitignore', 'Foo.mk']),
     );
+    // bar.mk matches *.mk and is not negated, so it should be filtered out
+    expect(results).not.toContain('bar.mk');
   });

   it('should handle directory negation with glob', async () => {
@@ -571,4 +573,106 @@
       );
     });
   });
+
+  describe('with maxFiles', () => {
+    it('should truncate results when maxFiles is exceeded', async () => {
+      tmpDir = await createTmpDir({
+        'a.txt': '',
+        'b.txt': '',
+        'c.txt': '',
+        sub: ['d.txt', 'e.txt'],
+      });
+      const ignore = loadIgnoreRules({
+        projectRoot: tmpDir,
+        useGitignore: false,
+        useQwenignore: false,
+        ignoreDirs: [],
+      });
+      const allResults = await crawl({
+        crawlDirectory: tmpDir,
+        cwd: tmpDir,
+        ignore,
+        cache: false,
+        cacheTtl: 0,
+      });
+      const limitedResults = await crawl({
+        crawlDirectory: tmpDir,
+        cwd: tmpDir,
+        ignore,
+        cache: false,
+        cacheTtl: 0,
+        maxFiles: 3,
+      });
+      expect(allResults.length).toBeGreaterThan(3);
+      expect(limitedResults.length).toBe(3);
+    });
+
+    it('should not count file-ignored entries toward maxFiles budget', async () => {
+      tmpDir = await createTmpDir({
+        '.gitignore': '*.log',
+        'a.txt': '',
+        'b.txt': '',
+        'noise1.log': '',
+        'noise2.log': '',
+        'noise3.log': '',
+      });
+      const ignore = loadIgnoreRules({
+        projectRoot: tmpDir,
+        useGitignore: true,
+        useQwenignore: false,
+        ignoreDirs: [],
+      });
+      // Valid entries: '.', '.gitignore', 'a.txt', 'b.txt' = 4
+      // Ignored entries: 'noise1.log', 'noise2.log', 'noise3.log'
+      // With maxFiles=4, all valid entries should fit because
+      // .log files are filtered out before the cap is applied.
+      const results = await crawl({
+        crawlDirectory: tmpDir,
+        cwd: tmpDir,
+        ignore,
+        cache: false,
+        cacheTtl: 0,
+        maxFiles: 4,
+      });
+      expect(results).toEqual(
+        expect.arrayContaining(['.', '.gitignore', 'a.txt', 'b.txt']),
+      );
+      for (const r of results) {
+        expect(r).not.toMatch(/\.log$/);
+      }
+    });
+
+    it('should not truncate when maxFiles exceeds total entries', async () => {
+      tmpDir = await createTmpDir({
+        'a.txt': '',
+        'b.txt': '',
+      });
+      const ignore = loadIgnoreRules({
+        projectRoot: tmpDir,
+        useGitignore: false,
+        useQwenignore: false,
+        ignoreDirs: [],
+      });
+      const results = await crawl({
+        crawlDirectory: tmpDir,
+        cwd: tmpDir,
+        ignore,
+        cache: false,
+        cacheTtl: 0,
+        maxFiles: 1000,
+      });
+      expect(results.length).toBeLessThanOrEqual(1000);
+      expect(results).toEqual(expect.arrayContaining(['.', 'a.txt', 'b.txt']));
+    });
+  });
 });
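
For context, this is roughly what the RecursiveFileSearch call site
described in the commit message could look like. It is not part of this
diff, so everything except MAX_CRAWL_FILES and crawl's option names is
an illustrative assumption:

    // Sketch only: RecursiveFileSearch's real surroundings are not shown here.
    const MAX_CRAWL_FILES = 100_000; // ~50 MB worst case for the in-memory file list

    const files = await crawl({
      crawlDirectory: projectRoot, // illustrative variable
      cwd: projectRoot,
      ignore,
      cache: true,      // the tests above disable caching; the real
      cacheTtl: 30_000, // call site and TTL may differ
      maxFiles: MAX_CRAWL_FILES,
    });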