From 3c0e3293be1c8f3fc10b03507642912671d9d880 Mon Sep 17 00:00:00 2001
From: wenshao
Date: Tue, 5 May 2026 22:52:32 +0800
Subject: [PATCH] fix(attribution): dedup snapshot writes, cap
 excludedGenerated, doc commit toggle scope
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rsf- (Copilot): recordAttributionSnapshot wrote a full snapshot to the
JSONL on every non-retry turn, even when the tracked state was
unchanged. Long-running sessions accumulated thousands of identical
snapshot copies, inflating session size and slowing /resume hydrate.
Dedup by JSON-equality with the prior write — first write always goes
through, identical successors are no-ops.

rsgo (Copilot): excludedGenerated path list was unbounded. A commit
churning thousands of generated artifacts (large dist/ rebuild) could
push the JSON note past MAX_NOTE_BYTES (30KB) and lose attribution for
the real source files in the same commit. Cap the serialized sample at
MAX_EXCLUDED_GENERATED_SAMPLE (50) and add excludedGeneratedCount for
the true total.

rsg9 + rshM (Copilot): the gitCoAuthor.commit description claimed the
toggle only controlled the Co-authored-by trailer, but
attachCommitAttribution also gates the per-file git-notes payload on
the same flag. Update both the schema description and the settings.md
table to mention both effects so disabling the option isn't a silent
surprise.
--- docs/users/configuration/settings.md | 22 +++++------ packages/cli/src/config/settingsSchema.ts | 2 +- .../src/services/attributionTrailer.test.ts | 2 + .../core/src/services/attributionTrailer.ts | 6 +-- .../src/services/chatRecordingService.test.ts | 39 +++++++++++++++++++ .../core/src/services/chatRecordingService.ts | 21 ++++++++++ .../core/src/services/commitAttribution.ts | 26 ++++++++++++- 7 files changed, 101 insertions(+), 17 deletions(-) diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md index 1d6ccc984..1cb700874 100644 --- a/docs/users/configuration/settings.md +++ b/docs/users/configuration/settings.md @@ -83,17 +83,17 @@ Settings are organized into categories. Most settings should be placed within th #### general -| Setting | Type | Description | Default | -| ------------------------------------------ | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -| `general.preferredEditor` | string | The preferred editor to open files in. | `undefined` | -| `general.vimMode` | boolean | Enable Vim keybindings. | `false` | -| `general.enableAutoUpdate` | boolean | Enable automatic update checks and installations on startup. | `true` | -| `general.showSessionRecap` | boolean | Auto-show a one-line "where you left off" recap when returning to the terminal after being away. Off by default. Use `/recap` to trigger manually regardless of this setting. | `false` | -| `general.sessionRecapAwayThresholdMinutes` | number | Minutes the terminal must be blurred before an auto-recap fires on focus-in. Only used when `showSessionRecap` is enabled. | `5` | -| `general.gitCoAuthor.commit` | boolean | Automatically add a Co-authored-by trailer to git commit messages when commits are made through Qwen Code. 
| `true` | -| `general.gitCoAuthor.pr` | boolean | Append a Qwen Code attribution line to pull request descriptions when running `gh pr create`. | `true` | -| `general.checkpointing.enabled` | boolean | Enable session checkpointing for recovery. | `false` | -| `general.defaultFileEncoding` | string | Default encoding for new files. Use `"utf-8"` (default) for UTF-8 without BOM, or `"utf-8-bom"` for UTF-8 with BOM. Only change this if your project specifically requires BOM. | `"utf-8"` | +| Setting | Type | Description | Default | +| ------------------------------------------ | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | +| `general.preferredEditor` | string | The preferred editor to open files in. | `undefined` | +| `general.vimMode` | boolean | Enable Vim keybindings. | `false` | +| `general.enableAutoUpdate` | boolean | Enable automatic update checks and installations on startup. | `true` | +| `general.showSessionRecap` | boolean | Auto-show a one-line "where you left off" recap when returning to the terminal after being away. Off by default. Use `/recap` to trigger manually regardless of this setting. | `false` | +| `general.sessionRecapAwayThresholdMinutes` | number | Minutes the terminal must be blurred before an auto-recap fires on focus-in. Only used when `showSessionRecap` is enabled. | `5` | +| `general.gitCoAuthor.commit` | boolean | Add a Co-authored-by trailer to git commit messages AND attach a per-file AI-attribution git note (`refs/notes/ai-attribution`) for commits made through Qwen Code. Disabling skips both. | `true` | +| `general.gitCoAuthor.pr` | boolean | Append a Qwen Code attribution line to pull request descriptions when running `gh pr create`. | `true` | +| `general.checkpointing.enabled` | boolean | Enable session checkpointing for recovery. 
| `false` | +| `general.defaultFileEncoding` | string | Default encoding for new files. Use `"utf-8"` (default) for UTF-8 without BOM, or `"utf-8-bom"` for UTF-8 with BOM. Only change this if your project specifically requires BOM. | `"utf-8"` | #### output diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 9cc08c884..f7c1cbe40 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -395,7 +395,7 @@ const SETTINGS_SCHEMA = { requiresRestart: false, default: true, description: - 'Automatically add a Co-authored-by trailer to git commit messages when commits are made through Qwen Code.', + 'Add a Co-authored-by trailer to git commit messages AND attach a per-file AI-attribution git note (`refs/notes/ai-attribution`) for commits made through Qwen Code. Disabling skips both.', showInDialog: true, }, pr: { diff --git a/packages/core/src/services/attributionTrailer.test.ts b/packages/core/src/services/attributionTrailer.test.ts index 83ac83622..64dfafb36 100644 --- a/packages/core/src/services/attributionTrailer.test.ts +++ b/packages/core/src/services/attributionTrailer.test.ts @@ -28,6 +28,7 @@ const sampleNote: CommitAttributionNote = { }, surfaceBreakdown: { cli: { aiChars: 150, percent: 38 } }, excludedGenerated: ['package-lock.json'], + excludedGeneratedCount: 1, promptCount: 3, }; @@ -68,6 +69,7 @@ describe('attributionTrailer', () => { ...sampleNote, files: {}, excludedGenerated: [], + excludedGeneratedCount: 0, }; for (let i = 0; i < 2000; i++) { hugeNote.files[ diff --git a/packages/core/src/services/attributionTrailer.ts b/packages/core/src/services/attributionTrailer.ts index a242f4e35..a600c3a18 100644 --- a/packages/core/src/services/attributionTrailer.ts +++ b/packages/core/src/services/attributionTrailer.ts @@ -91,10 +91,8 @@ export function formatAttributionSummary(note: CommitAttributionNote): string { ); } - if (note.excludedGenerated.length > 0) { - 
lines.push( - ` Excluded generated: ${note.excludedGenerated.length} file(s)`, - ); + if (note.excludedGeneratedCount > 0) { + lines.push(` Excluded generated: ${note.excludedGeneratedCount} file(s)`); } return lines.join('\n'); diff --git a/packages/core/src/services/chatRecordingService.test.ts b/packages/core/src/services/chatRecordingService.test.ts index 15173c32b..c48a0668a 100644 --- a/packages/core/src/services/chatRecordingService.test.ts +++ b/packages/core/src/services/chatRecordingService.test.ts @@ -429,6 +429,45 @@ describe('ChatRecordingService', () => { }); }); + describe('recordAttributionSnapshot', () => { + const baseSnapshot = { + type: 'attribution-snapshot' as const, + version: 1, + surface: 'cli', + fileStates: {}, + baselines: {}, + promptCount: 0, + promptCountAtLastCommit: 0, + }; + + it('should write each distinct snapshot', async () => { + chatRecordingService.recordAttributionSnapshot(baseSnapshot); + chatRecordingService.recordAttributionSnapshot({ + ...baseSnapshot, + promptCount: 1, + }); + chatRecordingService.recordAttributionSnapshot({ + ...baseSnapshot, + promptCount: 2, + }); + await chatRecordingService.flush(); + expect(jsonl.writeLine).toHaveBeenCalledTimes(3); + }); + + // Sessions that touch many files emit a non-retry turn snapshot + // every prompt cycle. Without dedup, repeated identical snapshots + // (no edits, no prompt-counter change) would re-serialize the entire + // attribution state into the JSONL on every turn, inflating session + // size and slowing /resume. + it('should skip a snapshot identical to the previous write', async () => { + chatRecordingService.recordAttributionSnapshot(baseSnapshot); + chatRecordingService.recordAttributionSnapshot(baseSnapshot); + chatRecordingService.recordAttributionSnapshot(baseSnapshot); + await chatRecordingService.flush(); + expect(jsonl.writeLine).toHaveBeenCalledTimes(1); + }); + }); + // Note: Session management tests (listSessions, loadSession, deleteSession, etc.) 
// have been moved to sessionService.test.ts // Session resume integration tests should test via SessionService mock diff --git a/packages/core/src/services/chatRecordingService.ts b/packages/core/src/services/chatRecordingService.ts index 7e76e36aa..04c136d8c 100644 --- a/packages/core/src/services/chatRecordingService.ts +++ b/packages/core/src/services/chatRecordingService.ts @@ -351,6 +351,16 @@ export class ChatRecordingService { */ private autoTitleController: AbortController | undefined; + /** + * JSON-serialized form of the most recent attribution snapshot we + * wrote, used to deduplicate identical writes on every non-retry + * turn. Without this, sessions that touch many files would write a + * full duplicate of the entire snapshot to the JSONL on every turn, + * inflating the on-disk session and making `/resume` slower to + * hydrate. + */ + private lastAttributionSnapshotJson: string | undefined; + constructor(config: Config) { this.config = config; this.lastRecordUuid = @@ -969,9 +979,19 @@ export class ChatRecordingService { * Records an attribution state snapshot for session persistence. * Called at the start of every non-retry turn so that a resumed session * sees the most recent state including edits made during the prior turn. + * + * Deduplicates identical successive writes: if the snapshot's JSON + * form is byte-identical to the last one we wrote, skip the append. + * Without this, sessions that touch many files would write a full + * duplicate of the entire snapshot to the JSONL on every turn, even + * when nothing changed — inflating session size and slowing /resume. 
*/ recordAttributionSnapshot(snapshot: AttributionSnapshot): void { try { + const json = JSON.stringify(snapshot); + if (json === this.lastAttributionSnapshotJson) { + return; + } const record: ChatRecord = { ...this.createBaseRecord('system'), type: 'system', @@ -980,6 +1000,7 @@ export class ChatRecordingService { }; this.appendRecord(record); + this.lastAttributionSnapshotJson = json; } catch (error) { debugLogger.error('Error saving attribution snapshot:', error); } diff --git a/packages/core/src/services/commitAttribution.ts b/packages/core/src/services/commitAttribution.ts index 7af5ee7dc..1a10702c1 100644 --- a/packages/core/src/services/commitAttribution.ts +++ b/packages/core/src/services/commitAttribution.ts @@ -83,10 +83,27 @@ export interface CommitAttributionNote { surfaces: string[]; }; surfaceBreakdown: Record; + /** + * Sample of generated/vendored files that were excluded from + * attribution. Capped at `MAX_EXCLUDED_GENERATED_SAMPLE` paths so a + * commit churning thousands of `dist/` artifacts can't blow past the + * 30 KB note budget and silently drop attribution for the real + * source files in the same commit. Use `excludedGeneratedCount` for + * the true total. + */ excludedGenerated: string[]; + /** Total count of excluded files (≥ excludedGenerated.length). */ + excludedGeneratedCount: number; promptCount: number; } +/** + * Upper bound on the number of excluded-generated paths we serialize + * into the git note. Keeps the JSON payload bounded for commits with + * lots of generated artifacts. + */ +export const MAX_EXCLUDED_GENERATED_SAMPLE = 50; + /** Result of running git commands to get staged file info. 
*/ export interface StagedFileInfo { files: string[]; @@ -439,6 +456,7 @@ export class CommitAttributionService { const files: Record = {}; const excludedGenerated: string[] = []; + let excludedGeneratedCount = 0; const surfaceCounts: Record = {}; let totalAiChars = 0; let totalHumanChars = 0; @@ -463,7 +481,12 @@ export class CommitAttributionService { for (const relFile of stagedInfo.files) { if (isGeneratedFile(relFile)) { - excludedGenerated.push(relFile); + excludedGeneratedCount++; + // Cap the sample so a commit churning thousands of `dist/` + // artifacts can't blow past the 30 KB note budget. + if (excludedGenerated.length < MAX_EXCLUDED_GENERATED_SAMPLE) { + excludedGenerated.push(relFile); + } continue; } @@ -535,6 +558,7 @@ export class CommitAttributionService { }, surfaceBreakdown, excludedGenerated, + excludedGeneratedCount, promptCount: this.getPromptsSinceLastCommit(), }; }