fix(attribution): dedup snapshot writes, cap excludedGenerated, doc commit toggle scope

rsf- (Copilot): recordAttributionSnapshot wrote a full snapshot to
the JSONL on every non-retry turn, even when the tracked state was
unchanged. Long-running sessions accumulated thousands of identical
snapshot copies, inflating session size and slowing /resume hydration.
Dedup by JSON-equality with the prior write — first write always
goes through, identical successors are no-ops.

rsgo (Copilot): excludedGenerated path list was unbounded. A commit
churning thousands of generated artifacts (large dist/ rebuild)
could push the JSON note past MAX_NOTE_BYTES (30KB) and lose
attribution for the real source files in the same commit. Cap the
serialized sample at MAX_EXCLUDED_GENERATED_SAMPLE (50) and add
excludedGeneratedCount for the true total.

rsg9 + rshM (Copilot): the gitCoAuthor.commit description claimed
the toggle only controlled the Co-authored-by trailer, but
attachCommitAttribution also gates the per-file git-notes payload
on the same flag. Update both the schema description and the
settings.md table to mention both effects so disabling the option
isn't a silent surprise.
This commit is contained in:
wenshao 2026-05-05 22:52:32 +08:00
parent 090758c5b1
commit 3c0e3293be
7 changed files with 101 additions and 17 deletions

View file

@ -83,17 +83,17 @@ Settings are organized into categories. Most settings should be placed within th
#### general
| Setting | Type | Description | Default |
| ------------------------------------------ | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- |
| `general.preferredEditor` | string | The preferred editor to open files in. | `undefined` |
| `general.vimMode` | boolean | Enable Vim keybindings. | `false` |
| `general.enableAutoUpdate` | boolean | Enable automatic update checks and installations on startup. | `true` |
| `general.showSessionRecap` | boolean | Auto-show a one-line "where you left off" recap when returning to the terminal after being away. Off by default. Use `/recap` to trigger manually regardless of this setting. | `false` |
| `general.sessionRecapAwayThresholdMinutes` | number | Minutes the terminal must be blurred before an auto-recap fires on focus-in. Only used when `showSessionRecap` is enabled. | `5` |
| `general.gitCoAuthor.commit` | boolean | Automatically add a Co-authored-by trailer to git commit messages when commits are made through Qwen Code. | `true` |
| `general.gitCoAuthor.pr` | boolean | Append a Qwen Code attribution line to pull request descriptions when running `gh pr create`. | `true` |
| `general.checkpointing.enabled` | boolean | Enable session checkpointing for recovery. | `false` |
| `general.defaultFileEncoding` | string | Default encoding for new files. Use `"utf-8"` (default) for UTF-8 without BOM, or `"utf-8-bom"` for UTF-8 with BOM. Only change this if your project specifically requires BOM. | `"utf-8"` |
| Setting | Type | Description | Default |
| ------------------------------------------ | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- |
| `general.preferredEditor` | string | The preferred editor to open files in. | `undefined` |
| `general.vimMode` | boolean | Enable Vim keybindings. | `false` |
| `general.enableAutoUpdate` | boolean | Enable automatic update checks and installations on startup. | `true` |
| `general.showSessionRecap` | boolean | Auto-show a one-line "where you left off" recap when returning to the terminal after being away. Off by default. Use `/recap` to trigger manually regardless of this setting. | `false` |
| `general.sessionRecapAwayThresholdMinutes` | number | Minutes the terminal must be blurred before an auto-recap fires on focus-in. Only used when `showSessionRecap` is enabled. | `5` |
| `general.gitCoAuthor.commit` | boolean | Add a Co-authored-by trailer to git commit messages AND attach a per-file AI-attribution git note (`refs/notes/ai-attribution`) for commits made through Qwen Code. Disabling skips both. | `true` |
| `general.gitCoAuthor.pr` | boolean | Append a Qwen Code attribution line to pull request descriptions when running `gh pr create`. | `true` |
| `general.checkpointing.enabled` | boolean | Enable session checkpointing for recovery. | `false` |
| `general.defaultFileEncoding` | string | Default encoding for new files. Use `"utf-8"` (default) for UTF-8 without BOM, or `"utf-8-bom"` for UTF-8 with BOM. Only change this if your project specifically requires BOM. | `"utf-8"` |
#### output

View file

@ -395,7 +395,7 @@ const SETTINGS_SCHEMA = {
requiresRestart: false,
default: true,
description:
'Automatically add a Co-authored-by trailer to git commit messages when commits are made through Qwen Code.',
'Add a Co-authored-by trailer to git commit messages AND attach a per-file AI-attribution git note (`refs/notes/ai-attribution`) for commits made through Qwen Code. Disabling skips both.',
showInDialog: true,
},
pr: {

View file

@ -28,6 +28,7 @@ const sampleNote: CommitAttributionNote = {
},
surfaceBreakdown: { cli: { aiChars: 150, percent: 38 } },
excludedGenerated: ['package-lock.json'],
excludedGeneratedCount: 1,
promptCount: 3,
};
@ -68,6 +69,7 @@ describe('attributionTrailer', () => {
...sampleNote,
files: {},
excludedGenerated: [],
excludedGeneratedCount: 0,
};
for (let i = 0; i < 2000; i++) {
hugeNote.files[

View file

@ -91,10 +91,8 @@ export function formatAttributionSummary(note: CommitAttributionNote): string {
);
}
if (note.excludedGenerated.length > 0) {
lines.push(
` Excluded generated: ${note.excludedGenerated.length} file(s)`,
);
if (note.excludedGeneratedCount > 0) {
lines.push(` Excluded generated: ${note.excludedGeneratedCount} file(s)`);
}
return lines.join('\n');

View file

@ -429,6 +429,45 @@ describe('ChatRecordingService', () => {
});
});
describe('recordAttributionSnapshot', () => {
  // Empty attribution state reused by every case; individual tests
  // override `promptCount` to produce distinct payloads.
  const baseSnapshot = {
    type: 'attribution-snapshot' as const,
    version: 1,
    surface: 'cli',
    fileStates: {},
    baselines: {},
    promptCount: 0,
    promptCountAtLastCommit: 0,
  };
  it('should write each distinct snapshot', async () => {
    // Three payloads that differ only in `promptCount`, so none of them
    // serializes to the same JSON as its predecessor.
    const distinctSnapshots = [
      baseSnapshot,
      { ...baseSnapshot, promptCount: 1 },
      { ...baseSnapshot, promptCount: 2 },
    ];
    for (const snapshot of distinctSnapshots) {
      chatRecordingService.recordAttributionSnapshot(snapshot);
    }
    await chatRecordingService.flush();
    expect(jsonl.writeLine).toHaveBeenCalledTimes(3);
  });
  // A snapshot is recorded on every non-retry turn. When nothing was
  // edited and the prompt counters did not move, the payload is the
  // same as the one already on disk; re-appending it each turn would
  // grow the session JSONL and slow down /resume, so only the first
  // of a run of identical snapshots should reach the writer.
  it('should skip a snapshot identical to the previous write', async () => {
    const repeatedCalls = 3;
    for (let call = 0; call < repeatedCalls; call++) {
      chatRecordingService.recordAttributionSnapshot(baseSnapshot);
    }
    await chatRecordingService.flush();
    expect(jsonl.writeLine).toHaveBeenCalledTimes(1);
  });
});
// Note: Session management tests (listSessions, loadSession, deleteSession, etc.)
// have been moved to sessionService.test.ts
// Session resume integration tests should test via SessionService mock

View file

@ -351,6 +351,16 @@ export class ChatRecordingService {
*/
private autoTitleController: AbortController | undefined;
/**
* JSON-serialized form of the most recent attribution snapshot we
* wrote, used to deduplicate identical writes on every non-retry
* turn. Without this, sessions that touch many files would write a
* full duplicate of the entire snapshot to the JSONL on every turn,
* inflating the on-disk session and making `/resume` slower to
* hydrate.
*/
private lastAttributionSnapshotJson: string | undefined;
constructor(config: Config) {
this.config = config;
this.lastRecordUuid =
@ -969,9 +979,19 @@ export class ChatRecordingService {
* Records an attribution state snapshot for session persistence.
* Called at the start of every non-retry turn so that a resumed session
* sees the most recent state including edits made during the prior turn.
*
* Deduplicates identical successive writes: if the snapshot's JSON
* form is byte-identical to the last one we wrote, skip the append.
* Without this, sessions that touch many files would write a full
* duplicate of the entire snapshot to the JSONL on every turn, even
* when nothing changed, inflating session size and slowing /resume.
*/
recordAttributionSnapshot(snapshot: AttributionSnapshot): void {
try {
const json = JSON.stringify(snapshot);
if (json === this.lastAttributionSnapshotJson) {
return;
}
const record: ChatRecord = {
...this.createBaseRecord('system'),
type: 'system',
@ -980,6 +1000,7 @@ export class ChatRecordingService {
};
this.appendRecord(record);
this.lastAttributionSnapshotJson = json;
} catch (error) {
debugLogger.error('Error saving attribution snapshot:', error);
}

View file

@ -83,10 +83,27 @@ export interface CommitAttributionNote {
surfaces: string[];
};
surfaceBreakdown: Record<string, { aiChars: number; percent: number }>;
/**
* Sample of generated/vendored files that were excluded from
* attribution. Capped at `MAX_EXCLUDED_GENERATED_SAMPLE` paths so a
* commit churning thousands of `dist/` artifacts can't blow past the
* 30 KB note budget and silently drop attribution for the real
* source files in the same commit. Use `excludedGeneratedCount` for
* the true total.
*/
excludedGenerated: string[];
/** Total count of excluded files (≥ excludedGenerated.length). */
excludedGeneratedCount: number;
promptCount: number;
}
/**
 * Upper bound on the number of excluded-generated paths we serialize
 * into the git note (`CommitAttributionNote.excludedGenerated`). Keeps
 * the JSON payload bounded for commits with lots of generated
 * artifacts, so the path list alone cannot push the note past its
 * 30 KB budget. The true total of excluded files is reported
 * separately in `excludedGeneratedCount`.
 */
export const MAX_EXCLUDED_GENERATED_SAMPLE = 50;
/** Result of running git commands to get staged file info. */
export interface StagedFileInfo {
files: string[];
@ -439,6 +456,7 @@ export class CommitAttributionService {
const files: Record<string, FileAttributionDetail> = {};
const excludedGenerated: string[] = [];
let excludedGeneratedCount = 0;
const surfaceCounts: Record<string, number> = {};
let totalAiChars = 0;
let totalHumanChars = 0;
@ -463,7 +481,12 @@ export class CommitAttributionService {
for (const relFile of stagedInfo.files) {
if (isGeneratedFile(relFile)) {
excludedGenerated.push(relFile);
excludedGeneratedCount++;
// Cap the sample so a commit churning thousands of `dist/`
// artifacts can't blow past the 30 KB note budget.
if (excludedGenerated.length < MAX_EXCLUDED_GENERATED_SAMPLE) {
excludedGenerated.push(relFile);
}
continue;
}
@ -535,6 +558,7 @@ export class CommitAttributionService {
},
surfaceBreakdown,
excludedGenerated,
excludedGeneratedCount,
promptCount: this.getPromptsSinceLastCommit(),
};
}