fix(attribution): parse binary diffs, source generator from model, sync schema $version

Three follow-up review items from Copilot:

- parseDiffStat now handles git's binary-diff format (`path | Bin A ->
  B bytes`) using the byte delta with a floor of 1. Without this,
  binary edits arrived at the attribution payload as diffSize=0 and
  were silently dropped. Also extracted the parser to a top-level
  exported function so the binary path is unit-testable; added five
  targeted cases (text/binary/rename normalisation/summary skip).

- attachCommitAttribution now passes `this.config.getModel()` into
  generateNotePayload instead of the user-configurable
  `gitCoAuthor.name`. The note's `generator` field reflects which
  model produced the changes — and CommitAttributionService's
  sanitizeModelName() actually has the codename to scrub now.

- generate-settings-schema.ts imports SETTINGS_VERSION instead of
  hardcoding `default: 3`, so a future bump propagates to the emitted
  JSON schema in one place. Regenerated settings.schema.json bumps
  $version's default from 3 to 4 to match the V4 migration.
This commit is contained in:
wenshao 2026-05-01 13:31:03 +08:00
parent 98942178dc
commit c83479e2b5
4 changed files with 127 additions and 39 deletions

View file

@ -33,7 +33,7 @@ import * as fs from 'node:fs';
import * as os from 'node:os';
import * as crypto from 'node:crypto';
import { ToolErrorType } from './tool-error.js';
import { OUTPUT_UPDATE_INTERVAL_MS } from './shell.js';
import { OUTPUT_UPDATE_INTERVAL_MS, parseDiffStat } from './shell.js';
import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js';
import { PermissionManager } from '../permissions/permission-manager.js';
import { CommitAttributionService } from '../services/commitAttribution.js';
@ -67,6 +67,7 @@ describe('ShellTool', () => {
getTruncateToolOutputLines: vi.fn().mockReturnValue(0),
getPermissionManager: vi.fn().mockReturnValue(undefined),
getGeminiClient: vi.fn(),
getModel: vi.fn().mockReturnValue('qwen3-coder-plus'),
getGitCoAuthor: vi.fn().mockReturnValue({
commit: true,
pr: true,
@ -1600,3 +1601,37 @@ describe('ShellTool', () => {
});
});
});
describe('parseDiffStat', () => {
it('parses text-diff lines as lines * 40', () => {
const out = ' src/main.ts | 5 ++---\n 1 file changed';
expect(parseDiffStat(out).get('src/main.ts')).toBe(200);
});
it('parses binary-diff lines as |new - old| with a floor of 1', () => {
const out =
' assets/logo.png | Bin 0 -> 1024 bytes\n' +
' assets/icon.png | Bin 1024 -> 1024 bytes';
const sizes = parseDiffStat(out);
expect(sizes.get('assets/logo.png')).toBe(1024);
// Same-size binary edit still lands in the map so attribution
// doesn't drop the file via diffSize=0.
expect(sizes.get('assets/icon.png')).toBe(1);
});
it('normalizes brace rename notation to the new path', () => {
const out = ' src/{old => new}/file.ts | 3 +++';
expect([...parseDiffStat(out).keys()]).toEqual(['src/new/file.ts']);
});
it('normalizes bare cross-directory rename to the new path', () => {
const out = ' old/dir/file.ts => new/dir/file.ts | 2 +-';
expect([...parseDiffStat(out).keys()]).toEqual(['new/dir/file.ts']);
});
it('skips the summary line', () => {
const out =
' src/a.ts | 1 +\n 2 files changed, 1 insertion(+), 1 deletion(-)';
expect(parseDiffStat(out).size).toBe(1);
});
});

View file

@ -86,6 +86,73 @@ function escapeForBashSingleQuote(s: string): string {
return s.replace(/'/g, "'\\''");
}
/**
* Parse `git diff --stat` output into a `path → approximate change size`
* map for attribution accounting. Approximate size is what ends up
* clamping `aiChars` in the attribution payload, so missing entries
* silently zero out a file's contribution meaning binary edits should
* land in the map too.
*
* Two line formats handled:
* - Text: ` path/to/file | 5 ++---` `lines * 40` chars
* - Binary: ` path/to/file | Bin 0 -> 123 b` `|new - old|` bytes (1)
*
* Rename notations (`{old => new}` and bare `old => new`) are normalized
* to the new path so lookups match `--name-only` output.
*
* Exported for unit testing the function is otherwise an implementation
* detail of `attachCommitAttribution`.
*/
export function parseDiffStat(statOutput: string): Map<string, number> {
const sizes = new Map<string, number>();
const lines = statOutput.split('\n').filter(Boolean);
const normalizeFilePath = (filePath: string): string => {
let p = filePath.trim();
// Brace rename: `{old => new}` or `dir/{old => new}/file`
p = p.replace(/\{[^}]*?=>\s*([^}]*)\}/g, '$1');
// Bare rename across directories: `old/path/file => new/path/file`
if (p.includes('=>')) {
const m = p.match(/^(.*?)\s=>\s(.*)$/);
if (m) p = m[2]!.trim();
}
return p;
};
for (const line of lines) {
// Skip summary line ("N files changed, X insertions(+), Y deletions(-)")
if (line.includes('file changed') || line.includes('files changed')) {
continue;
}
// Text diff: " path/to/file | 5 ++---"
const textMatch = line.match(/^\s*(.+?)\s+\|\s+(\d+)/);
if (textMatch) {
const filePath = normalizeFilePath(textMatch[1]!);
const changes = parseInt(textMatch[2]!, 10);
// Approximate chars: lines changed * avg 40 chars/line
sizes.set(filePath, changes * 40);
continue;
}
// Binary diff: " path/to/file | Bin 0 -> 123 bytes"
// Use the byte delta with a floor of 1 so binary-only changes still
// land in the map (otherwise they'd flow through as `diffSize=0` and
// silently drop out of attribution).
const binaryMatch = line.match(
/^\s*(.+?)\s+\|\s+Bin\s+(\d+)\s+->\s+(\d+)\s+bytes$/,
);
if (binaryMatch) {
const filePath = normalizeFilePath(binaryMatch[1]!);
const oldBytes = parseInt(binaryMatch[2]!, 10);
const newBytes = parseInt(binaryMatch[3]!, 10);
sizes.set(filePath, Math.max(1, Math.abs(newBytes - oldBytes)));
}
}
return sizes;
}
export const OUTPUT_UPDATE_INTERVAL_MS = 1000;
const DEFAULT_FOREGROUND_TIMEOUT_MS = 120000;
@ -921,12 +988,15 @@ export class ShellToolInvocation extends BaseToolInvocation<
// The commit already happened, so we diff HEAD~1..HEAD instead of --cached.
const stagedInfo = await this.getCommittedFileInfo(cwd);
const generatorName = gitCoAuthorSettings.name ?? 'Qwen-Coder';
// Pass the actual model name (e.g. `qwen3-coder-plus`) rather than the
// co-author display label so the note's `generator` field reflects
// which model produced the changes — and so generateNotePayload's
// sanitizeModelName() actually has the codename it's meant to scrub.
const baseDir = this.config.getTargetDir();
const note = attributionService.generateNotePayload(
stagedInfo,
baseDir,
generatorName,
this.config.getModel(),
);
const notesCommand = buildGitNotesCommand(note);
@ -1049,7 +1119,7 @@ export class ShellToolInvocation extends BaseToolInvocation<
}
// Get diff sizes from stat output
const diffSizes = this.parseDiffStat(statOutput);
const diffSizes = parseDiffStat(statOutput);
return { files, diffSizes, deletedFiles };
} catch {
@ -1061,38 +1131,19 @@ export class ShellToolInvocation extends BaseToolInvocation<
* Parse `git diff --stat` output to extract per-file change sizes.
* Estimates character count as (insertions + deletions) * 40 chars/line.
*/
private parseDiffStat(statOutput: string): Map<string, number> {
const sizes = new Map<string, number>();
const lines = statOutput.split('\n').filter(Boolean);
for (const line of lines) {
// Skip summary line ("N files changed, X insertions(+), Y deletions(-)")
if (line.includes('file changed') || line.includes('files changed')) {
continue;
}
// Format: " path/to/file | 5 ++---"
const match = line.match(/^\s*(.+?)\s+\|\s+(\d+)/);
if (match) {
let filePath = match[1]!.trim();
// Handle rename brace notation: "{old => new}" or "dir/{old => new}".
// Extract the new name so the key matches --name-only output.
filePath = filePath.replace(/\{[^}]*?=>\s*([^}]*)\}/g, '$1');
// Handle plain rename notation when the rename crosses directories:
// "old/path/file => new/path/file". Keep only the new path.
if (filePath.includes('=>')) {
const renameMatch = filePath.match(/^(.*?)\s=>\s(.*)$/);
if (renameMatch) {
filePath = renameMatch[2]!.trim();
}
}
const changes = parseInt(match[2]!, 10);
// Approximate chars: lines changed * avg 40 chars/line
sizes.set(filePath, changes * 40);
}
}
return sizes;
}
/**
* Parse `git diff --stat` output into a `path → approximate change size`
* map. Approximate size is what ends up clamping `aiChars` in the
* attribution payload, so missing entries silently zero out a file's
* contribution meaning binary edits should land in the map too.
*
* Two line formats handled:
* - Text: ` path/to/file | 5 ++---` `lines * 40` chars
* - Binary: ` path/to/file | Bin 0 -> 123 b` `|new - old|` bytes (1)
*
* Rename notations (`{old => new}` and bare `old => new`) are normalized
* to the new path so lookups match `--name-only` output.
*/
private addCoAuthorToGitCommit(command: string): string {
// Check if commit co-author feature is enabled

View file

@ -2048,7 +2048,7 @@
"$version": {
"type": "number",
"description": "Settings schema version for migration tracking.",
"default": 3
"default": 4
}
},
"additionalProperties": true

View file

@ -25,6 +25,7 @@ import type {
SettingsSchema,
} from '../packages/cli/src/config/settingsSchema.js';
import { getSettingsSchema } from '../packages/cli/src/config/settingsSchema.js';
import { SETTINGS_VERSION } from '../packages/cli/src/config/settings.js';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
@ -194,11 +195,12 @@ function generateJsonSchema(
);
}
// Add $version property
// Add $version property — sourced from settings.ts so a SETTINGS_VERSION
// bump propagates here instead of needing a parallel manual edit.
jsonSchema.properties!['$version'] = {
type: 'number',
description: 'Settings schema version for migration tracking.',
default: 3,
default: SETTINGS_VERSION,
};
return jsonSchema;