From c83479e2b51c03dc480fb4735a4e2bbb3ccb6135 Mon Sep 17 00:00:00 2001 From: wenshao Date: Fri, 1 May 2026 13:31:03 +0800 Subject: [PATCH] fix(attribution): parse binary diffs, source generator from model, sync schema $version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-up review items from Copilot: - parseDiffStat now handles git's binary-diff format (`path | Bin A -> B bytes`) using the byte delta with a floor of 1. Without this, binary edits arrived at the attribution payload as diffSize=0 and were silently dropped. Also extracted the parser to a top-level exported function so the binary path is unit-testable; added five targeted cases (text/binary/rename normalisation/summary skip). - attachCommitAttribution now passes `this.config.getModel()` into generateNotePayload instead of the user-configurable `gitCoAuthor.name`. The note's `generator` field reflects which model produced the changes — and CommitAttributionService's sanitizeModelName() actually has the codename to scrub now. - generate-settings-schema.ts imports SETTINGS_VERSION instead of hardcoding `default: 3`, so a future bump propagates to the emitted JSON schema in one place. Regenerated settings.schema.json bumps $version's default from 3 to 4 to match the V4 migration. --- packages/core/src/tools/shell.test.ts | 37 +++++- packages/core/src/tools/shell.ts | 121 +++++++++++++----- .../schemas/settings.schema.json | 2 +- scripts/generate-settings-schema.ts | 6 +- 4 files changed, 127 insertions(+), 39 deletions(-) diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index 6d6705b41..15677fb73 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -33,7 +33,7 @@ import * as fs from 'node:fs'; import * as os from 'node:os'; import * as crypto from 'node:crypto'; import { ToolErrorType } from './tool-error.js'; -import { OUTPUT_UPDATE_INTERVAL_MS } from './shell.js'; +import { OUTPUT_UPDATE_INTERVAL_MS, parseDiffStat } from './shell.js'; import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js'; import { PermissionManager } from '../permissions/permission-manager.js'; import { CommitAttributionService } from '../services/commitAttribution.js'; @@ -67,6 +67,7 @@ describe('ShellTool', () => { getTruncateToolOutputLines: vi.fn().mockReturnValue(0), getPermissionManager: vi.fn().mockReturnValue(undefined), getGeminiClient: vi.fn(), + getModel: vi.fn().mockReturnValue('qwen3-coder-plus'), getGitCoAuthor: vi.fn().mockReturnValue({ commit: true, pr: true, @@ -1600,3 +1601,37 @@ describe('ShellTool', () => { }); }); }); + +describe('parseDiffStat', () => { + it('parses text-diff lines as lines * 40', () => { + const out = ' src/main.ts | 5 ++---\n 1 file changed'; + expect(parseDiffStat(out).get('src/main.ts')).toBe(200); + }); + + it('parses binary-diff lines as |new - old| with a floor of 1', () => { + const out = + ' assets/logo.png | Bin 0 -> 1024 bytes\n' + + ' assets/icon.png | Bin 1024 -> 1024 bytes'; + const sizes = parseDiffStat(out); + expect(sizes.get('assets/logo.png')).toBe(1024); + // Same-size binary edit still lands in the map so attribution + // doesn't drop the file via diffSize=0. + expect(sizes.get('assets/icon.png')).toBe(1); + }); + + it('normalizes brace rename notation to the new path', () => { + const out = ' src/{old => new}/file.ts | 3 +++'; + expect([...parseDiffStat(out).keys()]).toEqual(['src/new/file.ts']); + }); + + it('normalizes bare cross-directory rename to the new path', () => { + const out = ' old/dir/file.ts => new/dir/file.ts | 2 +-'; + expect([...parseDiffStat(out).keys()]).toEqual(['new/dir/file.ts']); + }); + + it('skips the summary line', () => { + const out = + ' src/a.ts | 1 +\n 2 files changed, 1 insertion(+), 1 deletion(-)'; + expect(parseDiffStat(out).size).toBe(1); + }); +}); diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 9360cb4ef..c84713e4c 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -86,6 +86,73 @@ function escapeForBashSingleQuote(s: string): string { return s.replace(/'/g, "'\\''"); } +/** + * Parse `git diff --stat` output into a `path → approximate change size` + * map for attribution accounting. Approximate size is what ends up + * clamping `aiChars` in the attribution payload, so missing entries + * silently zero out a file's contribution — meaning binary edits should + * land in the map too. + * + * Two line formats handled: + * - Text: ` path/to/file | 5 ++---` → `lines * 40` chars + * - Binary: ` path/to/file | Bin 0 -> 123 b` → `|new - old|` bytes (≥1) + * + * Rename notations (`{old => new}` and bare `old => new`) are normalized + * to the new path so lookups match `--name-only` output. + * + * Exported for unit testing — the function is otherwise an implementation + * detail of `attachCommitAttribution`. + */ +export function parseDiffStat(statOutput: string): Map { + const sizes = new Map(); + const lines = statOutput.split('\n').filter(Boolean); + + const normalizeFilePath = (filePath: string): string => { + let p = filePath.trim(); + // Brace rename: `{old => new}` or `dir/{old => new}/file` + p = p.replace(/\{[^}]*?=>\s*([^}]*)\}/g, '$1'); + // Bare rename across directories: `old/path/file => new/path/file` + if (p.includes('=>')) { + const m = p.match(/^(.*?)\s=>\s(.*)$/); + if (m) p = m[2]!.trim(); + } + return p; + }; + + for (const line of lines) { + // Skip summary line ("N files changed, X insertions(+), Y deletions(-)") + if (line.includes('file changed') || line.includes('files changed')) { + continue; + } + + // Text diff: " path/to/file | 5 ++---" + const textMatch = line.match(/^\s*(.+?)\s+\|\s+(\d+)/); + if (textMatch) { + const filePath = normalizeFilePath(textMatch[1]!); + const changes = parseInt(textMatch[2]!, 10); + // Approximate chars: lines changed * avg 40 chars/line + sizes.set(filePath, changes * 40); + continue; + } + + // Binary diff: " path/to/file | Bin 0 -> 123 bytes" + // Use the byte delta with a floor of 1 so binary-only changes still + // land in the map (otherwise they'd flow through as `diffSize=0` and + // silently drop out of attribution). + const binaryMatch = line.match( + /^\s*(.+?)\s+\|\s+Bin\s+(\d+)\s+->\s+(\d+)\s+bytes$/, + ); + if (binaryMatch) { + const filePath = normalizeFilePath(binaryMatch[1]!); + const oldBytes = parseInt(binaryMatch[2]!, 10); + const newBytes = parseInt(binaryMatch[3]!, 10); + sizes.set(filePath, Math.max(1, Math.abs(newBytes - oldBytes))); + } + } + + return sizes; +} + export const OUTPUT_UPDATE_INTERVAL_MS = 1000; const DEFAULT_FOREGROUND_TIMEOUT_MS = 120000; @@ -921,12 +988,15 @@ export class ShellToolInvocation extends BaseToolInvocation< // The commit already happened, so we diff HEAD~1..HEAD instead of --cached. const stagedInfo = await this.getCommittedFileInfo(cwd); - const generatorName = gitCoAuthorSettings.name ?? 'Qwen-Coder'; + // Pass the actual model name (e.g. `qwen3-coder-plus`) rather than the + // co-author display label so the note's `generator` field reflects + // which model produced the changes — and so generateNotePayload's + // sanitizeModelName() actually has the codename it's meant to scrub. const baseDir = this.config.getTargetDir(); const note = attributionService.generateNotePayload( stagedInfo, baseDir, - generatorName, + this.config.getModel(), ); const notesCommand = buildGitNotesCommand(note); @@ -1049,7 +1119,7 @@ export class ShellToolInvocation extends BaseToolInvocation< } // Get diff sizes from stat output - const diffSizes = this.parseDiffStat(statOutput); + const diffSizes = parseDiffStat(statOutput); return { files, diffSizes, deletedFiles }; } catch { @@ -1061,38 +1131,19 @@ export class ShellToolInvocation extends BaseToolInvocation< * Parse `git diff --stat` output to extract per-file change sizes. * Estimates character count as (insertions + deletions) * 40 chars/line. */ - private parseDiffStat(statOutput: string): Map { - const sizes = new Map(); - const lines = statOutput.split('\n').filter(Boolean); - - for (const line of lines) { - // Skip summary line ("N files changed, X insertions(+), Y deletions(-)") - if (line.includes('file changed') || line.includes('files changed')) { - continue; - } - // Format: " path/to/file | 5 ++---" - const match = line.match(/^\s*(.+?)\s+\|\s+(\d+)/); - if (match) { - let filePath = match[1]!.trim(); - // Handle rename brace notation: "{old => new}" or "dir/{old => new}". - // Extract the new name so the key matches --name-only output. - filePath = filePath.replace(/\{[^}]*?=>\s*([^}]*)\}/g, '$1'); - // Handle plain rename notation when the rename crosses directories: - // "old/path/file => new/path/file". Keep only the new path. - if (filePath.includes('=>')) { - const renameMatch = filePath.match(/^(.*?)\s=>\s(.*)$/); - if (renameMatch) { - filePath = renameMatch[2]!.trim(); - } - } - const changes = parseInt(match[2]!, 10); - // Approximate chars: lines changed * avg 40 chars/line - sizes.set(filePath, changes * 40); - } - } - - return sizes; - } + /** + * Parse `git diff --stat` output into a `path → approximate change size` + * map. Approximate size is what ends up clamping `aiChars` in the + * attribution payload, so missing entries silently zero out a file's + * contribution — meaning binary edits should land in the map too. + * + * Two line formats handled: + * - Text: ` path/to/file | 5 ++---` → `lines * 40` chars + * - Binary: ` path/to/file | Bin 0 -> 123 b` → `|new - old|` bytes (≥1) + * + * Rename notations (`{old => new}` and bare `old => new`) are normalized + * to the new path so lookups match `--name-only` output. + */ private addCoAuthorToGitCommit(command: string): string { // Check if commit co-author feature is enabled diff --git a/packages/vscode-ide-companion/schemas/settings.schema.json b/packages/vscode-ide-companion/schemas/settings.schema.json index 690f92478..a9773db6b 100644 --- a/packages/vscode-ide-companion/schemas/settings.schema.json +++ b/packages/vscode-ide-companion/schemas/settings.schema.json @@ -2048,7 +2048,7 @@ "$version": { "type": "number", "description": "Settings schema version for migration tracking.", - "default": 3 + "default": 4 } }, "additionalProperties": true diff --git a/scripts/generate-settings-schema.ts b/scripts/generate-settings-schema.ts index 3f22ade20..3d21a065a 100644 --- a/scripts/generate-settings-schema.ts +++ b/scripts/generate-settings-schema.ts @@ -25,6 +25,7 @@ import type { SettingsSchema, } from '../packages/cli/src/config/settingsSchema.js'; import { getSettingsSchema } from '../packages/cli/src/config/settingsSchema.js'; +import { SETTINGS_VERSION } from '../packages/cli/src/config/settings.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); @@ -194,11 +195,12 @@ function generateJsonSchema( ); } - // Add $version property + // Add $version property — sourced from settings.ts so a SETTINGS_VERSION + // bump propagates here instead of needing a parallel manual edit. jsonSchema.properties!['$version'] = { type: 'number', description: 'Settings schema version for migration tracking.', - default: 3, + default: SETTINGS_VERSION, }; return jsonSchema;