mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-16 02:39:08 +00:00
fix(attribution): parse binary diffs, source generator from model, sync schema $version
Three follow-up review items from Copilot: - parseDiffStat now handles git's binary-diff format (`path | Bin A -> B bytes`) using the byte delta with a floor of 1. Without this, binary edits arrived at the attribution payload as diffSize=0 and were silently dropped. Also extracted the parser to a top-level exported function so the binary path is unit-testable; added five targeted cases (text/binary/rename normalisation/summary skip). - attachCommitAttribution now passes `this.config.getModel()` into generateNotePayload instead of the user-configurable `gitCoAuthor.name`. The note's `generator` field reflects which model produced the changes — and CommitAttributionService's sanitizeModelName() actually has the codename to scrub now. - generate-settings-schema.ts imports SETTINGS_VERSION instead of hardcoding `default: 3`, so a future bump propagates to the emitted JSON schema in one place. Regenerated settings.schema.json bumps $version's default from 3 to 4 to match the V4 migration.
This commit is contained in:
parent
98942178dc
commit
c83479e2b5
4 changed files with 127 additions and 39 deletions
|
|
@ -33,7 +33,7 @@ import * as fs from 'node:fs';
|
|||
import * as os from 'node:os';
|
||||
import * as crypto from 'node:crypto';
|
||||
import { ToolErrorType } from './tool-error.js';
|
||||
import { OUTPUT_UPDATE_INTERVAL_MS } from './shell.js';
|
||||
import { OUTPUT_UPDATE_INTERVAL_MS, parseDiffStat } from './shell.js';
|
||||
import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js';
|
||||
import { PermissionManager } from '../permissions/permission-manager.js';
|
||||
import { CommitAttributionService } from '../services/commitAttribution.js';
|
||||
|
|
@ -67,6 +67,7 @@ describe('ShellTool', () => {
|
|||
getTruncateToolOutputLines: vi.fn().mockReturnValue(0),
|
||||
getPermissionManager: vi.fn().mockReturnValue(undefined),
|
||||
getGeminiClient: vi.fn(),
|
||||
getModel: vi.fn().mockReturnValue('qwen3-coder-plus'),
|
||||
getGitCoAuthor: vi.fn().mockReturnValue({
|
||||
commit: true,
|
||||
pr: true,
|
||||
|
|
@ -1600,3 +1601,37 @@ describe('ShellTool', () => {
|
|||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseDiffStat', () => {
|
||||
it('parses text-diff lines as lines * 40', () => {
|
||||
const out = ' src/main.ts | 5 ++---\n 1 file changed';
|
||||
expect(parseDiffStat(out).get('src/main.ts')).toBe(200);
|
||||
});
|
||||
|
||||
it('parses binary-diff lines as |new - old| with a floor of 1', () => {
|
||||
const out =
|
||||
' assets/logo.png | Bin 0 -> 1024 bytes\n' +
|
||||
' assets/icon.png | Bin 1024 -> 1024 bytes';
|
||||
const sizes = parseDiffStat(out);
|
||||
expect(sizes.get('assets/logo.png')).toBe(1024);
|
||||
// Same-size binary edit still lands in the map so attribution
|
||||
// doesn't drop the file via diffSize=0.
|
||||
expect(sizes.get('assets/icon.png')).toBe(1);
|
||||
});
|
||||
|
||||
it('normalizes brace rename notation to the new path', () => {
|
||||
const out = ' src/{old => new}/file.ts | 3 +++';
|
||||
expect([...parseDiffStat(out).keys()]).toEqual(['src/new/file.ts']);
|
||||
});
|
||||
|
||||
it('normalizes bare cross-directory rename to the new path', () => {
|
||||
const out = ' old/dir/file.ts => new/dir/file.ts | 2 +-';
|
||||
expect([...parseDiffStat(out).keys()]).toEqual(['new/dir/file.ts']);
|
||||
});
|
||||
|
||||
it('skips the summary line', () => {
|
||||
const out =
|
||||
' src/a.ts | 1 +\n 2 files changed, 1 insertion(+), 1 deletion(-)';
|
||||
expect(parseDiffStat(out).size).toBe(1);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -86,6 +86,73 @@ function escapeForBashSingleQuote(s: string): string {
|
|||
return s.replace(/'/g, "'\\''");
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse `git diff --stat` output into a `path → approximate change size`
|
||||
* map for attribution accounting. Approximate size is what ends up
|
||||
* clamping `aiChars` in the attribution payload, so missing entries
|
||||
* silently zero out a file's contribution — meaning binary edits should
|
||||
* land in the map too.
|
||||
*
|
||||
* Two line formats handled:
|
||||
* - Text: ` path/to/file | 5 ++---` → `lines * 40` chars
|
||||
* - Binary: ` path/to/file | Bin 0 -> 123 b` → `|new - old|` bytes (≥1)
|
||||
*
|
||||
* Rename notations (`{old => new}` and bare `old => new`) are normalized
|
||||
* to the new path so lookups match `--name-only` output.
|
||||
*
|
||||
* Exported for unit testing — the function is otherwise an implementation
|
||||
* detail of `attachCommitAttribution`.
|
||||
*/
|
||||
export function parseDiffStat(statOutput: string): Map<string, number> {
|
||||
const sizes = new Map<string, number>();
|
||||
const lines = statOutput.split('\n').filter(Boolean);
|
||||
|
||||
const normalizeFilePath = (filePath: string): string => {
|
||||
let p = filePath.trim();
|
||||
// Brace rename: `{old => new}` or `dir/{old => new}/file`
|
||||
p = p.replace(/\{[^}]*?=>\s*([^}]*)\}/g, '$1');
|
||||
// Bare rename across directories: `old/path/file => new/path/file`
|
||||
if (p.includes('=>')) {
|
||||
const m = p.match(/^(.*?)\s=>\s(.*)$/);
|
||||
if (m) p = m[2]!.trim();
|
||||
}
|
||||
return p;
|
||||
};
|
||||
|
||||
for (const line of lines) {
|
||||
// Skip summary line ("N files changed, X insertions(+), Y deletions(-)")
|
||||
if (line.includes('file changed') || line.includes('files changed')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Text diff: " path/to/file | 5 ++---"
|
||||
const textMatch = line.match(/^\s*(.+?)\s+\|\s+(\d+)/);
|
||||
if (textMatch) {
|
||||
const filePath = normalizeFilePath(textMatch[1]!);
|
||||
const changes = parseInt(textMatch[2]!, 10);
|
||||
// Approximate chars: lines changed * avg 40 chars/line
|
||||
sizes.set(filePath, changes * 40);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Binary diff: " path/to/file | Bin 0 -> 123 bytes"
|
||||
// Use the byte delta with a floor of 1 so binary-only changes still
|
||||
// land in the map (otherwise they'd flow through as `diffSize=0` and
|
||||
// silently drop out of attribution).
|
||||
const binaryMatch = line.match(
|
||||
/^\s*(.+?)\s+\|\s+Bin\s+(\d+)\s+->\s+(\d+)\s+bytes$/,
|
||||
);
|
||||
if (binaryMatch) {
|
||||
const filePath = normalizeFilePath(binaryMatch[1]!);
|
||||
const oldBytes = parseInt(binaryMatch[2]!, 10);
|
||||
const newBytes = parseInt(binaryMatch[3]!, 10);
|
||||
sizes.set(filePath, Math.max(1, Math.abs(newBytes - oldBytes)));
|
||||
}
|
||||
}
|
||||
|
||||
return sizes;
|
||||
}
|
||||
|
||||
export const OUTPUT_UPDATE_INTERVAL_MS = 1000;
|
||||
const DEFAULT_FOREGROUND_TIMEOUT_MS = 120000;
|
||||
|
||||
|
|
@ -921,12 +988,15 @@ export class ShellToolInvocation extends BaseToolInvocation<
|
|||
// The commit already happened, so we diff HEAD~1..HEAD instead of --cached.
|
||||
const stagedInfo = await this.getCommittedFileInfo(cwd);
|
||||
|
||||
const generatorName = gitCoAuthorSettings.name ?? 'Qwen-Coder';
|
||||
// Pass the actual model name (e.g. `qwen3-coder-plus`) rather than the
|
||||
// co-author display label so the note's `generator` field reflects
|
||||
// which model produced the changes — and so generateNotePayload's
|
||||
// sanitizeModelName() actually has the codename it's meant to scrub.
|
||||
const baseDir = this.config.getTargetDir();
|
||||
const note = attributionService.generateNotePayload(
|
||||
stagedInfo,
|
||||
baseDir,
|
||||
generatorName,
|
||||
this.config.getModel(),
|
||||
);
|
||||
const notesCommand = buildGitNotesCommand(note);
|
||||
|
||||
|
|
@ -1049,7 +1119,7 @@ export class ShellToolInvocation extends BaseToolInvocation<
|
|||
}
|
||||
|
||||
// Get diff sizes from stat output
|
||||
const diffSizes = this.parseDiffStat(statOutput);
|
||||
const diffSizes = parseDiffStat(statOutput);
|
||||
|
||||
return { files, diffSizes, deletedFiles };
|
||||
} catch {
|
||||
|
|
@ -1061,38 +1131,19 @@ export class ShellToolInvocation extends BaseToolInvocation<
|
|||
* Parse `git diff --stat` output to extract per-file change sizes.
|
||||
* Estimates character count as (insertions + deletions) * 40 chars/line.
|
||||
*/
|
||||
private parseDiffStat(statOutput: string): Map<string, number> {
|
||||
const sizes = new Map<string, number>();
|
||||
const lines = statOutput.split('\n').filter(Boolean);
|
||||
|
||||
for (const line of lines) {
|
||||
// Skip summary line ("N files changed, X insertions(+), Y deletions(-)")
|
||||
if (line.includes('file changed') || line.includes('files changed')) {
|
||||
continue;
|
||||
}
|
||||
// Format: " path/to/file | 5 ++---"
|
||||
const match = line.match(/^\s*(.+?)\s+\|\s+(\d+)/);
|
||||
if (match) {
|
||||
let filePath = match[1]!.trim();
|
||||
// Handle rename brace notation: "{old => new}" or "dir/{old => new}".
|
||||
// Extract the new name so the key matches --name-only output.
|
||||
filePath = filePath.replace(/\{[^}]*?=>\s*([^}]*)\}/g, '$1');
|
||||
// Handle plain rename notation when the rename crosses directories:
|
||||
// "old/path/file => new/path/file". Keep only the new path.
|
||||
if (filePath.includes('=>')) {
|
||||
const renameMatch = filePath.match(/^(.*?)\s=>\s(.*)$/);
|
||||
if (renameMatch) {
|
||||
filePath = renameMatch[2]!.trim();
|
||||
}
|
||||
}
|
||||
const changes = parseInt(match[2]!, 10);
|
||||
// Approximate chars: lines changed * avg 40 chars/line
|
||||
sizes.set(filePath, changes * 40);
|
||||
}
|
||||
}
|
||||
|
||||
return sizes;
|
||||
}
|
||||
/**
|
||||
* Parse `git diff --stat` output into a `path → approximate change size`
|
||||
* map. Approximate size is what ends up clamping `aiChars` in the
|
||||
* attribution payload, so missing entries silently zero out a file's
|
||||
* contribution — meaning binary edits should land in the map too.
|
||||
*
|
||||
* Two line formats handled:
|
||||
* - Text: ` path/to/file | 5 ++---` → `lines * 40` chars
|
||||
* - Binary: ` path/to/file | Bin 0 -> 123 b` → `|new - old|` bytes (≥1)
|
||||
*
|
||||
* Rename notations (`{old => new}` and bare `old => new`) are normalized
|
||||
* to the new path so lookups match `--name-only` output.
|
||||
*/
|
||||
|
||||
private addCoAuthorToGitCommit(command: string): string {
|
||||
// Check if commit co-author feature is enabled
|
||||
|
|
|
|||
|
|
@ -2048,7 +2048,7 @@
|
|||
"$version": {
|
||||
"type": "number",
|
||||
"description": "Settings schema version for migration tracking.",
|
||||
"default": 3
|
||||
"default": 4
|
||||
}
|
||||
},
|
||||
"additionalProperties": true
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ import type {
|
|||
SettingsSchema,
|
||||
} from '../packages/cli/src/config/settingsSchema.js';
|
||||
import { getSettingsSchema } from '../packages/cli/src/config/settingsSchema.js';
|
||||
import { SETTINGS_VERSION } from '../packages/cli/src/config/settings.js';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
|
@ -194,11 +195,12 @@ function generateJsonSchema(
|
|||
);
|
||||
}
|
||||
|
||||
// Add $version property
|
||||
// Add $version property — sourced from settings.ts so a SETTINGS_VERSION
|
||||
// bump propagates here instead of needing a parallel manual edit.
|
||||
jsonSchema.properties!['$version'] = {
|
||||
type: 'number',
|
||||
description: 'Settings schema version for migration tracking.',
|
||||
default: 3,
|
||||
default: SETTINGS_VERSION,
|
||||
};
|
||||
|
||||
return jsonSchema;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue