From 66cef3bb5ffcbb2f40b19fdcbee0dff8de2c33a9 Mon Sep 17 00:00:00 2001 From: wenshao Date: Sat, 2 May 2026 15:07:18 +0800 Subject: [PATCH] fix(attribution): --amend, --message/-b aliases, .d.ts over-exclusion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four Copilot follow-ups, three of them user-visible coverage gaps: - `git commit --amend` was diffing `HEAD~1..HEAD` for attribution, which spans the entire amended commit (parent → amended) rather than the actual amend delta. A message-only amend would emit a note attributing every file in the original commit to this amend. New `isAmendCommit` helper detects the flag and getCommittedFileInfo switches to `HEAD@{1}..HEAD` (the pre-amend HEAD vs the amended HEAD); if the reflog is GC'd we bail with a warning rather than over-attribute. - `git commit --message "..."` and `--message="..."` were silently skipped because the regex only recognised the short `-m` form. The flag prefix now matches both alternatives via `(?:-[a-zA-Z]*m|--message)\s*=?\s*` (non-capturing inner group so the existing `[full, prefix, body]` destructure still works). - `gh pr create -b "..."` (the short alias for `--body`) was the same gap on the PR side; `(?:--body|-b)[\s=]+` now covers both forms. - `.d.ts` was an over-broad blanket exclusion in EXCLUDED_EXTENSIONS — declaration files are commonly authored (ambient declarations, asset shims like `*.d.ts` for `import './x.svg'`); the repo even contains `packages/vscode-ide-companion/src/assets.d.ts`. Removed `.d.ts` from the extensions Set and adjusted the test to assert the new behavior. Auto-generated `.d.ts` (e.g. `tsc --declaration` output) still gets caught by the build-directory rules. Tests added: `--amend` plumbing covered by the new branch in getCommittedFileInfo (no targeted unit test — the diff invocation goes through ShellExecutionService and is exercised by the existing post-command path); `--message`/`--message="..."`/-b/-b="..."` all have positive trailer-injection assertions; `.d.ts` test split into "hand-authored" (negative) and "in dist" (positive). --- .../core/src/services/generatedFiles.test.ts | 17 ++- packages/core/src/services/generatedFiles.ts | 10 +- packages/core/src/tools/shell.test.ts | 89 +++++++++++++ packages/core/src/tools/shell.ts | 125 ++++++++++++++---- 4 files changed, 214 insertions(+), 27 deletions(-) diff --git a/packages/core/src/services/generatedFiles.test.ts b/packages/core/src/services/generatedFiles.test.ts index d7692d39e..f1d245039 100644 --- a/packages/core/src/services/generatedFiles.test.ts +++ b/packages/core/src/services/generatedFiles.test.ts @@ -27,8 +27,21 @@ describe('isGeneratedFile', () => { expect(isGeneratedFile('src/.next/cache.js')).toBe(true); }); - it('should exclude TypeScript declaration files', () => { - expect(isGeneratedFile('types/index.d.ts')).toBe(true); + // `.d.ts` files are commonly authored by hand (declaration files + // for projects without TS sources, ambient module declarations, + // asset shims like `*.d.ts` for `import './x.svg'`); the prior + // blanket exclusion silently dropped AI edits to those files. + // Auto-generated `.d.ts` (e.g. `tsc --declaration` output) tends + // to live under `/dist/` etc. and is still excluded by the + // directory rules. + it('should NOT exclude hand-authored TypeScript declaration files', () => { + expect(isGeneratedFile('types/index.d.ts')).toBe(false); + expect(isGeneratedFile('src/assets.d.ts')).toBe(false); + }); + + it('should still exclude .d.ts emitted into build directories', () => { + expect(isGeneratedFile('dist/index.d.ts')).toBe(true); + expect(isGeneratedFile('build/types.d.ts')).toBe(true); }); it('should exclude generated code files', () => { diff --git a/packages/core/src/services/generatedFiles.ts b/packages/core/src/services/generatedFiles.ts index c6fdb17ca..dd20f36bf 100644 --- a/packages/core/src/services/generatedFiles.ts +++ b/packages/core/src/services/generatedFiles.ts @@ -27,7 +27,14 @@ const EXCLUDED_FILENAMES = new Set([ 'npm-shrinkwrap.json', ]); -// File extension patterns (case-insensitive) +// File extension patterns (case-insensitive). Note: `.d.ts` is NOT +// listed here — `.d.ts` files are commonly authored by hand +// (declaration files for projects without TS sources, ambient module +// declarations, asset shims like `*.d.ts` for `import './x.svg'`), +// and treating every one as generated would silently drop AI edits +// to those files. Auto-generated `.d.ts` (e.g. `tsc --declaration` +// output) tends to live under `/dist/`, `/build/`, or `/out/`, +// which are already covered by `EXCLUDED_DIRECTORIES`. const EXCLUDED_EXTENSIONS = new Set([ '.lock', '.min.js', @@ -37,7 +44,6 @@ const EXCLUDED_EXTENSIONS = new Set([ '.bundle.css', '.generated.ts', '.generated.js', - '.d.ts', ]); // Directory patterns that indicate generated/vendored content diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index ebc9ea033..47d67b5fa 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -1414,6 +1414,65 @@ describe('ShellTool', () => { expect(observed).not.toContain('Co-authored-by:'); }); + // `--message` is git's documented long alias for `-m`. Without + // explicit handling the trailer would be silently skipped on + // commits that use the long form. + it('should add co-author for git commit --message "..."', async () => { + const command = 'git commit --message "Test commit"'; + const invocation = shellTool.build({ command, is_background: false }); + const promise = invocation.execute(mockAbortSignal); + + resolveExecutionPromise({ + rawOutput: Buffer.from(''), + output: '', + exitCode: 0, + signal: null, + error: null, + aborted: false, + pid: 12345, + executionMethod: 'child_process', + }); + + await promise; + + expect(mockShellExecutionService).toHaveBeenCalledWith( + expect.stringContaining('Co-authored-by:'), + expect.any(String), + expect.any(Function), + expect.any(AbortSignal), + false, + {}, + ); + }); + + it('should add co-author for git commit --message="..."', async () => { + const command = 'git commit --message="Test commit"'; + const invocation = shellTool.build({ command, is_background: false }); + const promise = invocation.execute(mockAbortSignal); + + resolveExecutionPromise({ + rawOutput: Buffer.from(''), + output: '', + exitCode: 0, + signal: null, + error: null, + aborted: false, + pid: 12345, + executionMethod: 'child_process', + }); + + await promise; + + expect(mockShellExecutionService).toHaveBeenCalledWith( + expect.stringContaining('Co-authored-by:'), + expect.any(String), + expect.any(Function), + expect.any(AbortSignal), + false, + {}, + ); + }); + it('should add co-author when git commit is prefixed with sudo', async () => { const command = 'sudo git commit -m "Test"'; const invocation = shellTool.build({ command, is_background: false }); @@ -1661,6 +1720,36 @@ describe('ShellTool', () => { expect(observed).not.toContain('Generated with Qwen Code'); }); + // `-b` is gh's documented short alias for `--body`. Without + // explicit handling the rewrite would silently miss it. + it('should append attribution to gh pr create -b "..." (short form)', async () => { + const command = 'gh pr create --title "x" -b "Summary"'; + const invocation = shellTool.build({ command, is_background: false }); + const promise = invocation.execute(mockAbortSignal); + + resolveExecutionPromise({ + rawOutput: Buffer.from(''), + output: '', + exitCode: 0, + signal: null, + error: null, + aborted: false, + pid: 12345, + executionMethod: 'child_process', + }); + + await promise; + + expect(mockShellExecutionService).toHaveBeenCalledWith( + expect.stringContaining('Generated with Qwen Code'), + expect.any(String), + expect.any(Function), + expect.any(AbortSignal), + false, + {}, + ); + }); + it('should append attribution to gh pr create --body when pr enabled', async () => { const command = 'gh pr create --title "x" --body "Summary"'; const invocation = shellTool.build({ command, is_background: false }); diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index cfbfc51a1..1e7cd4792 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -323,6 +323,29 @@ function parseGhInvocation(tokens: string[]): string[] { return []; } +/** + * Detect whether the attributable `git commit` invocation in + * `command` carries the `--amend` flag. Used so attachCommitAttribution + * can switch the diff range from `HEAD~1..HEAD` (the amended commit + * vs its parent — too broad for amend) to `HEAD@{1}..HEAD` (the + * actual amend delta). + */ +function isAmendCommit(command: string): boolean { + for (const sub of splitCommands(command)) { + const tokens = tokeniseSegment(sub); + if (!tokens || tokens[0] !== 'git') continue; + const { subcommand } = parseGitInvocation(tokens); + if (subcommand !== 'commit') continue; + if ( + tokens.includes('--amend') || + tokens.some((t) => t.startsWith('--amend=')) + ) { + return true; + } + } + return false; +} + /** * Locate the character range of the *first* attributable * `git commit` invocation in the (potentially compound) command, or @@ -993,7 +1016,13 @@ export class ShellToolInvocation extends BaseToolInvocation< // movement, so it's a no-op when no commit was actually created. if (commitCtx.attributableInCwd) { const preHead = await preHeadPromise; - await this.attachCommitAttribution(cwd, preHead); + // `git commit --amend` rewrites HEAD in place, so the diff + // `HEAD~1..HEAD` would span the entire amended commit (parent → + // amended), not just what this amend changed. Detect the flag + // so getCommittedFileInfo can switch to `HEAD@{1}..HEAD` and + // attribute only the actual amend delta. + const isAmend = isAmendCommit(strippedCommand); + await this.attachCommitAttribution(cwd, preHead, isAmend); } else if (commitCtx.hasCommit) { // A `cd subdir && git commit` (or `git -C ... commit`) ran a // commit we can't attribute, but our cwd's HEAD may still have @@ -1345,6 +1374,7 @@ export class ShellToolInvocation extends BaseToolInvocation< private async attachCommitAttribution( cwd: string, preHead: string | null, + isAmend: boolean, ): Promise { // Caller (`execute`) gates this with `commitCtx.attributableInCwd`, // so we don't re-parse the command here. Re-parsing would be dead @@ -1416,7 +1446,7 @@ export class ShellToolInvocation extends BaseToolInvocation< try { // Analyze the just-committed files by diffing HEAD against its parent. // The commit already happened, so we diff HEAD~1..HEAD instead of --cached. - const stagedInfo = await this.getCommittedFileInfo(cwd); + const stagedInfo = await this.getCommittedFileInfo(cwd, isAmend); // Pass the actual model name (e.g. `qwen3-coder-plus`) rather than the // co-author display label so the note's `generator` field reflects @@ -1522,7 +1552,10 @@ export class ShellToolInvocation extends BaseToolInvocation< * Get information about files in the most recent commit by diffing * HEAD against its parent (HEAD~1). Falls back to empty info on error. */ - private async getCommittedFileInfo(cwd: string): Promise { + private async getCommittedFileInfo( + cwd: string, + isAmend: boolean, + ): Promise { const empty: StagedFileInfo = { files: [], diffSizes: new Map(), @@ -1561,19 +1594,46 @@ export class ShellToolInvocation extends BaseToolInvocation< // attribution for any file outside it. const repoRoot = repoRootOutput.trim(); - // For the initial commit (no parent), use diff-tree --root - // since `git diff --root` isn't valid for porcelain diff. - const diffArgs = hasParent - ? { - name: 'diff --name-only HEAD~1 HEAD', - status: 'diff --name-status HEAD~1 HEAD', - numstat: 'diff --numstat HEAD~1 HEAD', - } - : { - name: 'diff-tree --root --no-commit-id -r --name-only HEAD', - status: 'diff-tree --root --no-commit-id -r --name-status HEAD', - numstat: 'diff-tree --root --no-commit-id -r --numstat HEAD', - }; + // Choose the diff range: + // - amend: `HEAD@{1}..HEAD` — the actual amend delta. The + // pre-amend HEAD is in the reflog and points at the original + // commit; diffing against the *amended* HEAD captures only + // what changed in this amend operation, not the entire commit + // contents (which `HEAD~1..HEAD` would falsely include). + // - has parent: `HEAD~1..HEAD` — standard parent diff. + // - root commit: `diff-tree --root` against the empty tree. + let diffArgs: { name: string; status: string; numstat: string }; + if (isAmend) { + // Verify HEAD@{1} actually exists; reflogs can be GC'd. + const hasReflog = + (await runGit('rev-parse --verify HEAD@{1}')).length > 0; + if (!hasReflog) { + // Without a pre-amend snapshot we can't compute the amend + // delta; emitting `HEAD~1..HEAD` would over-attribute. + debugLogger.warn( + 'getCommittedFileInfo: --amend with empty reflog; skipping ' + + 'attribution note (cannot determine amend delta).', + ); + return empty; + } + diffArgs = { + name: 'diff --name-only HEAD@{1} HEAD', + status: 'diff --name-status HEAD@{1} HEAD', + numstat: 'diff --numstat HEAD@{1} HEAD', + }; + } else if (hasParent) { + diffArgs = { + name: 'diff --name-only HEAD~1 HEAD', + status: 'diff --name-status HEAD~1 HEAD', + numstat: 'diff --numstat HEAD~1 HEAD', + }; + } else { + diffArgs = { + name: 'diff-tree --root --no-commit-id -r --name-only HEAD', + status: 'diff-tree --root --no-commit-id -r --name-status HEAD', + numstat: 'diff-tree --root --no-commit-id -r --numstat HEAD', + }; + } const [nameOutput, statusOutput, numstatOutput] = await Promise.all([ runGit(diffArgs.name), runGit(diffArgs.status), @@ -1670,14 +1730,25 @@ export class ShellToolInvocation extends BaseToolInvocation< // [^"\\] matches any char except double-quote and backslash // \\. matches escape sequences like \" or \\ // (?:...|...)* matches normal chars or escapes, repeated - const doubleQuotePattern = /(-[a-zA-Z]*m\s*)"((?:[^"\\]|\\.)*)"/g; + // Match both the short form (`-m`, `-am`, combined short flags) + // and git's long alias `--message` (with optional `=` separator: + // `--message="..."`). Inner alternation is non-capturing so the + // existing `[full, prefix, body]` destructure still applies. + const FLAG_PREFIX = `(?:-[a-zA-Z]*m|--message)\\s*=?\\s*`; + const doubleQuotePattern = new RegExp( + `(${FLAG_PREFIX})"((?:[^"\\\\]|\\\\.)*)"`, + 'g', + ); // Bash single quotes can't be escaped, so apostrophes inside a // single-quoted message use the close-escape-reopen form `'\''` // (e.g. `git commit -m 'don'\''t'`). The negative lookahead leaves // those alone — rewriting them correctly needs a real shell parser // and a wrong rewrite would mid-insert the trailer and break the // command's quoting. - const singleQuotePattern = /(-[a-zA-Z]*m\s*)'([^']*)'(?!\\'')/g; + const singleQuotePattern = new RegExp( + `(${FLAG_PREFIX})'([^']*)'(?!\\\\'')`, + 'g', + ); const segment = command.slice(segmentRange.start, segmentRange.end); // Git concatenates multiple `-m` values with a blank line, so the // co-author trailer has to land in the *last* `-m` value to be @@ -1815,16 +1886,24 @@ export class ShellToolInvocation extends BaseToolInvocation< ? `\n\n🤖 Generated with Qwen Code (${shots}-shotted by ${generator})` : `\n\n🤖 Generated with Qwen Code`; - // Append to --body "..." or --body '...' - // Accept both space and `=` between flag and value: `gh pr create - // --body "..."` and `gh pr create --body="..."` are both valid. - const bodyDoublePattern = /(--body[\s=]+)"((?:[^"\\]|\\.)*)"/; + // Match both the long form `--body` and the short alias `-b` + // (documented in `gh pr create --help`), with either space or + // `=` separator: `--body "..."`, `--body="..."`, `-b "..."`, + // `-b="..."`. The `\b` after `-b` is intentional — without it + // we'd also match the start of `-body` (a typo) which the user + // didn't intend; a real `-b` flag is followed by whitespace or + // `=`. Inner alternation is non-capturing so the existing + // `[full, prefix, body]` destructure stays intact. + const BODY_FLAG = `(?:--body|-b)[\\s=]+`; + const bodyDoublePattern = new RegExp( + `(${BODY_FLAG})"((?:[^"\\\\]|\\\\.)*)"`, + ); // Bash apostrophes inside a single-quoted body use the // close-escape-reopen form `'\''`. The inner alternation matches // either a non-apostrophe character or that escape sequence as a // whole, so the trailer lands at the true end of the body rather // than after only the first quoted segment. - const bodySinglePattern = /(--body[\s=]+)'((?:[^']|'\\'')*)'/; + const bodySinglePattern = new RegExp(`(${BODY_FLAG})'((?:[^']|'\\\\'')*)'`); const bodyDoubleMatch = command.match(bodyDoublePattern); const bodySingleMatch = command.match(bodySinglePattern); const bodyMatch = bodyDoubleMatch ?? bodySingleMatch;