mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-15 01:04:44 +00:00
fix(shell): scope -m rewrite to commit segment, reject nested matches
Two Critical findings on addCoAuthorToGitCommit, plus a Copilot maintainability nit: - The `-m` regex used to scan the whole compound command, so `git commit -m "fix" && git tag -a v1 -m "release"` would target the LATER tag annotation (last -m wins) and splice the trailer there instead of the commit message. The rewrite now scopes to the actual `git commit` segment via a new findAttributableCommitSegment(): same shell-aware walk gitCommitContext does, but returning the segment's character range so the regex can be run on a slice and spliced back into the original command. - Within the segment, a literal `-m '...'` *inside* a quoted body was treated as a real later -m. For `git commit -m "docs mention -m 'flag' for completeness"`, the inner single-quoted -m sits at a higher index than the real outer -m, and the previous index comparison would have it win — splicing the trailer mid-message and corrupting the quoting. The new code checks whether the candidate is nested inside the other quote-style's range (start/end containment) and prefers the outer match when so. - Hoisted three constant Sets (sudo flag list, git global flags taking values, git global flags shifting cwd, gh global flags) out of the per-call scope to module constants. Functional no-op, but keeps the parsing helpers easier to read and avoids re-allocating the Sets on every command. Two regression tests added for the cases above: - inner `-m '...'` inside the outer message body is preserved literally and the trailer lands after the body - `git tag -a v1 -m "release notes"` after a real `git commit -m "fix"` is left untouched, with the trailer appended to "fix" only
This commit is contained in:
parent
2dd792c87e
commit
222b1884dc
2 changed files with 195 additions and 61 deletions
|
|
@ -1315,6 +1315,75 @@ describe('ShellTool', () => {
|
|||
expect(observed).toMatch(/-m\s+"Title"\s/);
|
||||
});
|
||||
|
||||
// Concern: a literal `-m '...'` *inside* a quoted commit
|
||||
// message body could be picked up by the regex as if it were a
|
||||
// real later argument, splicing the trailer mid-message and
|
||||
// breaking the command's quoting.
|
||||
it('should not be fooled by a literal -m token inside the quoted message body', async () => {
|
||||
const command =
|
||||
'git commit -m "docs mention -m \'flag\' for completeness"';
|
||||
const invocation = shellTool.build({ command, is_background: false });
|
||||
const promise = invocation.execute(mockAbortSignal);
|
||||
|
||||
resolveExecutionPromise({
|
||||
rawOutput: Buffer.from(''),
|
||||
output: '',
|
||||
exitCode: 0,
|
||||
signal: null,
|
||||
error: null,
|
||||
aborted: false,
|
||||
pid: 12345,
|
||||
executionMethod: 'child_process',
|
||||
});
|
||||
|
||||
await promise;
|
||||
|
||||
const observed = mockShellExecutionService.mock.calls[0][0];
|
||||
// The original message body must be preserved end-to-end —
|
||||
// no trailer spliced before its closing quote.
|
||||
expect(observed).toContain(
|
||||
"-m \"docs mention -m 'flag' for completeness",
|
||||
);
|
||||
// The trailer must land AFTER the original body, just before
|
||||
// the message's outer closing quote.
|
||||
expect(observed).toMatch(
|
||||
/docs mention -m 'flag' for completeness\s+Co-authored-by:[^"]+"/s,
|
||||
);
|
||||
});
|
||||
|
||||
// Concern: a later `git tag -m "..."` in the same compound
|
||||
// command could be mistaken for the commit message because the
|
||||
// regex was matching across the whole command string.
|
||||
it('should target the commit message, not a later git tag -m in the same chain', async () => {
|
||||
const command =
|
||||
'git commit -m "fix" && git tag -a v1 -m "release notes"';
|
||||
const invocation = shellTool.build({ command, is_background: false });
|
||||
const promise = invocation.execute(mockAbortSignal);
|
||||
|
||||
resolveExecutionPromise({
|
||||
rawOutput: Buffer.from(''),
|
||||
output: '',
|
||||
exitCode: 0,
|
||||
signal: null,
|
||||
error: null,
|
||||
aborted: false,
|
||||
pid: 12345,
|
||||
executionMethod: 'child_process',
|
||||
});
|
||||
|
||||
await promise;
|
||||
|
||||
const observed = mockShellExecutionService.mock.calls[0][0];
|
||||
// The trailer is appended to the commit message body...
|
||||
expect(observed).toMatch(/git commit -m "fix\s+Co-authored-by:[^"]+"/s);
|
||||
// ...and the later `git tag -m` is left exactly as the user
|
||||
// wrote it.
|
||||
expect(observed).toContain('git tag -a v1 -m "release notes"');
|
||||
// The tag annotation must not have a trailer spliced in.
|
||||
const tagMatch = observed.match(/git tag .*-m "([^"]*)"/);
|
||||
expect(tagMatch?.[1]).toBe('release notes');
|
||||
});
|
||||
|
||||
it('should add co-author when git commit is prefixed with sudo', async () => {
|
||||
const command = 'sudo git commit -m "Test"';
|
||||
const invocation = shellTool.build({ command, is_background: false });
|
||||
|
|
|
|||
|
|
@ -127,21 +127,6 @@ function tokeniseSegment(segment: string): string[] | null {
|
|||
if (tokens[i] === 'sudo' || tokens[i] === 'command') {
|
||||
const wrapper = tokens[i];
|
||||
i++;
|
||||
const sudoFlagsWithValue = new Set([
|
||||
'-u',
|
||||
'-g',
|
||||
'-h',
|
||||
'-D',
|
||||
'-r',
|
||||
'-t',
|
||||
'-C',
|
||||
'--user',
|
||||
'--group',
|
||||
'--host',
|
||||
'--chdir',
|
||||
'--role',
|
||||
'--type',
|
||||
]);
|
||||
while (i < tokens.length && tokens[i]!.startsWith('-')) {
|
||||
const flag = tokens[i]!;
|
||||
i++;
|
||||
|
|
@ -149,7 +134,7 @@ function tokeniseSegment(segment: string): string[] | null {
|
|||
// `command`'s flag-only options we leave i alone.
|
||||
if (
|
||||
wrapper === 'sudo' &&
|
||||
sudoFlagsWithValue.has(flag) &&
|
||||
SUDO_FLAGS_WITH_VALUE.has(flag) &&
|
||||
i < tokens.length
|
||||
) {
|
||||
i++;
|
||||
|
|
@ -159,6 +144,22 @@ function tokeniseSegment(segment: string): string[] | null {
|
|||
return tokens.slice(i);
|
||||
}
|
||||
|
||||
const SUDO_FLAGS_WITH_VALUE = new Set([
|
||||
'-u',
|
||||
'-g',
|
||||
'-h',
|
||||
'-D',
|
||||
'-r',
|
||||
'-t',
|
||||
'-C',
|
||||
'--user',
|
||||
'--group',
|
||||
'--host',
|
||||
'--chdir',
|
||||
'--role',
|
||||
'--type',
|
||||
]);
|
||||
|
||||
/**
|
||||
* Walk a `git ...` token sequence past git's global flags
|
||||
* (`-c key=val`, `-C path`, `--no-pager`, `--git-dir`, `--work-tree`,
|
||||
|
|
@ -169,31 +170,31 @@ function tokeniseSegment(segment: string): string[] | null {
|
|||
* `changesCwd` is true when any of the consumed flags would relocate
|
||||
* the working directory (`-C`, `--git-dir`, `--work-tree`).
|
||||
*/
|
||||
// Two-token global flags whose second token is consumed as a value.
|
||||
const GIT_GLOBAL_FLAGS_TAKES_VALUE = new Set([
|
||||
'-c',
|
||||
'-C',
|
||||
'--git-dir',
|
||||
'--work-tree',
|
||||
'--namespace',
|
||||
'--exec-path',
|
||||
'--config-env',
|
||||
'--super-prefix',
|
||||
'--list-cmds',
|
||||
]);
|
||||
// Flags whose presence shifts cwd interpretation.
|
||||
const GIT_GLOBAL_FLAGS_SHIFTS_CWD = new Set(['-C', '--git-dir', '--work-tree']);
|
||||
|
||||
function parseGitInvocation(tokens: string[]): {
|
||||
subcommand: string | undefined;
|
||||
changesCwd: boolean;
|
||||
} {
|
||||
// Two-token global flags whose second token is consumed as a value.
|
||||
const TAKES_VALUE = new Set([
|
||||
'-c',
|
||||
'-C',
|
||||
'--git-dir',
|
||||
'--work-tree',
|
||||
'--namespace',
|
||||
'--exec-path',
|
||||
'--config-env',
|
||||
'--super-prefix',
|
||||
'--list-cmds',
|
||||
]);
|
||||
// Flags whose presence shifts cwd interpretation.
|
||||
const SHIFTS_CWD = new Set(['-C', '--git-dir', '--work-tree']);
|
||||
|
||||
let i = 1; // skip 'git'
|
||||
let changesCwd = false;
|
||||
while (i < tokens.length) {
|
||||
const t = tokens[i]!;
|
||||
if (TAKES_VALUE.has(t)) {
|
||||
if (SHIFTS_CWD.has(t)) changesCwd = true;
|
||||
if (GIT_GLOBAL_FLAGS_TAKES_VALUE.has(t)) {
|
||||
if (GIT_GLOBAL_FLAGS_SHIFTS_CWD.has(t)) changesCwd = true;
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -295,12 +296,13 @@ function gitCommitContext(command: string): {
|
|||
* `parseGitInvocation`: a fixed-position check at index 1 misses
|
||||
* `gh --repo owner/repo pr create ...`, which is a common form.
|
||||
*/
|
||||
const GH_GLOBAL_FLAGS_TAKES_VALUE = new Set(['--repo', '-R', '--hostname']);
|
||||
|
||||
function parseGhInvocation(tokens: string[]): string[] {
|
||||
const TAKES_VALUE = new Set(['--repo', '-R', '--hostname']);
|
||||
let i = 1; // skip 'gh'
|
||||
while (i < tokens.length) {
|
||||
const t = tokens[i]!;
|
||||
if (TAKES_VALUE.has(t)) {
|
||||
if (GH_GLOBAL_FLAGS_TAKES_VALUE.has(t)) {
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -321,6 +323,46 @@ function parseGhInvocation(tokens: string[]): string[] {
|
|||
return [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Locate the character range of the *first* attributable
|
||||
* `git commit` invocation in the (potentially compound) command, or
|
||||
* `null` if none is attributable in the current cwd. The range
|
||||
* covers the segment as `splitCommands` tokenised it — i.e. just
|
||||
* the `git commit ...` part, NOT later `&& git tag -m ...` or
|
||||
* earlier `git status &&` segments.
|
||||
*
|
||||
* Used by `addCoAuthorToGitCommit` to scope the `-m` regex rewrite
|
||||
* so a later `git tag -m "..."` (different sub-command in the same
|
||||
* compound) can't be mistaken for the commit message.
|
||||
*/
|
||||
function findAttributableCommitSegment(
|
||||
command: string,
|
||||
): { start: number; end: number } | null {
|
||||
let cursor = 0;
|
||||
let cwdShifted = false;
|
||||
for (const sub of splitCommands(command)) {
|
||||
const start = command.indexOf(sub, cursor);
|
||||
if (start < 0) continue;
|
||||
const end = start + sub.length;
|
||||
cursor = end;
|
||||
const tokens = tokeniseSegment(sub);
|
||||
if (!tokens || tokens.length === 0) continue;
|
||||
const program = tokens[0]!;
|
||||
if (program === 'cd') {
|
||||
if (!cwdShifted) cwdShifted = true;
|
||||
continue;
|
||||
}
|
||||
if (program === 'git') {
|
||||
const { subcommand, changesCwd } = parseGitInvocation(tokens);
|
||||
if (subcommand === 'commit' && !cwdShifted && !changesCwd) {
|
||||
return { start, end };
|
||||
}
|
||||
if (changesCwd && !cwdShifted) cwdShifted = true;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect whether `command` invokes `gh pr create` at the top level —
|
||||
* same shell-aware shape as `gitCommitContext` so quoted text like
|
||||
|
|
@ -1570,7 +1612,8 @@ export class ShellToolInvocation extends BaseToolInvocation<
|
|||
// (with the trailer mid-string) back to the executor. The stricter
|
||||
// `attributableInCwd` is what we want here: only inject the
|
||||
// trailer when we're confident the commit lands in our cwd.
|
||||
if (!gitCommitContext(command).attributableInCwd) {
|
||||
const segmentRange = findAttributableCommitSegment(command);
|
||||
if (!segmentRange) {
|
||||
return command;
|
||||
}
|
||||
|
||||
|
|
@ -1581,6 +1624,10 @@ export class ShellToolInvocation extends BaseToolInvocation<
|
|||
// both `-m foo` and `-mfoo`, and we shouldn't silently skip the
|
||||
// shorthand form.
|
||||
//
|
||||
// The regex is scoped to the actual `git commit` segment (not the
|
||||
// whole compound command) so a later `git tag -a v1 -m "..."` in
|
||||
// the same chain can't be mistaken for the commit message.
|
||||
//
|
||||
// Pattern breakdown:
|
||||
// -[a-zA-Z]*m matches -m, -am, -nm, etc. (combined short flags)
|
||||
// \s* matches optional whitespace after the flag
|
||||
|
|
@ -1595,6 +1642,7 @@ export class ShellToolInvocation extends BaseToolInvocation<
|
|||
// and a wrong rewrite would mid-insert the trailer and break the
|
||||
// command's quoting.
|
||||
const singleQuotePattern = /(-[a-zA-Z]*m\s*)'([^']*)'(?!\\'')/g;
|
||||
const segment = command.slice(segmentRange.start, segmentRange.end);
|
||||
// Git concatenates multiple `-m` values with a blank line, so the
|
||||
// co-author trailer has to land in the *last* `-m` value to be
|
||||
// recognised by `git interpret-trailers`. matchAll → take the
|
||||
|
|
@ -1606,26 +1654,44 @@ export class ShellToolInvocation extends BaseToolInvocation<
|
|||
for (const m of matches) result = m;
|
||||
return result;
|
||||
};
|
||||
const doubleMatch = lastMatch(command.matchAll(doubleQuotePattern));
|
||||
const singleMatch = lastMatch(command.matchAll(singleQuotePattern));
|
||||
// Pick whichever match appears LAST in the command string,
|
||||
// regardless of quote style. For `-m "Title" -m 'Body'`, a
|
||||
// simple `doubleMatch ?? singleMatch` would target the title
|
||||
// (last double match) and bury the trailer in the wrong field —
|
||||
// git interpret-trailers only recognises a trailer at the end
|
||||
// of the final `-m` value.
|
||||
const match =
|
||||
doubleMatch && singleMatch
|
||||
? (doubleMatch.index ?? 0) > (singleMatch.index ?? 0)
|
||||
? doubleMatch
|
||||
: singleMatch
|
||||
: (doubleMatch ?? singleMatch);
|
||||
const doubleMatch = lastMatch(segment.matchAll(doubleQuotePattern));
|
||||
const singleMatch = lastMatch(segment.matchAll(singleQuotePattern));
|
||||
|
||||
// Pick whichever match appears LAST in the segment, regardless of
|
||||
// quote style — but reject any candidate that's nested inside the
|
||||
// other's range. For `git commit -m "docs mention -m 'flag'"` the
|
||||
// single-quoted `-m 'flag'` lives INSIDE the double-quoted real
|
||||
// message; without a nesting check the later (inner) `-m` would
|
||||
// win and the trailer would be spliced into the body text.
|
||||
const matchRange = (m: RegExpMatchArray | null) =>
|
||||
m ? { start: m.index ?? 0, end: (m.index ?? 0) + m[0].length } : null;
|
||||
const isInside = (
|
||||
inner: RegExpMatchArray | null,
|
||||
outer: RegExpMatchArray | null,
|
||||
): boolean => {
|
||||
const i = matchRange(inner);
|
||||
const o = matchRange(outer);
|
||||
return !!(i && o && i.start >= o.start && i.end <= o.end);
|
||||
};
|
||||
let match: RegExpMatchArray | null;
|
||||
if (doubleMatch && singleMatch) {
|
||||
if (isInside(singleMatch, doubleMatch)) {
|
||||
match = doubleMatch;
|
||||
} else if (isInside(doubleMatch, singleMatch)) {
|
||||
match = singleMatch;
|
||||
} else {
|
||||
match =
|
||||
(doubleMatch.index ?? 0) > (singleMatch.index ?? 0)
|
||||
? doubleMatch
|
||||
: singleMatch;
|
||||
}
|
||||
} else {
|
||||
match = doubleMatch ?? singleMatch;
|
||||
}
|
||||
const quote = match === doubleMatch ? '"' : "'";
|
||||
|
||||
// Escape the configured name/email for the surrounding quote
|
||||
// style — has to follow the actually-selected match, not just
|
||||
// whether a double-quoted -m exists somewhere earlier (a later
|
||||
// single-quoted -m wins on index comparison above).
|
||||
// style — has to follow the actually-selected match.
|
||||
const escape =
|
||||
match === doubleMatch
|
||||
? escapeForBashDoubleQuote
|
||||
|
|
@ -1639,16 +1705,15 @@ export class ShellToolInvocation extends BaseToolInvocation<
|
|||
const newMessage = existingMessage + coAuthor;
|
||||
const replacement = prefix + quote + newMessage + quote;
|
||||
|
||||
// Use match.index + slice (rather than indexOf) so multiple
|
||||
// `-m` flags don't collide — we want the position of the
|
||||
// *last* match, not the first occurrence of a string that
|
||||
// could appear earlier in the command.
|
||||
const idx = match.index ?? command.indexOf(fullMatch);
|
||||
if (idx >= 0) {
|
||||
// Splice the modified segment back into the original command,
|
||||
// preserving everything outside the commit segment exactly as
|
||||
// the caller had it.
|
||||
const matchStart = (match.index ?? 0) + segmentRange.start;
|
||||
if (matchStart >= segmentRange.start) {
|
||||
return (
|
||||
command.slice(0, idx) +
|
||||
command.slice(0, matchStart) +
|
||||
replacement +
|
||||
command.slice(idx + fullMatch.length)
|
||||
command.slice(matchStart + fullMatch.length)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue