fix(core): silence two CodeQL findings on PR #4151

CodeQL 223 — Incomplete multi-character sanitization
(packages/core/src/permissions/classifier.ts:258)
A single `/<[^>]*>/g` pass can leave residual angle-brackets when the
input is crafted to overlap (e.g. `<scr<script>ipt>`). In our actual
use case the sanitized string is a prompt fragment, not HTML output,
so a "reconstituted script tag" doesn't matter — but iterating the
strip until the string stabilises is cheap defense-in-depth and
removes the warning. Capped at 8 passes, each linear in the input
length, so the total work stays O(n) regardless of how the attacker
structures the input.

CodeQL 222 — Polynomial regex on uncontrolled data
(packages/core/src/permissions/dangerousRules.ts:93)
The regex `/[*]+$/` looks linear, but it is not anchored at the start,
so a backtracking engine retries the `[*]+` run from every offset; a
long run of `*` followed by any other character costs O(n^2), which is
why CodeQL flags it on user-controlled input. Replace the regex with a
manual trailing-`*` strip via `slice` + a counted loop — same semantics,
no regex engine involved, warning cleared.

Existing tests cover both branches (classifier transcript sanitizer
test suite, dangerousRules interpreter coverage). No regressions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
LaZzyMan 2026-05-18 11:03:45 +08:00
parent c45ad0ed77
commit c5cf60ee8f
2 changed files with 25 additions and 6 deletions

View file

@ -251,11 +251,23 @@ export async function classifyAction(
*/
export function sanitizeClassifierReason(raw: string): string {
if (!raw) return raw;
// Drop `<...>` pseudo-tags ("<system>...", "<user>...") that could be
// parsed as control fences by the main model's prompt.
//
// Replace iteratively until the string stabilises. A single `/g` pass
// can leave residual `<>` if the input was crafted to overlap (CodeQL
// 223). Bounded by a small iteration cap so the loop is always O(n)
// regardless of how the attacker structures the string.
let stripped = raw;
for (let i = 0; i < 8; i++) {
const next = stripped.replace(/<[^>]*>/g, '');
if (next === stripped) break;
stripped = next;
}
return (
raw
// Drop `<...>` pseudo-tags ("<system>...", "<user>...") that could
// be parsed as control fences by the main model's prompt.
.replace(/<[^>]*>/g, '')
stripped
// Collapse newlines / runs of whitespace — defeats multi-paragraph
// attempts to stage a fake "instruction block".
.replace(/\s+/g, ' ')

View file

@ -89,8 +89,15 @@ const SHELL_LIKE_TOOLS: readonly string[] = Object.freeze([
*/
function isInterpreterToken(rawToken: string): boolean {
if (!rawToken) return false;
// Strip trailing wildcards / colons / arguments after `:`
const noWildcard = rawToken.replace(/[*]+$/, '');
// Strip trailing wildcards. Using a manual loop instead of `/[*]+$/`
// both because the regex form trips CodeQL's polynomial-regex
// heuristic (CodeQL 222) and because end-of-string trim is O(n) by
// construction here.
let end = rawToken.length;
while (end > 0 && rawToken.charCodeAt(end - 1) === 42 /* '*' */) {
end--;
}
const noWildcard = rawToken.slice(0, end);
const beforeColon = noWildcard.split(':')[0];
// Last path segment so `/usr/bin/python3` → `python3`
const lastSegment = (beforeColon ?? '').split('/').pop() ?? '';