From c5cf60ee8fee2592f2a87c7a96d9379ae237ec69 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Mon, 18 May 2026 11:03:45 +0800 Subject: [PATCH] fix(core): silence two CodeQL findings on PR #4151 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeQL 223 — Incomplete multi-character sanitization (packages/core/src/permissions/classifier.ts:258) A single `/<[^>]*>/g` pass can leave residual angle-brackets when the input is crafted to overlap (e.g. `ipt>`). In our actual use case the sanitized string is a prompt fragment, not HTML output, so a "reconstituted script tag" doesn't matter — but iterating the strip until the string stabilises is cheap defense-in-depth and removes the warning. Bounded by 8 iterations so the loop is always O(n) regardless of how the attacker structures the input. CodeQL 222 — Polynomial regex on uncontrolled data (packages/core/src/permissions/dangerousRules.ts:93) The regex `/[*]+$/` is actually linear (single-character class + `$` anchor, no backtracking), but CodeQL flags any `replace(, ...)` applied to user-controlled input. Replace the regex with a manual trailing-`*` strip via `slice` + a counted loop — same semantics, no regex engine involved, warning cleared. Existing tests cover both branches (classifier transcript sanitizer test suite, dangerousRules interpreter coverage). No regressions. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/core/src/permissions/classifier.ts | 20 +++++++++++++++---- .../core/src/permissions/dangerousRules.ts | 11 ++++++++-- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/packages/core/src/permissions/classifier.ts b/packages/core/src/permissions/classifier.ts index b1d86fb8d..9b1197bd5 100644 --- a/packages/core/src/permissions/classifier.ts +++ b/packages/core/src/permissions/classifier.ts @@ -251,11 +251,23 @@ export async function classifyAction( */ export function sanitizeClassifierReason(raw: string): string { if (!raw) return raw; + + // Drop `<...>` pseudo-tags ("...", "...") that could be + // parsed as control fences by the main model's prompt. + // + // Replace iteratively until the string stabilises. A single `/g` pass + // can leave residual `<>` if the input was crafted to overlap (CodeQL + // 223). Bounded by a small iteration cap so the loop is always O(n) + // regardless of how the attacker structures the string. + let stripped = raw; + for (let i = 0; i < 8; i++) { + const next = stripped.replace(/<[^>]*>/g, ''); + if (next === stripped) break; + stripped = next; + } + return ( - raw - // Drop `<...>` pseudo-tags ("...", "...") that could - // be parsed as control fences by the main model's prompt. - .replace(/<[^>]*>/g, '') + stripped // Collapse newlines / runs of whitespace — defeats multi-paragraph // attempts to stage a fake "instruction block". .replace(/\s+/g, ' ') diff --git a/packages/core/src/permissions/dangerousRules.ts b/packages/core/src/permissions/dangerousRules.ts index dcc2d45f9..84722d716 100644 --- a/packages/core/src/permissions/dangerousRules.ts +++ b/packages/core/src/permissions/dangerousRules.ts @@ -89,8 +89,15 @@ const SHELL_LIKE_TOOLS: readonly string[] = Object.freeze([ */ function isInterpreterToken(rawToken: string): boolean { if (!rawToken) return false; - // Strip trailing wildcards / colons / arguments after `:` - const noWildcard = rawToken.replace(/[*]+$/, ''); + // Strip trailing wildcards. Using a manual loop instead of `/[*]+$/` + // both because the regex form trips CodeQL's polynomial-regex + // heuristic (CodeQL 222) and because end-of-string trim is O(n) by + // construction here. + let end = rawToken.length; + while (end > 0 && rawToken.charCodeAt(end - 1) === 42 /* '*' */) { + end--; + } + const noWildcard = rawToken.slice(0, end); const beforeColon = noWildcard.split(':')[0]; // Last path segment so `/usr/bin/python3` → `python3` const lastSegment = (beforeColon ?? '').split('/').pop() ?? '';