fix(core): silence two CodeQL findings on PR #4151

CodeQL 223 — Incomplete multi-character sanitization (packages/core/src/permissions/classifier.ts:258)

A single `/<[^>]*>/g` pass can leave residual angle brackets when the input is crafted to overlap (e.g. `<scr<script>ipt>`). In our actual use case the sanitized string is a prompt fragment, not HTML output, so a "reconstituted script tag" doesn't matter — but iterating the strip until the string stabilises is cheap defense-in-depth and removes the warning. The loop is capped at 8 iterations, so the total work stays O(n) regardless of how the attacker structures the input.

CodeQL 222 — Polynomial regex on uncontrolled data (packages/core/src/permissions/dangerousRules.ts:93)

The regex `/[*]+$/` looks linear, but because it is not anchored at the start, a backtracking engine retries the match from every `*` of a long run that is not at the end of the string (e.g. many `*` followed by some other character), which is quadratic in the worst case — this is what CodeQL flags on user-controlled input. Replace the regex with a manual trailing-`*` strip via a counted loop + `slice` — same semantics, no regex engine involved, warning cleared.

Existing tests cover both branches (classifier transcript sanitizer test suite, dangerousRules interpreter coverage). No regressions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 12:44:02 +00:00 · 2026-05-18 11:03:45 +08:00 · 2026-05-18 11:03:45 +08:00 · c5cf60ee8f
commit c5cf60ee8f
parent c45ad0ed77
2 changed files with 25 additions and 6 deletions
--- a/packages/core/src/permissions/classifier.ts
+++ b/packages/core/src/permissions/classifier.ts
@@ -251,11 +251,23 @@ export async function classifyAction(
 */
 export function sanitizeClassifierReason(raw: string): string {
  if (!raw) return raw;
+
+  // Drop `<...>` pseudo-tags ("<system>...", "<user>...") that could be
+  // parsed as control fences by the main model's prompt.
+  //
+  // Replace iteratively until the string stabilises. A single `/g` pass
+  // can leave residual `<>` if the input was crafted to overlap (CodeQL
+  // 223). Bounded by a small iteration cap so the loop is always O(n)
+  // regardless of how the attacker structures the string.
+  let stripped = raw;
+  for (let i = 0; i < 8; i++) {
+    const next = stripped.replace(/<[^>]*>/g, '');
+    if (next === stripped) break;
+    stripped = next;
+  }
+
  return (
-    raw
-      // Drop `<...>` pseudo-tags ("<system>...", "<user>...") that could
-      // be parsed as control fences by the main model's prompt.
-      .replace(/<[^>]*>/g, '')
+    stripped
      // Collapse newlines / runs of whitespace — defeats multi-paragraph
      // attempts to stage a fake "instruction block".
      .replace(/\s+/g, ' ')
--- a/packages/core/src/permissions/dangerousRules.ts
+++ b/packages/core/src/permissions/dangerousRules.ts
@@ -89,8 +89,15 @@ const SHELL_LIKE_TOOLS: readonly string[] = Object.freeze([
 */
 function isInterpreterToken(rawToken: string): boolean {
  if (!rawToken) return false;
-  // Strip trailing wildcards / colons / arguments after `:`
-  const noWildcard = rawToken.replace(/[*]+$/, '');
+  // Strip trailing wildcards. Using a manual loop instead of `/[*]+$/`
+  // both because the regex form trips CodeQL's polynomial-regex
+  // heuristic (CodeQL 222) and because end-of-string trim is O(n) by
+  // construction here.
+  let end = rawToken.length;
+  while (end > 0 && rawToken.charCodeAt(end - 1) === 42 /* '*' */) {
+    end--;
+  }
+  const noWildcard = rawToken.slice(0, end);
  const beforeColon = noWildcard.split(':')[0];
  // Last path segment so `/usr/bin/python3` → `python3`
  const lastSegment = (beforeColon ?? '').split('/').pop() ?? '';