mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-23 12:44:02 +00:00
fix(core): silence two CodeQL findings on PR #4151
CodeQL 223 — Incomplete multi-character sanitization (packages/core/src/permissions/classifier.ts:258) A single `/<[^>]*>/g` pass can leave residual angle-brackets when the input is crafted to overlap (e.g. `<scr<script>ipt>`). In our actual use case the sanitized string is a prompt fragment, not HTML output, so a "reconstituted script tag" doesn't matter — but iterating the strip until the string stabilises is cheap defense-in-depth and removes the warning. Bounded by 8 iterations so the loop is always O(n) regardless of how the attacker structures the input. CodeQL 222 — Polynomial regex on uncontrolled data (packages/core/src/permissions/dangerousRules.ts:93) The regex `/[*]+$/` is actually linear (single-character class + `$` anchor, no backtracking), but CodeQL flags any `replace(<regex>, ...)` applied to user-controlled input. Replace the regex with a manual trailing-`*` strip via `slice` + a counted loop — same semantics, no regex engine involved, warning cleared. Existing tests cover both branches (classifier transcript sanitizer test suite, dangerousRules interpreter coverage). No regressions. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c45ad0ed77
commit
c5cf60ee8f
2 changed files with 25 additions and 6 deletions
|
|
@ -251,11 +251,23 @@ export async function classifyAction(
|
|||
*/
|
||||
export function sanitizeClassifierReason(raw: string): string {
|
||||
if (!raw) return raw;
|
||||
|
||||
// Drop `<...>` pseudo-tags ("<system>...", "<user>...") that could be
|
||||
// parsed as control fences by the main model's prompt.
|
||||
//
|
||||
// Replace iteratively until the string stabilises. A single `/g` pass
|
||||
// can leave residual `<>` if the input was crafted to overlap (CodeQL
|
||||
// 223). Bounded by a small iteration cap so the loop is always O(n)
|
||||
// regardless of how the attacker structures the string.
|
||||
let stripped = raw;
|
||||
for (let i = 0; i < 8; i++) {
|
||||
const next = stripped.replace(/<[^>]*>/g, '');
|
||||
if (next === stripped) break;
|
||||
stripped = next;
|
||||
}
|
||||
|
||||
return (
|
||||
raw
|
||||
// Drop `<...>` pseudo-tags ("<system>...", "<user>...") that could
|
||||
// be parsed as control fences by the main model's prompt.
|
||||
.replace(/<[^>]*>/g, '')
|
||||
stripped
|
||||
// Collapse newlines / runs of whitespace — defeats multi-paragraph
|
||||
// attempts to stage a fake "instruction block".
|
||||
.replace(/\s+/g, ' ')
|
||||
|
|
|
|||
|
|
@ -89,8 +89,15 @@ const SHELL_LIKE_TOOLS: readonly string[] = Object.freeze([
|
|||
*/
|
||||
function isInterpreterToken(rawToken: string): boolean {
|
||||
if (!rawToken) return false;
|
||||
// Strip trailing wildcards / colons / arguments after `:`
|
||||
const noWildcard = rawToken.replace(/[*]+$/, '');
|
||||
// Strip trailing wildcards. Using a manual loop instead of `/[*]+$/`
|
||||
// both because the regex form trips CodeQL's polynomial-regex
|
||||
// heuristic (CodeQL 222) and because end-of-string trim is O(n) by
|
||||
// construction here.
|
||||
let end = rawToken.length;
|
||||
while (end > 0 && rawToken.charCodeAt(end - 1) === 42 /* '*' */) {
|
||||
end--;
|
||||
}
|
||||
const noWildcard = rawToken.slice(0, end);
|
||||
const beforeColon = noWildcard.split(':')[0];
|
||||
// Last path segment so `/usr/bin/python3` → `python3`
|
||||
const lastSegment = (beforeColon ?? '').split('/').pop() ?? '';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue