qwen-code/packages/core/src/tools/web-fetch.ts
易良 fbf5ed57d6
Some checks failed
Qwen Code CI / Lint (push) Failing after 12s
Qwen Code CI / Test (push) Has been skipped
Qwen Code CI / Test-1 (push) Has been skipped
Qwen Code CI / Test-2 (push) Has been skipped
Qwen Code CI / Test-3 (push) Has been skipped
Qwen Code CI / Test-4 (push) Has been skipped
Qwen Code CI / Test-5 (push) Has been skipped
Qwen Code CI / Test-6 (push) Has been skipped
Qwen Code CI / Test-7 (push) Has been skipped
Qwen Code CI / Test-8 (push) Has been skipped
Qwen Code CI / CodeQL (push) Failing after 6s
E2E Tests / E2E Test (Linux) - sandbox:docker (push) Failing after 5s
Qwen Code CI / Post Coverage Comment (push) Has been skipped
E2E Tests / E2E Test (Linux) - sandbox:none (push) Failing after 10m36s
E2E Tests / E2E Test - macOS (push) Has been cancelled
feat(storage): support configurable runtime output directory (#2127)
* feat(storage): support configurable runtime output directory (#2014)

Add `advanced.runtimeOutputDir` setting and `QWEN_RUNTIME_DIR` env var
to redirect runtime output (temp files, debug logs, session data, todos,
insights) to a custom directory while keeping config files at ~/.qwen.

- Introduce `Storage.setRuntimeBaseDir()` / `getRuntimeBaseDir()` with
  tilde expansion and relative path resolution
- Add `AsyncLocalStorage`-based `runWithRuntimeBaseDir()` for concurrent
  session isolation in ACP integration
- Update all runtime path methods to use `getRuntimeBaseDir()` instead
  of `getGlobalQwenDir()` (temp, debug, ide, projects, history dirs)
- Config paths (settings, oauth, installation_id, etc.) remain pinned
  to `~/.qwen` regardless of runtime dir configuration
- Add comprehensive tests covering path resolution, env var priority,
  async context isolation, and config path stability

* fix(core/storage): 支持 Windows 风格波浪号路径

扩展 setRuntimeBaseDir 以支持 Windows 风格的波浪号路径 (~\),
使用统一的路径分割逻辑处理 Unix 和 Windows 风格的路径分隔符

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* fix(core/debugLogger): runtime base dir 变更时创建新 debug 目录

添加 ensuredDebugDirPath 追踪变量,当 runtime base dir 发生变更时,
确保在新的目录下创建 debug 子目录

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* feat(cli/acp): 支持 ACP runtime output dir 配置

新增 runWithAcpRuntimeOutputDir 辅助函数,在 ACP Agent 的
loadSession 和 listSessions 操作中应用配置的 runtimeOutputDir

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* docs(vscode-ide-companion/acpConnection): 补充 this 别名的使用说明

为 self = this 的用法添加解释性注释,说明在嵌套回调中需要使用 this

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* feat(cli): add runtime output directory configuration support

* fix(core): update test to use getUserSkillsDirs method

Update storage.test.ts to call getUserSkillsDirs() instead of the
non-existent getUserSkillsDir() method. The method was renamed to
return an array of skill directories.

* fix(core/todoWrite): use path.join for cross-platform path assertion in test

Replace hardcoded forward-slash path `.qwen/todos/` with `path.join('.qwen', 'todos')` to fix Windows CI failure where paths use backslashes.

Made-with: Cursor

---------

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
2026-03-20 13:53:05 +08:00

267 lines
8.5 KiB
TypeScript

/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { convert } from 'html-to-text';
import { ProxyAgent, setGlobalDispatcher } from 'undici';
import type { Config } from '../config/config.js';
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
import { getResponseText } from '../utils/partUtils.js';
import { ToolErrorType } from './tool-error.js';
import type {
ToolCallConfirmationDetails,
ToolInvocation,
ToolResult,
ToolConfirmationPayload,
ToolConfirmationOutcome,
} from './tools.js';
import type { PermissionDecision } from '../permissions/types.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
import { DEFAULT_QWEN_MODEL } from '../config/models.js';
import { ToolNames, ToolDisplayNames } from './tool-names.js';
import { createDebugLogger, type DebugLogger } from '../utils/debugLogger.js';
/** Timeout handed to `fetchWithTimeout` for each page request (milliseconds, per the name). */
const URL_FETCH_TIMEOUT_MS = 10_000;
/** Upper bound, in characters, on the converted page text that is forwarded to the model. */
const MAX_CONTENT_LENGTH = 100_000;
/**
 * Input accepted by the WebFetch tool.
 *
 * Both fields are required; the tool rejects empty values during parameter
 * validation.
 */
export interface WebFetchToolParams {
  /** Address of the page to download; must begin with `http://` or `https://`. */
  url: string;

  /** Instruction the model should follow when reading the downloaded content. */
  prompt: string;
}
/**
 * One invocation of the WebFetch tool.
 *
 * Downloads the requested URL, converts the HTML body to plain text, and asks
 * the configured model to answer the caller's prompt using that text.
 */
class WebFetchToolInvocation extends BaseToolInvocation<
  WebFetchToolParams,
  ToolResult
> {
  private readonly debugLogger: DebugLogger;

  constructor(
    private readonly config: Config,
    params: WebFetchToolParams,
  ) {
    super(params);
    this.debugLogger = createDebugLogger('WEB_FETCH');
  }

  /**
   * Rewrites a GitHub "blob" page URL to its raw-content equivalent.
   *
   * Only URLs whose host is exactly `github.com` (or `www.github.com`) and
   * whose path has the shape `/{owner}/{repo}/blob/{ref}/...` are rewritten.
   * Matching on the parsed host and path (rather than on substrings of the
   * whole URL) keeps other hosts such as `gist.github.com`, look-alike
   * domains, and URLs that merely mention `github.com` in their path or query
   * untouched, and handles repositories that are themselves named `blob`.
   *
   * @param url The URL supplied by the caller.
   * @returns The raw.githubusercontent.com URL, or `url` unchanged when it is
   *   not a GitHub blob URL or cannot be parsed.
   */
  private static toRawGitHubUrl(url: string): string {
    let parsed: URL;
    try {
      parsed = new URL(url);
    } catch {
      // Leave unparsable input alone; the fetch below will report the error.
      return url;
    }

    const host = parsed.hostname.toLowerCase();
    if (host !== 'github.com' && host !== 'www.github.com') {
      return url;
    }

    // pathname starts with '/', so the split yields:
    // ['', owner, repo, 'blob', ref, ...path]
    const segments = parsed.pathname.split('/');
    if (segments.length < 5 || segments[3] !== 'blob') {
      return url;
    }

    segments.splice(3, 1);
    parsed.hostname = 'raw.githubusercontent.com';
    parsed.pathname = segments.join('/');
    return parsed.toString();
  }

  /**
   * Fetches the page, converts it to text, and runs the prompt against it.
   *
   * Failures (non-2xx responses, network errors, model errors) are not
   * rethrown; they are reported through the returned `ToolResult.error`.
   *
   * @param signal Abort signal forwarded to the model call.
   * @returns The model's answer, or an error result describing the failure.
   */
  private async executeDirectFetch(signal: AbortSignal): Promise<ToolResult> {
    const requestedUrl = this.params.url;

    // GitHub blob pages are HTML wrappers; fetch the raw file instead.
    const url = WebFetchToolInvocation.toRawGitHubUrl(requestedUrl);
    if (url !== requestedUrl) {
      this.debugLogger.debug(
        `[WebFetchTool] Converted GitHub blob URL to raw URL: ${url}`,
      );
    }

    try {
      this.debugLogger.debug(`[WebFetchTool] Fetching content from: ${url}`);
      const response = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS);

      if (!response.ok) {
        const errorMessage = `Request failed with status code ${response.status} ${response.statusText}`;
        this.debugLogger.error(`[WebFetchTool] ${errorMessage}`);
        throw new Error(errorMessage);
      }

      this.debugLogger.debug(
        `[WebFetchTool] Successfully fetched content from ${url}`,
      );

      const html = await response.text();
      // Drop link targets and images, then cap the text that is sent to the model.
      const textContent = convert(html, {
        wordwrap: false,
        selectors: [
          { selector: 'a', options: { ignoreHref: true } },
          { selector: 'img', format: 'skip' },
        ],
      }).substring(0, MAX_CONTENT_LENGTH);

      this.debugLogger.debug(
        `[WebFetchTool] Converted HTML to text (${textContent.length} characters)`,
      );

      const geminiClient = this.config.getGeminiClient();
      const fallbackPrompt = `The user requested the following: "${this.params.prompt}".
I have fetched the content from ${this.params.url}. Please use the following content to answer the user's request.
---
${textContent}
---`;

      this.debugLogger.debug(
        `[WebFetchTool] Processing content with prompt: "${this.params.prompt}"`,
      );

      const result = await geminiClient.generateContent(
        [{ role: 'user', parts: [{ text: fallbackPrompt }] }],
        {},
        signal,
        this.config.getModel() || DEFAULT_QWEN_MODEL,
      );
      const resultText = getResponseText(result) || '';

      this.debugLogger.debug(
        `[WebFetchTool] Successfully processed content from ${this.params.url}`,
      );

      return {
        llmContent: resultText,
        returnDisplay: `Content from ${this.params.url} processed successfully.`,
      };
    } catch (e: unknown) {
      // Anything can be thrown; normalize so `.message` is always defined.
      const error = e instanceof Error ? e : new Error(String(e));
      const errorMessage = `Error during fetch for ${url}: ${error.message}`;
      this.debugLogger.error(`[WebFetchTool] ${errorMessage}`, error);
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
        error: {
          message: errorMessage,
          type: ToolErrorType.WEB_FETCH_FALLBACK_FAILED,
        },
      };
    }
  }

  /**
   * Builds a one-line summary of this invocation.
   *
   * Prompts longer than 100 characters are cut to 97 characters plus `...`.
   */
  override getDescription(): string {
    const displayPrompt =
      this.params.prompt.length > 100
        ? this.params.prompt.substring(0, 97) + '...'
        : this.params.prompt;
    return `Fetching content from ${this.params.url} and processing with prompt: "${displayPrompt}"`;
  }

  /**
   * WebFetch is a read-like tool (fetches content) but requires confirmation
   * because it makes external network requests.
   */
  override async getDefaultPermission(): Promise<PermissionDecision> {
    return 'ask';
  }

  /**
   * Constructs the web fetch confirmation details.
   *
   * The suggested permission rule is keyed on the URL's hostname; when the URL
   * cannot be parsed, the raw URL string is used instead.
   */
  override async getConfirmationDetails(
    _abortSignal: AbortSignal,
  ): Promise<ToolCallConfirmationDetails> {
    // Extract the domain for the permission rule.
    let domain: string;
    try {
      domain = new URL(this.params.url).hostname;
    } catch {
      domain = this.params.url;
    }
    const permissionRules = [`WebFetch(${domain})`];

    const confirmationDetails: ToolCallConfirmationDetails = {
      type: 'info',
      title: `Confirm Web Fetch`,
      prompt: `Fetch content from ${this.params.url} and process with: ${this.params.prompt}`,
      urls: [this.params.url],
      permissionRules,
      onConfirm: async (
        _outcome: ToolConfirmationOutcome,
        _payload?: ToolConfirmationPayload,
      ) => {
        // No-op: persistence is handled by coreToolScheduler via PM rules
      },
    };
    return confirmationDetails;
  }

  /**
   * Runs the tool.
   *
   * The private/public classification only selects the debug log line; both
   * kinds of URL go through the same direct fetch path.
   *
   * @param signal Abort signal forwarded to the fetch-and-process step.
   */
  async execute(signal: AbortSignal): Promise<ToolResult> {
    // Check if URL is private/localhost
    const isPrivate = isPrivateIp(this.params.url);
    if (isPrivate) {
      this.debugLogger.debug(
        `[WebFetchTool] Private IP detected for ${this.params.url}, using direct fetch`,
      );
    } else {
      this.debugLogger.debug(
        `[WebFetchTool] Public URL detected for ${this.params.url}, using direct fetch`,
      );
    }
    return this.executeDirectFetch(signal);
  }
}
/**
 * Declarative definition of the WebFetch tool: its name, description, and
 * parameter schema, plus parameter validation and invocation creation.
 */
export class WebFetchTool extends BaseDeclarativeTool<
  WebFetchToolParams,
  ToolResult
> {
  static readonly Name: string = ToolNames.WEB_FETCH;

  /**
   * @param config Application configuration; supplies the optional proxy and
   *   is passed on to every invocation.
   *
   * NOTE: when a proxy is configured, constructing this tool installs it as
   * undici's global dispatcher.
   */
  constructor(private readonly config: Config) {
    super(
      WebFetchTool.Name,
      ToolDisplayNames.WEB_FETCH,
      'Fetches content from a specified URL and processes it using an AI model\n- Takes a URL and a prompt as input\n- Fetches the URL content, converts HTML to markdown\n- Processes the content with the prompt using a small, fast model\n- Returns the model\'s response about the content\n- Use this tool when you need to retrieve and analyze web content\n\nUsage notes:\n - IMPORTANT: If an MCP-provided web fetch tool is available, prefer using that tool instead of this one, as it may have fewer restrictions. All MCP-provided tools start with "mcp__".\n - The URL must be a fully-formed valid URL\n - The prompt should describe what information you want to extract from the page\n - This tool is read-only and does not modify any files\n - Results may be summarized if the content is very large\n - Supports both public and private/localhost URLs using direct fetch',
      Kind.Fetch,
      {
        properties: {
          url: {
            description: 'The URL to fetch content from',
            type: 'string',
          },
          prompt: {
            description: 'The prompt to run on the fetched content',
            type: 'string',
          },
        },
        required: ['url', 'prompt'],
        type: 'object',
      },
    );

    const proxy = config.getProxy();
    if (proxy) {
      setGlobalDispatcher(new ProxyAgent(proxy as string));
    }
  }

  /**
   * Validates the tool parameters beyond the JSON schema.
   *
   * Checks, in order: `url` is non-blank, `url` starts with `http://` or
   * `https://`, `url` can actually be parsed as a URL, and `prompt` is
   * non-blank.
   *
   * @returns An error message for the first failed check, or `null` when the
   *   parameters are acceptable.
   */
  protected override validateToolParamValues(
    params: WebFetchToolParams,
  ): string | null {
    if (!params.url || params.url.trim() === '') {
      return "The 'url' parameter cannot be empty.";
    }

    const invalidUrlMessage =
      "The 'url' must be a valid URL starting with http:// or https://.";
    if (
      !params.url.startsWith('http://') &&
      !params.url.startsWith('https://')
    ) {
      return invalidUrlMessage;
    }
    // A correct prefix alone does not make a URL valid (e.g. "http://" or a
    // host containing spaces). Reject those here so they fail before the
    // confirmation prompt and the network request rather than at fetch time.
    if (!WebFetchTool.isParsableUrl(params.url)) {
      return invalidUrlMessage;
    }

    if (!params.prompt || params.prompt.trim() === '') {
      return "The 'prompt' parameter cannot be empty.";
    }
    return null;
  }

  /** Creates the invocation object that will execute a validated request. */
  protected createInvocation(
    params: WebFetchToolParams,
  ): ToolInvocation<WebFetchToolParams, ToolResult> {
    return new WebFetchToolInvocation(this.config, params);
  }

  /** Returns whether the WHATWG URL parser accepts `candidate`. */
  private static isParsableUrl(candidate: string): boolean {
    try {
      return new URL(candidate).href.length > 0;
    } catch {
      return false;
    }
  }
}