mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-04-30 04:30:48 +00:00
Replace soft "Prefer responding" wording with mandatory "MUST always respond" rule to improve LLM adherence to configured output language. Add exception clause allowing users to explicitly override for the remainder of a conversation.
191 lines
6.2 KiB
TypeScript
191 lines
6.2 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2025 Qwen team
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
/**
|
|
* Utilities for managing the LLM output language rule file.
|
|
* This file handles the creation and maintenance of ~/.qwen/output-language.md
|
|
* which instructs the LLM to respond in the user's preferred language.
|
|
*/
|
|
|
|
import * as fs from 'node:fs';
|
|
import * as path from 'node:path';
|
|
import { Storage } from '@qwen-code/qwen-code-core';
|
|
import {
|
|
detectSystemLanguage,
|
|
getLanguageNameFromLocale,
|
|
} from '../i18n/index.js';
|
|
|
|
const LLM_OUTPUT_LANGUAGE_RULE_FILENAME = 'output-language.md';
|
|
const LLM_OUTPUT_LANGUAGE_MARKER_PREFIX = 'qwen-code:llm-output-language:';
|
|
|
|
/** Special value meaning "detect from system settings" */
|
|
export const OUTPUT_LANGUAGE_AUTO = 'auto';
|
|
|
|
/**
|
|
* Checks if a value represents the "auto" setting.
|
|
*/
|
|
export function isAutoLanguage(value: string | undefined | null): boolean {
|
|
return !value || value.toLowerCase() === OUTPUT_LANGUAGE_AUTO;
|
|
}
|
|
|
|
/**
|
|
* Normalizes a language input to its canonical form.
|
|
* Converts known locale codes (e.g., "zh", "ru") to full names (e.g., "Chinese", "Russian").
|
|
* Unknown inputs are returned as-is to support any language name.
|
|
*/
|
|
export function normalizeOutputLanguage(language: string): string {
|
|
const lowered = language.toLowerCase();
|
|
const fullName = getLanguageNameFromLocale(lowered);
|
|
// getLanguageNameFromLocale returns 'English' as default for unknown codes.
|
|
// Only use the result if it's a known code or explicitly 'en'.
|
|
if (fullName !== 'English' || lowered === 'en') {
|
|
return fullName;
|
|
}
|
|
return language;
|
|
}
|
|
|
|
/**
|
|
* Resolves the output language, converting 'auto' to the detected system language.
|
|
*/
|
|
export function resolveOutputLanguage(
|
|
value: string | undefined | null,
|
|
): string {
|
|
if (isAutoLanguage(value)) {
|
|
const detectedLocale = detectSystemLanguage();
|
|
return getLanguageNameFromLocale(detectedLocale);
|
|
}
|
|
return normalizeOutputLanguage(value!);
|
|
}
|
|
|
|
/**
|
|
* Returns the path to the LLM output language rule file (~/.qwen/output-language.md).
|
|
*/
|
|
function getOutputLanguageFilePath(): string {
|
|
return path.join(
|
|
Storage.getGlobalQwenDir(),
|
|
LLM_OUTPUT_LANGUAGE_RULE_FILENAME,
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Sanitizes a language string for use in an HTML comment marker.
|
|
* Removes characters that could break HTML comment syntax.
|
|
*/
|
|
function sanitizeForMarker(language: string): string {
|
|
return language
|
|
.replace(/[\r\n]/g, ' ')
|
|
.replace(/--!?>/g, '')
|
|
.replace(/--/g, '');
|
|
}
|
|
|
|
/**
|
|
* Generates the content for the LLM output language rule file.
|
|
*/
|
|
function generateOutputLanguageFileContent(language: string): string {
|
|
const safeLanguage = sanitizeForMarker(language);
|
|
return `# Output language preference: ${language}
|
|
<!-- ${LLM_OUTPUT_LANGUAGE_MARKER_PREFIX} ${safeLanguage} -->
|
|
|
|
## Rule
|
|
You MUST always respond in **${language}** regardless of the user's input language.
|
|
This is a mandatory requirement, not a preference.
|
|
|
|
## Exception
|
|
If the user **explicitly** requests a response in a specific language (e.g., "please reply in English", "用中文回答"), switch to the user's requested language for the remainder of the conversation.
|
|
|
|
## Keep technical artifacts unchanged
|
|
Do **not** translate or rewrite:
|
|
- Code blocks, CLI commands, file paths, stack traces, logs, JSON keys, identifiers
|
|
- Exact quoted text from the user (keep quotes verbatim)
|
|
|
|
## Tool / system outputs
|
|
Raw tool/system outputs may contain fixed-format English. Preserve them verbatim, and if needed, add a short **${language}** explanation below.
|
|
`;
|
|
}
|
|
|
|
/**
|
|
* Extracts the language from the content of an output language rule file.
|
|
* Supports both the new marker format and legacy heading format.
|
|
*/
|
|
function parseOutputLanguageFromContent(content: string): string | null {
|
|
// Primary: machine-readable marker (e.g., <!-- qwen-code:llm-output-language: 中文 -->)
|
|
const markerRegex = new RegExp(
|
|
String.raw`<!--\s*${LLM_OUTPUT_LANGUAGE_MARKER_PREFIX}\s*(.*?)\s*-->`,
|
|
'i',
|
|
);
|
|
const markerMatch = content.match(markerRegex);
|
|
if (markerMatch?.[1]?.trim()) {
|
|
return markerMatch[1].trim();
|
|
}
|
|
|
|
// Fallback: legacy heading format (e.g., # CRITICAL: Chinese Output Language Rule)
|
|
const headingMatch = content.match(
|
|
/^#.*?CRITICAL:\s*(.*?)\s+Output Language Rule\b/im,
|
|
);
|
|
if (headingMatch?.[1]?.trim()) {
|
|
return headingMatch[1].trim();
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Reads the current output language from the rule file.
|
|
* Returns null if the file doesn't exist or can't be parsed.
|
|
*/
|
|
function readOutputLanguageFromFile(): string | null {
|
|
const filePath = getOutputLanguageFilePath();
|
|
if (!fs.existsSync(filePath)) {
|
|
return null;
|
|
}
|
|
try {
|
|
const content = fs.readFileSync(filePath, 'utf-8');
|
|
return parseOutputLanguageFromContent(content);
|
|
} catch {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Writes the output language rule file with the given language.
|
|
*/
|
|
export function writeOutputLanguageFile(language: string): void {
|
|
const filePath = getOutputLanguageFilePath();
|
|
const content = generateOutputLanguageFileContent(language);
|
|
const dir = path.dirname(filePath);
|
|
fs.mkdirSync(dir, { recursive: true });
|
|
fs.writeFileSync(filePath, content, 'utf-8');
|
|
}
|
|
|
|
/**
|
|
* Updates the LLM output language rule file based on the setting value.
|
|
* Resolves 'auto' to the detected system language before writing.
|
|
*/
|
|
export function updateOutputLanguageFile(settingValue: string): void {
|
|
const resolved = resolveOutputLanguage(settingValue);
|
|
writeOutputLanguageFile(resolved);
|
|
}
|
|
|
|
/**
|
|
* Initializes the LLM output language rule file on application startup.
|
|
*
|
|
* @param outputLanguage - The output language setting value (e.g., 'auto', 'Chinese', etc.)
|
|
*
|
|
* Behavior:
|
|
* - Resolves the setting value ('auto' -> detected system language, or use as-is)
|
|
* - Ensures the rule file matches the resolved language
|
|
* - Creates the file if it doesn't exist
|
|
*/
|
|
export function initializeLlmOutputLanguage(outputLanguage?: string): void {
|
|
// Resolve 'auto' or undefined to the detected system language
|
|
const resolved = resolveOutputLanguage(outputLanguage);
|
|
const currentFileLanguage = readOutputLanguageFromFile();
|
|
|
|
// Only write if the file doesn't match the resolved language
|
|
if (currentFileLanguage !== resolved) {
|
|
writeOutputLanguageFile(resolved);
|
|
}
|
|
}
|