qwen-code/scripts/tests/check-i18n.test.ts
MikeWang0316tw 02a65f90c4
fix(i18n): Correct zh-TW translations to match Traditional Chinese conventions (#4129)
* fix(i18n): Correct zh-TW translations to match Traditional Chinese conventions

Fix ~131 lines of Traditional Chinese (zh-TW) translations that used
variant character forms and Mainland-Chinese vocabulary (artifacts of
raw OpenCC Simplified-to-Traditional conversion) instead of standard
Taiwan Traditional Chinese usage.

Changes:
- 文件 → 檔案 (47 occurrences)
- 爲 → 為 (45 occurrences)
- 啓 → 啟 (44 occurrences)
- 曆史 → 歷史 (6 occurrences)
- 鏈接 → 連結 (4 occurrences)
- 菜單 → 選單 (3 occurrences)

* fix(i18n): Replace 服務器 with 伺服器 (15 occurrences)

Align with Traditional Chinese convention where 伺服器 is the standard
term for 'server' in computing contexts.

* fix(i18n): Update zh-TW.js header comment to prevent accidental overwrite

Clarify that the file is the authoritative source and should not be
overwritten with auto-generated output, to prevent future maintainers
from regenerating with raw OpenCC and losing manual corrections.

* fix(i18n): Add zh-TW regression check and maintenance docs

Addresses reviewer feedback on PR #4129 (points 2 and 3):

- scripts/check-i18n.ts: Iterate over parsed zh-TW translation values
  (not raw file content) and report the offending key. Replace the
  earlier substring list with ZH_TW_FORBIDDEN_PATTERNS, which targets
  the three real regression categories: variant Traditional characters
  produced by OpenCC s2t (爲, 啓), Mainland-Chinese vocabulary (服務器,
  菜單, 鏈接), and pure Simplified characters. Excludes 禁用 / 配置 /
  文件 / 打開 to avoid false positives on Taiwan-valid usage.
- scripts/tests/check-i18n.test.ts: Cover the new check, including
  negative cases for Taiwan-valid vocabulary.
- docs/users/features/language.md: Document zh-TW maintenance — the
  vocabulary table, why raw OpenCC s2t output is not acceptable, and
  where the CI-enforced list lives.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* fix(i18n): Address review feedback on zh-TW check (#4129)

- check-i18n.ts: Sort ZH_TW_FORBIDDEN_PATTERNS longest-first and break
  on first match so e.g. `历史` reports the specific bigram instead of
  also firing the bare `历` rule (no duplicate CI errors).
- check-i18n.ts: Add ZH_TW_ALLOWED_EXCEPTIONS escape hatch so a future
  legitimate translation (e.g. 區塊鏈 in a UI string) can opt out by key
  without weakening the global pattern list.
- docs/users/features/language.md: Add a "CI enforced?" column so
  contributors can tell which rows block CI vs. which are review-only
  style guidance. Replace bare `曆` in the table with the `曆史` bigram
  and note that `曆` is correct in calendar terms (日曆, 農曆, 西曆) —
  prevents a future maintainer from globally replacing 曆→歷.
- Tests: Cover the dedup behavior on overlapping patterns.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* docs(i18n): Note word-boundary limitation of zh-TW substring check

Document the known limitation that `includes()`-based pattern matching
does not respect Chinese word boundaries — a bigram like `鏈接` will
false-positive on `區塊鏈接口` (區塊鏈 + 接口). Direct contributors to
`ZH_TW_ALLOWED_EXCEPTIONS` when this happens instead of weakening the
pattern list.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-15 15:26:12 +08:00

447 lines
13 KiB
TypeScript

/**
* @license
* Copyright 2026 Qwen Team
* SPDX-License-Identifier: Apache-2.0
*/
import { tmpdir } from 'node:os';
import path from 'node:path';
import {
mkdtempSync,
mkdirSync,
readFileSync,
rmSync,
writeFileSync,
} from 'node:fs';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { LanguageDefinition } from '../../packages/cli/src/i18n/languages.js';
import {
checkI18n,
findForbiddenZhTwPatterns,
printCheckI18nResult,
shouldWriteUnusedKeysJson,
type CheckI18nOptions,
} from '../check-i18n.js';
// The fixtures below create real files under the OS temp directory, so opt
// this file out of any module mock registered for `fs` / `node:fs`.
// NOTE(review): presumably a shared Vitest setup mocks `fs` — confirm.
vi.unmock('fs');
vi.unmock('node:fs');
/** Fields of `LanguageDefinition` that the `languages()` helper fills in. */
type TestLanguage = Pick<LanguageDefinition, 'code' | 'id' | 'strictParity'>;
/**
 * Shorthand accepted by `languages()`: a bare locale code, or an object with
 * a required `code` and optional `id` / `strictParity`.
 */
type TestLanguageInput =
| string
| (Pick<LanguageDefinition, 'code'> &
Partial<Pick<LanguageDefinition, 'id' | 'strictParity'>>);
/** Translation key → value pairs serialized by `writeLocale()`. */
type LocaleEntries = Record<string, string>;
/** Fixture roots registered by `makeFixture()` and deleted in `afterEach`. */
const tempDirs: string[] = [];
/**
 * Creates a fresh fixture tree under the OS temp directory and registers its
 * root in `tempDirs`.
 *
 * @returns The fixture root plus its `locales` and `src` subdirectories, both
 *   of which already exist on disk when this function returns.
 */
function makeFixture(): {
  root: string;
  localesDir: string;
  sourceDir: string;
} {
  const root = mkdtempSync(path.join(tmpdir(), 'qwen-check-i18n-'));
  tempDirs.push(root);

  // Create a child directory of the fixture root and hand back its path.
  const ensureChild = (name: string): string => {
    const childPath = path.join(root, name);
    mkdirSync(childPath, { recursive: true });
    return childPath;
  };

  return {
    root,
    localesDir: ensureChild('locales'),
    sourceDir: ensureChild('src'),
  };
}
/**
 * Writes `<code>.js` into `localesDir` as an ES module whose default export
 * is an object literal holding `entries`.
 *
 * Keys and values are emitted through `JSON.stringify`, so quotes, tabs and
 * newlines inside them are escaped in the generated source.
 *
 * @param localesDir Directory that receives the locale module.
 * @param code Locale code used as the file's base name.
 * @param entries Translation key/value pairs to serialize.
 */
function writeLocale(
  localesDir: string,
  code: string,
  entries: LocaleEntries,
): void {
  const propertyLines: string[] = [];
  for (const [key, value] of Object.entries(entries)) {
    propertyLines.push(` ${JSON.stringify(key)}: ${JSON.stringify(value)},`);
  }
  const targetFile = path.join(localesDir, `${code}.js`);
  const moduleSource = [
    'export default {',
    propertyLines.join('\n'),
    '};',
    '',
  ].join('\n');
  writeFileSync(targetFile, moduleSource);
}
/**
 * Writes `content` to `fixture.ts` inside `sourceDir`.
 *
 * @param sourceDir Directory that receives the fixture source file.
 * @param content Full text of the source file.
 */
function writeSource(sourceDir: string, content: string): void {
  const fixturePath = path.join(sourceDir, 'fixture.ts');
  writeFileSync(fixturePath, content);
}
/**
 * Expands shorthand language inputs into the shape passed as
 * `supportedLanguages`.
 *
 * - A bare string is treated as `{ code }`.
 * - `id` defaults to `<code>-test`; a supplied `id` replaces it.
 * - `strictParity` defaults to `true` for `zh` and `zh-TW`, and to `false`
 *   for any other code, unless the input sets it.
 *
 * @param definitions Locale codes or partial language definitions.
 * @returns One expanded entry per input, in the same order.
 */
function languages(
  ...definitions: TestLanguageInput[]
): NonNullable<CheckI18nOptions['supportedLanguages']> {
  return definitions.map((entry) => {
    const normalized = typeof entry === 'string' ? { code: entry } : entry;
    const strictByDefault =
      normalized.code === 'zh' || normalized.code === 'zh-TW';
    return {
      // Listed before the spread so a caller-supplied `id` overrides it.
      id: `${normalized.code}-test`,
      ...normalized,
      strictParity: normalized.strictParity ?? strictByDefault,
    } satisfies TestLanguage;
  });
}
// Replace console.error and console.log with no-op spies before each test.
beforeEach(() => {
  const noop = (): void => {};
  vi.spyOn(console, 'error').mockImplementation(noop);
  vi.spyOn(console, 'log').mockImplementation(noop);
});
// Restore all mocks, then delete every fixture directory recorded in
// `tempDirs`, emptying the list in the process.
afterEach(() => {
  vi.restoreAllMocks();
  const staleDirs = tempDirs.splice(0);
  staleDirs.forEach((dir) => {
    rmSync(dir, { recursive: true, force: true });
  });
});
describe('checkI18n', () => {
it('records locale load failures and continues checking other locales', async () => {
  const fixture = makeFixture();
  writeLocale(fixture.localesDir, 'en', { Used: 'Used' });
  // A module whose default export is null stands in for a broken locale.
  writeFileSync(
    path.join(fixture.localesDir, 'broken.js'),
    'export default null;\n',
  );
  writeLocale(fixture.localesDir, 'fr', { Used: 'Utilise' });
  writeSource(fixture.sourceDir, "t('Used');\n");

  const { success, errors, stats } = await checkI18n({
    localesDir: fixture.localesDir,
    sourceDir: fixture.sourceDir,
    supportedLanguages: languages('en', 'broken', 'fr'),
    mustTranslateKeys: [],
  });

  expect(success).toBe(false);
  const loadFailure = expect.stringContaining(
    'Failed to load broken.js: Invalid locale module',
  );
  expect(errors).toContainEqual(loadFailure);
  // The locale listed after the broken one must still show up in the stats.
  const reportedCodes = stats.locales.map((locale) => locale.code);
  expect(reportedCodes).toEqual(['fr']);
});
it('returns a failed empty result when en.js is missing', async () => {
  // Only fr.js is written; en.js is deliberately absent from the fixture.
  const fixture = makeFixture();
  writeLocale(fixture.localesDir, 'fr', { Used: 'Utilise' });
  writeSource(fixture.sourceDir, "t('Used');\n");

  const options: CheckI18nOptions = {
    localesDir: fixture.localesDir,
    sourceDir: fixture.sourceDir,
    supportedLanguages: languages('en', 'fr'),
    mustTranslateKeys: [],
  };
  const { success, stats, errors } = await checkI18n(options);

  expect(success).toBe(false);
  expect(stats.totalKeys).toBe(0);
  expect(stats.locales).toEqual([]);
  const enLoadFailure = expect.stringContaining('Failed to load en.js:');
  expect(errors).toContainEqual(enLoadFailure);
});
it('enforces strict key parity for zh and zh-TW', async () => {
  const fixture = makeFixture();
  // Both Chinese locales lack `MissingInStrict` and carry an extra key.
  const english: LocaleEntries = {
    Used: 'Used',
    MissingInStrict: 'MissingInStrict',
  };
  const simplified: LocaleEntries = {
    Used: '已使用',
    ExtraStrict: '额外',
  };
  const traditional: LocaleEntries = {
    Used: '已使用',
    ExtraStrict: '額外',
  };
  writeLocale(fixture.localesDir, 'en', english);
  writeLocale(fixture.localesDir, 'zh', simplified);
  writeLocale(fixture.localesDir, 'zh-TW', traditional);
  writeSource(fixture.sourceDir, "t('Used');\nt('MissingInStrict');\n");

  const { errors } = await checkI18n({
    localesDir: fixture.localesDir,
    sourceDir: fixture.sourceDir,
    supportedLanguages: languages('en', 'zh', 'zh-TW'),
    mustTranslateKeys: [],
  });

  const expectedErrors = [
    'Missing translation in zh.js: "MissingInStrict"',
    'Extra key in zh.js (not in en.js): "ExtraStrict"',
    'Missing translation in zh-TW.js: "MissingInStrict"',
    'Extra key in zh-TW.js (not in en.js): "ExtraStrict"',
  ];
  for (const message of expectedErrors) {
    expect(errors).toContain(message);
  }
});
it('uses language metadata to decide strict key parity', async () => {
  const fixture = makeFixture();
  writeLocale(fixture.localesDir, 'en', {
    Used: 'Used',
    MissingInStrict: 'MissingInStrict',
  });
  writeLocale(fixture.localesDir, 'fr', {
    Used: 'Utilise',
    ExtraStrict: 'Supplémentaire',
  });
  writeSource(fixture.sourceDir, "t('Used');\nt('MissingInStrict');\n");

  // `fr` opts into strict parity explicitly through its language definition.
  const strictFrench = { code: 'fr', strictParity: true };
  const { errors } = await checkI18n({
    localesDir: fixture.localesDir,
    sourceDir: fixture.sourceDir,
    supportedLanguages: languages('en', strictFrench),
    mustTranslateKeys: [],
  });

  const expectedErrors = [
    'Missing translation in fr.js: "MissingInStrict"',
    'Extra key in fr.js (not in en.js): "ExtraStrict"',
  ];
  for (const message of expectedErrors) {
    expect(errors).toContain(message);
  }
});
it('warns for non-strict optional missing keys and errors for required keys', async () => {
  const fixture = makeFixture();
  writeLocale(fixture.localesDir, 'en', {
    Optional: 'Optional',
    Required: 'Required',
  });
  // fr.js translates neither key; only `Required` is on the must-translate list.
  writeLocale(fixture.localesDir, 'fr', { ExtraLoose: 'Supplémentaire' });
  writeSource(fixture.sourceDir, "t('Optional');\nt('Required');\n");

  const { errors, warnings } = await checkI18n({
    localesDir: fixture.localesDir,
    sourceDir: fixture.sourceDir,
    supportedLanguages: languages('en', 'fr'),
    mustTranslateKeys: ['Required'],
  });

  const requiredError = 'Missing required translation in fr.js: "Required"';
  expect(errors).toContain(requiredError);
  const optionalWarning = 'fr.js is missing 1 non-required translation keys';
  expect(warnings).toContain(optionalWarning);
});
it('warns instead of errors for extra keys in non-strict locales', async () => {
  const fixture = makeFixture();
  writeLocale(fixture.localesDir, 'en', { Used: 'Used' });
  writeLocale(fixture.localesDir, 'fr', {
    Used: 'Utilise',
    ExtraLoose: 'Supplémentaire',
  });
  writeSource(fixture.sourceDir, "t('Used');\n");

  const { errors, warnings } = await checkI18n({
    localesDir: fixture.localesDir,
    sourceDir: fixture.sourceDir,
    supportedLanguages: languages('en', 'fr'),
    mustTranslateKeys: [],
  });

  // The extra key is reported as an aggregate warning, not a per-key error.
  const strictStyleError = 'Extra key in fr.js (not in en.js): "ExtraLoose"';
  expect(errors).not.toContain(strictStyleError);
  expect(warnings).toContain('fr.js has 1 keys not present in en.js');
});
it('errors when a required translation still falls back to English', async () => {
  const fixture = makeFixture();
  // fr.js carries the key, but its value is identical to the English one.
  const untranslated: LocaleEntries = { Required: 'Required' };
  writeLocale(fixture.localesDir, 'en', untranslated);
  writeLocale(fixture.localesDir, 'fr', untranslated);
  writeSource(fixture.sourceDir, "t('Required');\n");

  const { errors } = await checkI18n({
    localesDir: fixture.localesDir,
    sourceDir: fixture.sourceDir,
    supportedLanguages: languages('en', 'fr'),
    mustTranslateKeys: ['Required'],
  });

  const fallbackError =
    'Required translation still falls back to English in fr.js: "Required"';
  expect(errors).toContain(fallbackError);
});
it('writes unused locale-only keys only when requested', async () => {
  const fixture = makeFixture();
  // `LocaleOnly` exists in both locale files but is never referenced in source.
  writeLocale(fixture.localesDir, 'en', {
    Used: 'Used',
    LocaleOnly: 'LocaleOnly',
  });
  writeLocale(fixture.localesDir, 'fr', {
    Used: 'Utilise',
    LocaleOnly: 'Locale seulement',
  });
  writeSource(fixture.sourceDir, "t('Used');\n");

  const result = await checkI18n({
    localesDir: fixture.localesDir,
    sourceDir: fixture.sourceDir,
    supportedLanguages: languages('en', 'fr'),
    mustTranslateKeys: [],
  });
  expect(result.stats.unusedKeysOnlyInLocales).toEqual(['LocaleOnly']);

  const outputPath = path.join(fixture.root, 'unused-keys-only-in-locales.json');
  const readOutput = (): string => readFileSync(outputPath, 'utf-8');

  // With the flag off no file is produced, so reading it must throw.
  printCheckI18nResult(result, {
    writeUnusedKeysJson: false,
    unusedKeysOutputPath: outputPath,
  });
  expect(readOutput).toThrow();

  // With the flag on the JSON report is written to the requested path.
  printCheckI18nResult(result, {
    writeUnusedKeysJson: true,
    unusedKeysOutputPath: outputPath,
  });
  const report: unknown = JSON.parse(readOutput());
  expect(report).toEqual({
    keys: ['LocaleOnly'],
    count: 1,
  });
});
it('extracts escaped string-literal translation keys from source files', async () => {
  const fixture = makeFixture();
  writeLocale(fixture.localesDir, 'en', {
    "Quoted ' key": "Quoted ' key",
    'Tabbed\tkey': 'Tabbed\tkey',
    'Line\nbreak': 'Line\nbreak',
  });
  writeLocale(fixture.localesDir, 'fr', {
    "Quoted ' key": 'Clé avec apostrophe',
    'Tabbed\tkey': 'Clé avec tabulation',
    'Line\nbreak': 'Saut de ligne',
  });

  // Each call site spells its key with an escape sequence in the literal.
  const callSites = [
    "t('Quoted \\' key');",
    't("Tabbed\\tkey");',
    "ta('Line\\nbreak');",
  ];
  writeSource(fixture.sourceDir, callSites.join('\n'));

  const { stats } = await checkI18n({
    localesDir: fixture.localesDir,
    sourceDir: fixture.sourceDir,
    supportedLanguages: languages('en', 'fr'),
    mustTranslateKeys: [],
  });

  // Every locale key must be matched to a call site, leaving none unused.
  expect(stats.unusedKeys).toEqual([]);
});
it('flags Mainland-Chinese vocabulary and variant Traditional chars in zh-TW values', async () => {
  const fixture = makeFixture();
  writeLocale(fixture.localesDir, 'en', {
    Open: 'Open',
    Server: 'Server',
    Menu: 'Menu',
    Disable: 'Disable',
    Config: 'Config',
  });
  writeLocale(fixture.localesDir, 'zh', {
    Open: '打开',
    Server: '服务器',
    Menu: '菜单',
    Disable: '禁用',
    Config: '配置',
  });
  writeLocale(fixture.localesDir, 'zh-TW', {
    // Values the check is expected to reject:
    Open: '啓動', // 啓 is a variant Traditional form (OpenCC s2t artifact)
    Server: '服務器', // Mainland vocabulary
    Menu: '菜單', // Mainland vocabulary
    // Valid Taiwan usage that must pass untouched:
    Disable: '禁用',
    Config: '配置',
  });
  writeSource(
    fixture.sourceDir,
    "t('Open');\nt('Server');\nt('Menu');\nt('Disable');\nt('Config');\n",
  );

  const { errors } = await checkI18n({
    localesDir: fixture.localesDir,
    sourceDir: fixture.sourceDir,
    supportedLanguages: languages('en', 'zh', 'zh-TW'),
    mustTranslateKeys: [],
  });

  const expectedErrors = [
    'Non-Taiwan vocabulary in zh-TW.js at "Open": "啓" should be "啟"',
    'Non-Taiwan vocabulary in zh-TW.js at "Server": "服務器" should be "伺服器"',
    'Non-Taiwan vocabulary in zh-TW.js at "Menu": "菜單" should be "選單"',
  ];
  for (const message of expectedErrors) {
    expect(errors).toContain(message);
  }

  // No error of any kind may mention the Taiwan-valid keys.
  const cleanKeyMarkers = ['at "Disable"', 'at "Config"'];
  for (const marker of cleanKeyMarkers) {
    expect(errors).not.toContainEqual(expect.stringContaining(marker));
  }
});
it('returns no findings for clean Taiwan Traditional translations', () => {
  // Every value below uses the Taiwan-standard term, so nothing should match.
  const cleanTranslations: LocaleEntries = {
    Open: '開啟',
    Server: '伺服器',
    Menu: '選單',
    Disable: '禁用',
    Config: '配置',
    Link: '連結',
    History: '歷史',
  };

  expect(findForbiddenZhTwPatterns(cleanTranslations)).toEqual([]);
});
it('reports only the most specific pattern per value (no duplicate findings)', () => {
  // Simplified `历史` also contains the single-character pattern `历`.
  // Only the longer, more specific pattern should be reported, exactly once.
  const overlapping: LocaleEntries = { History: '历史' };
  const expectedFindings = [
    { key: 'History', pattern: '历史', preferred: '歷史' },
  ];

  expect(findForbiddenZhTwPatterns(overlapping)).toEqual(expectedFindings);
});
it('detects the unused-keys JSON flag from argv or env', () => {
  const envFlag = 'QWEN_CHECK_I18N_WRITE_UNUSED_KEYS';
  const savedArgv = process.argv;
  const savedEnvValue = process.env[envFlag];
  const bareArgv = (): string[] => ['node', 'check-i18n.ts'];

  try {
    // Neither the CLI flag nor the env variable is present.
    process.argv = bareArgv();
    delete process.env[envFlag];
    expect(shouldWriteUnusedKeysJson()).toBe(false);

    // CLI flag only.
    process.argv = [...bareArgv(), '--write-unused-locale-keys'];
    expect(shouldWriteUnusedKeysJson()).toBe(true);

    // Env variable only.
    process.argv = bareArgv();
    process.env[envFlag] = '1';
    expect(shouldWriteUnusedKeysJson()).toBe(true);
  } finally {
    // Put the process-wide state back, whatever the assertions did.
    process.argv = savedArgv;
    if (savedEnvValue !== undefined) {
      process.env[envFlag] = savedEnvValue;
    } else {
      delete process.env[envFlag];
    }
  }
});
});