refactor(encoding): consolidate system encoding fallback logic

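Move the system-encoding fallback out of detectEncodingFromBuffer and into
getCachedEncodingForBuffer, so that detection only reports what it finds in
the buffer (or null) and the caller owns the fallback chain: detected
encoding, then the cached system encoding, then 'utf-8'. Remove the unused
WINDOWS_UTF8_CODE_PAGE export and inline its value (65001).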

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
tanzhenxin 2026-03-16 18:53:51 +08:00
parent e9facac111
commit f93e5f0d46
2 changed files with 8 additions and 13 deletions

View file

@ -73,7 +73,6 @@ vi.mock('../utils/shell-utils.js', () => ({
vi.mock('../utils/systemEncoding.js', () => ({
getCachedEncodingForBuffer: vi.fn().mockReturnValue('utf-8'),
getSystemEncoding: mockGetSystemEncoding,
WINDOWS_UTF8_CODE_PAGE: 65001,
}));
const mockProcessKill = vi

View file

@ -47,6 +47,13 @@ export function getCachedEncodingForBuffer(buffer: Buffer): string {
return detected;
}
if (cachedSystemEncoding === undefined) {
cachedSystemEncoding = getSystemEncoding();
}
if (cachedSystemEncoding) {
return cachedSystemEncoding;
}
// Last resort
return 'utf-8';
}
@ -134,8 +141,6 @@ export function getSystemEncoding(): string | null {
* @param cp The Windows code page number (e.g., 437, 850, etc.)
* @returns The corresponding encoding name as a string, or null if no mapping exists.
*/
/** Windows code page number for UTF-8. */
export const WINDOWS_UTF8_CODE_PAGE = 65001;
export function windowsCodePageToEncoding(cp: number): string | null {
// Most common mappings; extend as needed
@ -160,7 +165,7 @@ export function windowsCodePageToEncoding(cp: number): string | null {
1256: 'windows-1256',
1257: 'windows-1257',
1258: 'windows-1258',
[WINDOWS_UTF8_CODE_PAGE]: 'utf-8',
65001: 'utf-8',
};
if (map[cp]) {
@ -192,14 +197,5 @@ export function detectEncodingFromBuffer(buffer: Buffer): string | null {
debugLogger.warn('Failed to detect encoding with chardet:', error);
}
// Fall back to system encoding — catches cases where chardet fails
// (e.g. small GBK files that chardet misdetects as ISO-8859-2)
if (cachedSystemEncoding === undefined) {
cachedSystemEncoding = getSystemEncoding();
}
if (cachedSystemEncoding) {
return cachedSystemEncoding;
}
return null;
}