mirror of
https://github.com/AgentSeal/codeburn.git
synced 2026-05-17 03:56:45 +00:00
Fix node:sqlite V8 crash on invalid UTF-8 in text columns (#272)
node:sqlite calls v8::String::NewFromUtf8 with kAbort on TEXT columns. Cursor chat blobs often contain multi-byte characters truncated at streaming boundaries, and this invalid UTF-8 triggers a V8 CHECK abort (a process crash, not a catchable JS exception). The fix selects every text-content column as CAST(col AS BLOB) so that node:sqlite returns a Uint8Array instead of a string, then decodes it in JS with a TextDecoder created with fatal: false, which replaces invalid bytes with U+FFFD. This covers all three SQLite providers (Cursor, Goose, OpenCode). It also removes the version blocklist (MIN_NODE_22_PATCH) and lowers the engines requirement from >=22.20 to >=22, since the BLOB-cast approach works on all Node 22.x versions. Closes #264. Closes #250.
This commit is contained in:
parent
d142bd97ef
commit
02f4635cec
6 changed files with 101 additions and 76 deletions
|
|
@ -4,7 +4,7 @@ import { homedir } from 'os'
|
|||
|
||||
import { calculateCost } from '../models.js'
|
||||
import { readCachedResults, writeCachedResults } from '../cursor-cache.js'
|
||||
import { isSqliteAvailable, getSqliteLoadError, openDatabase, type SqliteDatabase } from '../sqlite.js'
|
||||
import { isSqliteAvailable, getSqliteLoadError, openDatabase, blobToText, type SqliteDatabase } from '../sqlite.js'
|
||||
import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
|
||||
|
||||
const CURSOR_COST_MODEL = 'claude-sonnet-4-5'
|
||||
|
|
@ -33,16 +33,16 @@ type BubbleRow = {
|
|||
model: string | null
|
||||
created_at: string | null
|
||||
conversation_id: string | null
|
||||
user_text: string | null
|
||||
user_text: Uint8Array | string | null
|
||||
text_length: number | null
|
||||
bubble_type: number | null
|
||||
code_blocks: string | null
|
||||
code_blocks: Uint8Array | string | null
|
||||
}
|
||||
|
||||
type AgentKvRow = {
|
||||
key: string
|
||||
role: string | null
|
||||
content: string | null
|
||||
content: Uint8Array | string | null
|
||||
request_id: string | null
|
||||
content_length: number
|
||||
}
|
||||
|
|
@ -291,10 +291,10 @@ const BUBBLE_QUERY_BASE = `
|
|||
json_extract(value, '$.modelInfo.modelName') as model,
|
||||
json_extract(value, '$.createdAt') as created_at,
|
||||
json_extract(value, '$.conversationId') as conversation_id,
|
||||
substr(json_extract(value, '$.text'), 1, 500) as user_text,
|
||||
CAST(substr(json_extract(value, '$.text'), 1, 500) AS BLOB) as user_text,
|
||||
length(json_extract(value, '$.text')) as text_length,
|
||||
json_extract(value, '$.type') as bubble_type,
|
||||
json_extract(value, '$.codeBlocks') as code_blocks
|
||||
CAST(json_extract(value, '$.codeBlocks') AS BLOB) as code_blocks
|
||||
FROM cursorDiskKV
|
||||
WHERE key LIKE 'bubbleId:%'
|
||||
`
|
||||
|
|
@ -303,7 +303,7 @@ const AGENTKV_QUERY = `
|
|||
SELECT
|
||||
key,
|
||||
json_extract(value, '$.role') as role,
|
||||
json_extract(value, '$.content') as content,
|
||||
CAST(json_extract(value, '$.content') AS BLOB) as content,
|
||||
json_extract(value, '$.providerOptions.cursor.requestId') as request_id,
|
||||
length(value) as content_length
|
||||
FROM cursorDiskKV
|
||||
|
|
@ -316,7 +316,7 @@ const USER_MESSAGES_QUERY = `
|
|||
SELECT
|
||||
json_extract(value, '$.conversationId') as conversation_id,
|
||||
json_extract(value, '$.createdAt') as created_at,
|
||||
substr(json_extract(value, '$.text'), 1, 500) as text
|
||||
CAST(substr(json_extract(value, '$.text'), 1, 500) AS BLOB) as text
|
||||
FROM cursorDiskKV
|
||||
WHERE key LIKE 'bubbleId:%'
|
||||
AND json_extract(value, '$.type') = 1
|
||||
|
|
@ -346,7 +346,7 @@ function validateSchema(db: SqliteDatabase): boolean {
|
|||
}
|
||||
}
|
||||
|
||||
type UserMsgRow = { conversation_id: string; created_at: string; text: string }
|
||||
type UserMsgRow = { conversation_id: string; created_at: string; text: Uint8Array | string }
|
||||
|
||||
/// Per-conversation user-message buffer. We pop messages in arrival order via
|
||||
/// the `pos` cursor — a previous implementation called Array.shift() which is
|
||||
|
|
@ -363,11 +363,12 @@ function buildUserMessageMap(db: SqliteDatabase, timeFloor: string): Map<string,
|
|||
const rows = db.query<UserMsgRow>(USER_MESSAGES_QUERY, [timeFloor])
|
||||
for (const row of rows) {
|
||||
if (!row.conversation_id || !row.text) continue
|
||||
const text = blobToText(row.text)
|
||||
const existing = map.get(row.conversation_id)
|
||||
if (existing) {
|
||||
existing.messages.push(row.text)
|
||||
existing.messages.push(text)
|
||||
} else {
|
||||
map.set(row.conversation_id, { messages: [row.text], pos: 0 })
|
||||
map.set(row.conversation_id, { messages: [text], pos: 0 })
|
||||
}
|
||||
}
|
||||
} catch {}
|
||||
|
|
@ -488,10 +489,10 @@ function parseBubbles(db: SqliteDatabase, seenKeys: Set<string>): { calls: Parse
|
|||
|
||||
const timestamp = createdAt || new Date().toISOString()
|
||||
const userQuestion = takeUserMessage(userMessages, conversationId)
|
||||
const assistantText = row.user_text ?? ''
|
||||
const assistantText = blobToText(row.user_text)
|
||||
const userText = (userQuestion + ' ' + assistantText).trim()
|
||||
|
||||
const languages = extractLanguages(row.code_blocks)
|
||||
const languages = extractLanguages(blobToText(row.code_blocks))
|
||||
const hasCode = languages.length > 0
|
||||
|
||||
const cursorTools: string[] = hasCode ? ['cursor:edit', ...languages.map(l => `lang:${l}`)] : []
|
||||
|
|
@ -572,20 +573,21 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set<string>, dbPath: string)
|
|||
|
||||
for (const row of rows) {
|
||||
if (!row.role || !row.content) continue
|
||||
const contentText = blobToText(row.content)
|
||||
|
||||
let content: AgentKvContent[]
|
||||
let plainTextLength = 0
|
||||
try {
|
||||
const parsed = JSON.parse(row.content)
|
||||
const parsed = JSON.parse(contentText)
|
||||
if (Array.isArray(parsed)) {
|
||||
content = parsed
|
||||
} else {
|
||||
content = []
|
||||
plainTextLength = row.content.length
|
||||
plainTextLength = contentText.length
|
||||
}
|
||||
} catch {
|
||||
content = []
|
||||
plainTextLength = row.content.length
|
||||
plainTextLength = contentText.length
|
||||
}
|
||||
|
||||
const requestId = row.request_id ?? currentRequestId
|
||||
|
|
@ -601,7 +603,7 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set<string>, dbPath: string)
|
|||
const existing = sessions.get(requestId) ?? { inputChars: 0, outputChars: 0, model: null, userText: '' }
|
||||
existing.inputChars += textLength
|
||||
if (!existing.userText) {
|
||||
const text = content[0]?.text ?? row.content
|
||||
const text = content[0]?.text ?? contentText
|
||||
const queryMatch = text.match(/<user_query>([\s\S]*?)<\/user_query>/)
|
||||
existing.userText = queryMatch ? queryMatch[1].trim().slice(0, 500) : text.slice(0, 500)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import { homedir, platform } from 'os'
|
|||
|
||||
import { calculateCost, getShortModelName } from '../models.js'
|
||||
import { extractBashCommands } from '../bash-utils.js'
|
||||
import { isSqliteAvailable, getSqliteLoadError, openDatabase, type SqliteDatabase } from '../sqlite.js'
|
||||
import { isSqliteAvailable, getSqliteLoadError, openDatabase, blobToText, type SqliteDatabase } from '../sqlite.js'
|
||||
import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
|
||||
|
||||
type SessionRow = {
|
||||
|
|
@ -15,7 +15,7 @@ type SessionRow = {
|
|||
accumulated_input_tokens: number | null
|
||||
accumulated_output_tokens: number | null
|
||||
provider_name: string | null
|
||||
model_config_json: string | null
|
||||
model_config_json: Uint8Array | string | null
|
||||
}
|
||||
|
||||
type ModelConfig = {
|
||||
|
|
@ -26,7 +26,7 @@ type ModelConfig = {
|
|||
type MessageRow = {
|
||||
message_id: string
|
||||
role: string
|
||||
content_json: string
|
||||
content_json: Uint8Array | string
|
||||
created_timestamp: number
|
||||
}
|
||||
|
||||
|
|
@ -86,15 +86,15 @@ function extractToolsFromMessages(db: SqliteDatabase, sessionId: string): { tool
|
|||
const seen = new Set<string>()
|
||||
|
||||
try {
|
||||
const rows = db.query<{ content_json: string }>(
|
||||
"SELECT content_json FROM messages WHERE session_id = ? AND role = 'assistant' AND content_json LIKE '%toolRequest%'",
|
||||
const rows = db.query<{ content_json: Uint8Array | string }>(
|
||||
"SELECT CAST(content_json AS BLOB) AS content_json FROM messages WHERE session_id = ? AND role = 'assistant' AND content_json LIKE '%toolRequest%'",
|
||||
[sessionId],
|
||||
)
|
||||
|
||||
for (const row of rows) {
|
||||
let items: ContentItem[]
|
||||
try {
|
||||
items = JSON.parse(row.content_json) as ContentItem[]
|
||||
items = JSON.parse(blobToText(row.content_json)) as ContentItem[]
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
|
|
@ -124,12 +124,12 @@ function extractToolsFromMessages(db: SqliteDatabase, sessionId: string): { tool
|
|||
|
||||
function getFirstUserMessage(db: SqliteDatabase, sessionId: string): string {
|
||||
try {
|
||||
const rows = db.query<{ content_json: string }>(
|
||||
"SELECT content_json FROM messages WHERE session_id = ? AND role = 'user' ORDER BY created_timestamp ASC LIMIT 1",
|
||||
const rows = db.query<{ content_json: Uint8Array | string }>(
|
||||
"SELECT CAST(content_json AS BLOB) AS content_json FROM messages WHERE session_id = ? AND role = 'user' ORDER BY created_timestamp ASC LIMIT 1",
|
||||
[sessionId],
|
||||
)
|
||||
if (rows.length === 0) return ''
|
||||
const items = JSON.parse(rows[0]!.content_json) as ContentItem[]
|
||||
const items = JSON.parse(blobToText(rows[0]!.content_json)) as ContentItem[]
|
||||
const text = items.find(i => i.type === 'text') as { text?: string } | undefined
|
||||
return (text?.text ?? '').slice(0, 500)
|
||||
} catch {
|
||||
|
|
@ -161,7 +161,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
|
|||
if (!validateSchema(db)) return
|
||||
|
||||
const rows = db.query<SessionRow>(
|
||||
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, model_config_json FROM sessions WHERE id = ?',
|
||||
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, CAST(model_config_json AS BLOB) AS model_config_json FROM sessions WHERE id = ?',
|
||||
[sessionId],
|
||||
)
|
||||
if (rows.length === 0) return
|
||||
|
|
@ -175,7 +175,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
|
|||
if (seenKeys.has(dedupKey)) return
|
||||
seenKeys.add(dedupKey)
|
||||
|
||||
const config = parseModelConfig(session.model_config_json)
|
||||
const config = parseModelConfig(blobToText(session.model_config_json))
|
||||
const model = config.model_name ?? 'unknown'
|
||||
const costUSD = calculateCost(model, inputTokens, outputTokens, 0, 0, 0)
|
||||
|
||||
|
|
@ -223,7 +223,7 @@ async function discoverFromDb(dbPath: string): Promise<SessionSource[]> {
|
|||
|
||||
try {
|
||||
const rows = db.query<SessionRow>(
|
||||
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, model_config_json FROM sessions ORDER BY updated_at DESC',
|
||||
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, CAST(model_config_json AS BLOB) AS model_config_json FROM sessions ORDER BY updated_at DESC',
|
||||
)
|
||||
|
||||
return rows
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import { homedir } from 'os'
|
|||
|
||||
import { calculateCost, getShortModelName } from '../models.js'
|
||||
import { extractBashCommands } from '../bash-utils.js'
|
||||
import { isSqliteAvailable, getSqliteLoadError, openDatabase, type SqliteDatabase } from '../sqlite.js'
|
||||
import { isSqliteAvailable, getSqliteLoadError, openDatabase, blobToText, type SqliteDatabase } from '../sqlite.js'
|
||||
import type {
|
||||
Provider,
|
||||
SessionSource,
|
||||
|
|
@ -15,18 +15,18 @@ import type {
|
|||
type MessageRow = {
|
||||
id: string
|
||||
time_created: number
|
||||
data: string
|
||||
data: Uint8Array | string
|
||||
}
|
||||
|
||||
type PartRow = {
|
||||
message_id: string
|
||||
data: string
|
||||
data: Uint8Array | string
|
||||
}
|
||||
|
||||
type SessionRow = {
|
||||
id: string
|
||||
directory: string
|
||||
title: string
|
||||
directory: Uint8Array | string
|
||||
title: Uint8Array | string
|
||||
time_created: number
|
||||
}
|
||||
|
||||
|
|
@ -169,19 +169,19 @@ function createParser(
|
|||
}
|
||||
|
||||
const messages = db.query<MessageRow>(
|
||||
'SELECT id, time_created, data FROM message WHERE session_id = ? ORDER BY time_created ASC',
|
||||
'SELECT id, time_created, CAST(data AS BLOB) AS data FROM message WHERE session_id = ? ORDER BY time_created ASC',
|
||||
[sessionId],
|
||||
)
|
||||
|
||||
const parts = db.query<PartRow>(
|
||||
'SELECT message_id, data FROM part WHERE session_id = ? ORDER BY message_id, id',
|
||||
'SELECT message_id, CAST(data AS BLOB) AS data FROM part WHERE session_id = ? ORDER BY message_id, id',
|
||||
[sessionId],
|
||||
)
|
||||
|
||||
const partsByMsg = new Map<string, PartData[]>()
|
||||
for (const part of parts) {
|
||||
try {
|
||||
const parsed = JSON.parse(part.data) as PartData
|
||||
const parsed = JSON.parse(blobToText(part.data)) as PartData
|
||||
const list = partsByMsg.get(part.message_id) ?? []
|
||||
list.push(parsed)
|
||||
partsByMsg.set(part.message_id, list)
|
||||
|
|
@ -195,7 +195,7 @@ function createParser(
|
|||
for (const msg of messages) {
|
||||
let data: MessageData
|
||||
try {
|
||||
data = JSON.parse(msg.data) as MessageData
|
||||
data = JSON.parse(blobToText(msg.data)) as MessageData
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
|
|
@ -294,14 +294,18 @@ async function discoverFromDb(dbPath: string): Promise<SessionSource[]> {
|
|||
|
||||
try {
|
||||
const rows = db.query<SessionRow>(
|
||||
'SELECT id, directory, title, time_created FROM session WHERE time_archived IS NULL AND parent_id IS NULL ORDER BY time_created DESC',
|
||||
'SELECT id, CAST(directory AS BLOB) AS directory, CAST(title AS BLOB) AS title, time_created FROM session WHERE time_archived IS NULL AND parent_id IS NULL ORDER BY time_created DESC',
|
||||
)
|
||||
|
||||
return rows.map((row) => ({
|
||||
path: `${dbPath}:${row.id}`,
|
||||
project: row.directory ? sanitize(row.directory) : sanitize(row.title),
|
||||
provider: 'opencode',
|
||||
}))
|
||||
return rows.map((row) => {
|
||||
const dir = blobToText(row.directory)
|
||||
const title = blobToText(row.title)
|
||||
return {
|
||||
path: `${dbPath}:${row.id}`,
|
||||
project: dir ? sanitize(dir) : sanitize(title),
|
||||
provider: 'opencode',
|
||||
}
|
||||
})
|
||||
} catch {
|
||||
return []
|
||||
} finally {
|
||||
|
|
|
|||
|
|
@ -23,29 +23,18 @@ let DatabaseSync: DatabaseSyncCtor | null = null
|
|||
let loadAttempted = false
|
||||
let loadError: string | null = null
|
||||
|
||||
/// Minimum Node 22.x patch version that contains the node:sqlite UTF-8 fix.
|
||||
/// Older 22.x lines crash with `Check failed: (location_) != nullptr` when a
|
||||
/// SQLite TEXT column returns bytes that V8's String::NewFromUtf8 rejects —
|
||||
/// commonly the case for Cursor's text blobs (truncated multi-byte chars at
|
||||
/// streaming boundaries) and OpenCode message text (rich tooling output).
|
||||
/// Track of issue: https://github.com/getagentseal/codeburn/issues/264
|
||||
/// Track of upstream: https://github.com/nodejs/node — fix landed in 22.x via
|
||||
/// later patches; stable on Node 24+.
|
||||
const MIN_NODE_22_PATCH = 20
|
||||
const textDecoder = new TextDecoder('utf-8', { fatal: false })
|
||||
|
||||
function checkBuggyNodeVersion(): string | null {
|
||||
const match = /^v(\d+)\.(\d+)\.(\d+)/.exec(process.version)
|
||||
if (!match) return null
|
||||
const major = parseInt(match[1]!, 10)
|
||||
const minor = parseInt(match[2]!, 10)
|
||||
if (major === 22 && minor < MIN_NODE_22_PATCH) {
|
||||
return (
|
||||
`codeburn: Node ${process.version} ships an older node:sqlite that crashes on ` +
|
||||
`non-UTF-8 bytes in Cursor/OpenCode session text. Upgrade to Node 22.${MIN_NODE_22_PATCH}+ ` +
|
||||
`or 24+ to avoid the V8 fatal error. (https://nodejs.org)`
|
||||
)
|
||||
}
|
||||
return null
|
||||
/// Safely decode a BLOB column (Uint8Array) to a UTF-8 string. Node's
|
||||
/// node:sqlite crashes with a V8 CHECK abort when a TEXT column contains
|
||||
/// invalid UTF-8 (common in Cursor chat blobs with truncated multi-byte
|
||||
/// chars). By selecting those columns as `CAST(... AS BLOB)` in SQL, we
|
||||
/// get a Uint8Array here and decode it in JS where bad bytes become the
|
||||
/// U+FFFD replacement character instead of aborting the process.
|
||||
export function blobToText(value: Uint8Array | string | null | undefined): string {
|
||||
if (value == null) return ''
|
||||
if (typeof value === 'string') return value
|
||||
return textDecoder.decode(value)
|
||||
}
|
||||
|
||||
/// Lazily imports `node:sqlite`. On Node 22/23 it emits an ExperimentalWarning the first
|
||||
|
|
@ -56,15 +45,6 @@ function loadDriver(): boolean {
|
|||
if (loadAttempted) return DatabaseSync !== null
|
||||
loadAttempted = true
|
||||
|
||||
// Refuse to load on a Node version known to crash mid-query. Treating the
|
||||
// SQLite providers as unavailable is much friendlier than letting the user
|
||||
// hit a V8 CHECK abort that takes down the whole CLI.
|
||||
const versionWarning = checkBuggyNodeVersion()
|
||||
if (versionWarning !== null) {
|
||||
loadError = versionWarning
|
||||
return false
|
||||
}
|
||||
|
||||
const origEmit = process.emit.bind(process)
|
||||
let restored = false
|
||||
const restore = () => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue