Fix node:sqlite V8 crash on invalid UTF-8 in text columns (#272)

node:sqlite calls v8::String::NewFromUtf8 with kAbort on TEXT columns.
Cursor chat blobs often contain truncated multi-byte chars from streaming
boundaries, which triggers a V8 CHECK abort (not a JS exception).

Select all text-content columns as CAST(col AS BLOB) so node:sqlite
returns Uint8Array instead. Decode in JS with TextDecoder fatal:false
which replaces bad bytes with U+FFFD. Covers all three SQLite providers
(Cursor, Goose, OpenCode).

Removes the version blocklist (MIN_NODE_22_PATCH) and lowers engines
requirement from >=22.20 to >=22 since the BLOB cast approach works
on all Node 22.x versions.

Closes #264
Closes #250
This commit is contained in:
Resham Joshi 2026-05-10 17:05:08 -07:00 committed by GitHub
parent d142bd97ef
commit 02f4635cec
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 101 additions and 76 deletions

View file

@ -30,7 +30,7 @@
"developer-tools"
],
"engines": {
"node": ">=22.20"
"node": ">=22"
},
"author": "AgentSeal <hello@agentseal.org>",
"license": "MIT",

View file

@ -4,7 +4,7 @@ import { homedir } from 'os'
import { calculateCost } from '../models.js'
import { readCachedResults, writeCachedResults } from '../cursor-cache.js'
import { isSqliteAvailable, getSqliteLoadError, openDatabase, type SqliteDatabase } from '../sqlite.js'
import { isSqliteAvailable, getSqliteLoadError, openDatabase, blobToText, type SqliteDatabase } from '../sqlite.js'
import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
const CURSOR_COST_MODEL = 'claude-sonnet-4-5'
@ -33,16 +33,16 @@ type BubbleRow = {
model: string | null
created_at: string | null
conversation_id: string | null
user_text: string | null
user_text: Uint8Array | string | null
text_length: number | null
bubble_type: number | null
code_blocks: string | null
code_blocks: Uint8Array | string | null
}
type AgentKvRow = {
key: string
role: string | null
content: string | null
content: Uint8Array | string | null
request_id: string | null
content_length: number
}
@ -291,10 +291,10 @@ const BUBBLE_QUERY_BASE = `
json_extract(value, '$.modelInfo.modelName') as model,
json_extract(value, '$.createdAt') as created_at,
json_extract(value, '$.conversationId') as conversation_id,
substr(json_extract(value, '$.text'), 1, 500) as user_text,
CAST(substr(json_extract(value, '$.text'), 1, 500) AS BLOB) as user_text,
length(json_extract(value, '$.text')) as text_length,
json_extract(value, '$.type') as bubble_type,
json_extract(value, '$.codeBlocks') as code_blocks
CAST(json_extract(value, '$.codeBlocks') AS BLOB) as code_blocks
FROM cursorDiskKV
WHERE key LIKE 'bubbleId:%'
`
@ -303,7 +303,7 @@ const AGENTKV_QUERY = `
SELECT
key,
json_extract(value, '$.role') as role,
json_extract(value, '$.content') as content,
CAST(json_extract(value, '$.content') AS BLOB) as content,
json_extract(value, '$.providerOptions.cursor.requestId') as request_id,
length(value) as content_length
FROM cursorDiskKV
@ -316,7 +316,7 @@ const USER_MESSAGES_QUERY = `
SELECT
json_extract(value, '$.conversationId') as conversation_id,
json_extract(value, '$.createdAt') as created_at,
substr(json_extract(value, '$.text'), 1, 500) as text
CAST(substr(json_extract(value, '$.text'), 1, 500) AS BLOB) as text
FROM cursorDiskKV
WHERE key LIKE 'bubbleId:%'
AND json_extract(value, '$.type') = 1
@ -346,7 +346,7 @@ function validateSchema(db: SqliteDatabase): boolean {
}
}
type UserMsgRow = { conversation_id: string; created_at: string; text: string }
type UserMsgRow = { conversation_id: string; created_at: string; text: Uint8Array | string }
/// Per-conversation user-message buffer. We pop messages in arrival order via
/// the `pos` cursor — a previous implementation called Array.shift() which is
@ -363,11 +363,12 @@ function buildUserMessageMap(db: SqliteDatabase, timeFloor: string): Map<string,
const rows = db.query<UserMsgRow>(USER_MESSAGES_QUERY, [timeFloor])
for (const row of rows) {
if (!row.conversation_id || !row.text) continue
const text = blobToText(row.text)
const existing = map.get(row.conversation_id)
if (existing) {
existing.messages.push(row.text)
existing.messages.push(text)
} else {
map.set(row.conversation_id, { messages: [row.text], pos: 0 })
map.set(row.conversation_id, { messages: [text], pos: 0 })
}
}
} catch {}
@ -488,10 +489,10 @@ function parseBubbles(db: SqliteDatabase, seenKeys: Set<string>): { calls: Parse
const timestamp = createdAt || new Date().toISOString()
const userQuestion = takeUserMessage(userMessages, conversationId)
const assistantText = row.user_text ?? ''
const assistantText = blobToText(row.user_text)
const userText = (userQuestion + ' ' + assistantText).trim()
const languages = extractLanguages(row.code_blocks)
const languages = extractLanguages(blobToText(row.code_blocks))
const hasCode = languages.length > 0
const cursorTools: string[] = hasCode ? ['cursor:edit', ...languages.map(l => `lang:${l}`)] : []
@ -572,20 +573,21 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set<string>, dbPath: string)
for (const row of rows) {
if (!row.role || !row.content) continue
const contentText = blobToText(row.content)
let content: AgentKvContent[]
let plainTextLength = 0
try {
const parsed = JSON.parse(row.content)
const parsed = JSON.parse(contentText)
if (Array.isArray(parsed)) {
content = parsed
} else {
content = []
plainTextLength = row.content.length
plainTextLength = contentText.length
}
} catch {
content = []
plainTextLength = row.content.length
plainTextLength = contentText.length
}
const requestId = row.request_id ?? currentRequestId
@ -601,7 +603,7 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set<string>, dbPath: string)
const existing = sessions.get(requestId) ?? { inputChars: 0, outputChars: 0, model: null, userText: '' }
existing.inputChars += textLength
if (!existing.userText) {
const text = content[0]?.text ?? row.content
const text = content[0]?.text ?? contentText
const queryMatch = text.match(/<user_query>([\s\S]*?)<\/user_query>/)
existing.userText = queryMatch ? queryMatch[1].trim().slice(0, 500) : text.slice(0, 500)
}

View file

@ -3,7 +3,7 @@ import { homedir, platform } from 'os'
import { calculateCost, getShortModelName } from '../models.js'
import { extractBashCommands } from '../bash-utils.js'
import { isSqliteAvailable, getSqliteLoadError, openDatabase, type SqliteDatabase } from '../sqlite.js'
import { isSqliteAvailable, getSqliteLoadError, openDatabase, blobToText, type SqliteDatabase } from '../sqlite.js'
import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
type SessionRow = {
@ -15,7 +15,7 @@ type SessionRow = {
accumulated_input_tokens: number | null
accumulated_output_tokens: number | null
provider_name: string | null
model_config_json: string | null
model_config_json: Uint8Array | string | null
}
type ModelConfig = {
@ -26,7 +26,7 @@ type ModelConfig = {
type MessageRow = {
message_id: string
role: string
content_json: string
content_json: Uint8Array | string
created_timestamp: number
}
@ -86,15 +86,15 @@ function extractToolsFromMessages(db: SqliteDatabase, sessionId: string): { tool
const seen = new Set<string>()
try {
const rows = db.query<{ content_json: string }>(
"SELECT content_json FROM messages WHERE session_id = ? AND role = 'assistant' AND content_json LIKE '%toolRequest%'",
const rows = db.query<{ content_json: Uint8Array | string }>(
"SELECT CAST(content_json AS BLOB) AS content_json FROM messages WHERE session_id = ? AND role = 'assistant' AND content_json LIKE '%toolRequest%'",
[sessionId],
)
for (const row of rows) {
let items: ContentItem[]
try {
items = JSON.parse(row.content_json) as ContentItem[]
items = JSON.parse(blobToText(row.content_json)) as ContentItem[]
} catch {
continue
}
@ -124,12 +124,12 @@ function extractToolsFromMessages(db: SqliteDatabase, sessionId: string): { tool
function getFirstUserMessage(db: SqliteDatabase, sessionId: string): string {
try {
const rows = db.query<{ content_json: string }>(
"SELECT content_json FROM messages WHERE session_id = ? AND role = 'user' ORDER BY created_timestamp ASC LIMIT 1",
const rows = db.query<{ content_json: Uint8Array | string }>(
"SELECT CAST(content_json AS BLOB) AS content_json FROM messages WHERE session_id = ? AND role = 'user' ORDER BY created_timestamp ASC LIMIT 1",
[sessionId],
)
if (rows.length === 0) return ''
const items = JSON.parse(rows[0]!.content_json) as ContentItem[]
const items = JSON.parse(blobToText(rows[0]!.content_json)) as ContentItem[]
const text = items.find(i => i.type === 'text') as { text?: string } | undefined
return (text?.text ?? '').slice(0, 500)
} catch {
@ -161,7 +161,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
if (!validateSchema(db)) return
const rows = db.query<SessionRow>(
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, model_config_json FROM sessions WHERE id = ?',
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, CAST(model_config_json AS BLOB) AS model_config_json FROM sessions WHERE id = ?',
[sessionId],
)
if (rows.length === 0) return
@ -175,7 +175,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
if (seenKeys.has(dedupKey)) return
seenKeys.add(dedupKey)
const config = parseModelConfig(session.model_config_json)
const config = parseModelConfig(blobToText(session.model_config_json))
const model = config.model_name ?? 'unknown'
const costUSD = calculateCost(model, inputTokens, outputTokens, 0, 0, 0)
@ -223,7 +223,7 @@ async function discoverFromDb(dbPath: string): Promise<SessionSource[]> {
try {
const rows = db.query<SessionRow>(
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, model_config_json FROM sessions ORDER BY updated_at DESC',
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, CAST(model_config_json AS BLOB) AS model_config_json FROM sessions ORDER BY updated_at DESC',
)
return rows

View file

@ -4,7 +4,7 @@ import { homedir } from 'os'
import { calculateCost, getShortModelName } from '../models.js'
import { extractBashCommands } from '../bash-utils.js'
import { isSqliteAvailable, getSqliteLoadError, openDatabase, type SqliteDatabase } from '../sqlite.js'
import { isSqliteAvailable, getSqliteLoadError, openDatabase, blobToText, type SqliteDatabase } from '../sqlite.js'
import type {
Provider,
SessionSource,
@ -15,18 +15,18 @@ import type {
type MessageRow = {
id: string
time_created: number
data: string
data: Uint8Array | string
}
type PartRow = {
message_id: string
data: string
data: Uint8Array | string
}
type SessionRow = {
id: string
directory: string
title: string
directory: Uint8Array | string
title: Uint8Array | string
time_created: number
}
@ -169,19 +169,19 @@ function createParser(
}
const messages = db.query<MessageRow>(
'SELECT id, time_created, data FROM message WHERE session_id = ? ORDER BY time_created ASC',
'SELECT id, time_created, CAST(data AS BLOB) AS data FROM message WHERE session_id = ? ORDER BY time_created ASC',
[sessionId],
)
const parts = db.query<PartRow>(
'SELECT message_id, data FROM part WHERE session_id = ? ORDER BY message_id, id',
'SELECT message_id, CAST(data AS BLOB) AS data FROM part WHERE session_id = ? ORDER BY message_id, id',
[sessionId],
)
const partsByMsg = new Map<string, PartData[]>()
for (const part of parts) {
try {
const parsed = JSON.parse(part.data) as PartData
const parsed = JSON.parse(blobToText(part.data)) as PartData
const list = partsByMsg.get(part.message_id) ?? []
list.push(parsed)
partsByMsg.set(part.message_id, list)
@ -195,7 +195,7 @@ function createParser(
for (const msg of messages) {
let data: MessageData
try {
data = JSON.parse(msg.data) as MessageData
data = JSON.parse(blobToText(msg.data)) as MessageData
} catch {
continue
}
@ -294,14 +294,18 @@ async function discoverFromDb(dbPath: string): Promise<SessionSource[]> {
try {
const rows = db.query<SessionRow>(
'SELECT id, directory, title, time_created FROM session WHERE time_archived IS NULL AND parent_id IS NULL ORDER BY time_created DESC',
'SELECT id, CAST(directory AS BLOB) AS directory, CAST(title AS BLOB) AS title, time_created FROM session WHERE time_archived IS NULL AND parent_id IS NULL ORDER BY time_created DESC',
)
return rows.map((row) => ({
path: `${dbPath}:${row.id}`,
project: row.directory ? sanitize(row.directory) : sanitize(row.title),
provider: 'opencode',
}))
return rows.map((row) => {
const dir = blobToText(row.directory)
const title = blobToText(row.title)
return {
path: `${dbPath}:${row.id}`,
project: dir ? sanitize(dir) : sanitize(title),
provider: 'opencode',
}
})
} catch {
return []
} finally {

View file

@ -23,29 +23,18 @@ let DatabaseSync: DatabaseSyncCtor | null = null
let loadAttempted = false
let loadError: string | null = null
/// Minimum Node 22.x patch version that contains the node:sqlite UTF-8 fix.
/// Older 22.x lines crash with `Check failed: (location_) != nullptr` when a
/// SQLite TEXT column returns bytes that V8's String::NewFromUtf8 rejects —
/// commonly the case for Cursor's text blobs (truncated multi-byte chars at
/// streaming boundaries) and OpenCode message text (rich tooling output).
/// Track of issue: https://github.com/getagentseal/codeburn/issues/264
/// Track of upstream: https://github.com/nodejs/node — fix landed in 22.x via
/// later patches; stable on Node 24+.
const MIN_NODE_22_PATCH = 20
const textDecoder = new TextDecoder('utf-8', { fatal: false })
function checkBuggyNodeVersion(): string | null {
const match = /^v(\d+)\.(\d+)\.(\d+)/.exec(process.version)
if (!match) return null
const major = parseInt(match[1]!, 10)
const minor = parseInt(match[2]!, 10)
if (major === 22 && minor < MIN_NODE_22_PATCH) {
return (
`codeburn: Node ${process.version} ships an older node:sqlite that crashes on ` +
`non-UTF-8 bytes in Cursor/OpenCode session text. Upgrade to Node 22.${MIN_NODE_22_PATCH}+ ` +
`or 24+ to avoid the V8 fatal error. (https://nodejs.org)`
)
}
return null
/// Decode a SQLite column value into a UTF-8 string without risking a V8
/// abort. node:sqlite's TEXT path feeds bytes through V8's strict UTF-8
/// conversion and CHECK-fails (a process abort, not a catchable exception)
/// on invalid sequences — e.g. Cursor chat blobs truncated mid multi-byte
/// char at streaming boundaries. Columns selected via `CAST(... AS BLOB)`
/// arrive here as Uint8Array; the shared non-fatal TextDecoder maps any
/// malformed bytes to U+FFFD instead of taking down the process.
export function blobToText(value: Uint8Array | string | null | undefined): string {
  if (typeof value === 'string') return value
  return value == null ? '' : textDecoder.decode(value)
}
/// Lazily imports `node:sqlite`. On Node 22/23 it emits an ExperimentalWarning the first
@ -56,15 +45,6 @@ function loadDriver(): boolean {
if (loadAttempted) return DatabaseSync !== null
loadAttempted = true
// Refuse to load on a Node version known to crash mid-query. Treating the
// SQLite providers as unavailable is much friendlier than letting the user
// hit a V8 CHECK abort that takes down the whole CLI.
const versionWarning = checkBuggyNodeVersion()
if (versionWarning !== null) {
loadError = versionWarning
return false
}
const origEmit = process.emit.bind(process)
let restored = false
const restore = () => {

View file

@ -0,0 +1,39 @@
import { describe, it, expect } from 'vitest'
import { blobToText } from '../src/sqlite.js'
describe('blobToText', () => {
it('returns empty string for null', () => {
expect(blobToText(null)).toBe('')
})
it('returns empty string for undefined', () => {
expect(blobToText(undefined)).toBe('')
})
it('passes through strings unchanged', () => {
expect(blobToText('hello world')).toBe('hello world')
})
it('decodes valid UTF-8 Uint8Array', () => {
const buf = new TextEncoder().encode('café ☕')
expect(blobToText(buf)).toBe('café ☕')
})
it('replaces invalid UTF-8 bytes with U+FFFD instead of crashing', () => {
const buf = new Uint8Array([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x80, 0xfe])
const result = blobToText(buf)
expect(result).toContain('Hello')
expect(result).toContain('\uFFFD')
})
it('handles truncated multi-byte sequence', () => {
// é in UTF-8 is [0xc3, 0xa9]. Truncate to just [0xc3].
const buf = new Uint8Array([0x63, 0x61, 0x66, 0xc3])
const result = blobToText(buf)
expect(result).toBe('caf\uFFFD')
})
it('handles empty Uint8Array', () => {
expect(blobToText(new Uint8Array(0))).toBe('')
})
})