opencode/packages/opencode/src/tool/read.ts
opencode-agent[bot] dd432e3cde chore: generate
2026-05-16 13:21:57 +00:00

341 lines
12 KiB
TypeScript

import { Effect, Option, Schema, Scope, Stream } from "effect"
import { NonNegativeInt } from "@opencode-ai/core/schema"
import * as path from "path"
import * as Tool from "./tool"
import { AppFileSystem } from "@opencode-ai/core/filesystem"
import { LSP } from "@/lsp/lsp"
import DESCRIPTION from "./read.txt"
import { InstanceState } from "@/effect/instance-state"
import { assertExternalDirectoryEffect } from "./external-directory"
import { Instruction } from "../session/instruction"
import { isPdfAttachment, sniffAttachmentMime } from "@/util/media"
import { Reference } from "@/reference/reference"
// Default number of lines returned when the caller omits `limit`.
const DEFAULT_READ_LIMIT = 2000
// Individual lines longer than this are cut and tagged with MAX_LINE_SUFFIX.
const MAX_LINE_LENGTH = 2000
const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`
// Hard cap on the total bytes of line content returned by one read call.
const MAX_BYTES = 50 * 1024
const MAX_BYTES_LABEL = `${MAX_BYTES / 1024} KB`
// Bytes sampled from the head of a file for mime sniffing / binary detection.
const SAMPLE_BYTES = 4096
// Image mime types that are returned inline as base64 attachments.
const SUPPORTED_IMAGE_MIMES = new Set(["image/jpeg", "image/png", "image/gif", "image/webp"])
class ReadStop extends Schema.TaggedErrorClass<ReadStop>()("ReadStop", {}) {}
// `offset` and `limit` were originally `z.coerce.number()` — the runtime
// coercion was useful when the tool was called from a shell but serves no
// purpose in the LLM tool-call path (the model emits typed JSON). The JSON
// Schema output is identical (`type: "number"`), so the LLM view is
// unchanged; purely CLI-facing uses must now send numbers rather than strings.
// Input schema for the read tool. `offset`/`limit` apply both to file lines
// and to directory entries.
export const Parameters = Schema.Struct({
  filePath: Schema.String.annotate({ description: "The absolute path to the file or directory to read" }),
  offset: Schema.optional(NonNegativeInt).annotate({
    description: "The line number to start reading from (1-indexed)",
  }),
  limit: Schema.optional(NonNegativeInt).annotate({
    description: "The maximum number of lines to read (defaults to 2000)",
  }),
})
export const ReadTool = Tool.define(
"read",
Effect.gen(function* () {
const fs = yield* AppFileSystem.Service
const instruction = yield* Instruction.Service
const lsp = yield* LSP.Service
const reference = yield* Reference.Service
const scope = yield* Scope.Scope
// Build the "file not found" failure, suggesting up to three entries in the
// parent directory whose names loosely match the requested basename
// (either string contains the other, case-insensitively).
const miss = Effect.fn("ReadTool.miss")(function* (filepath: string) {
  const parent = path.dirname(filepath)
  const wanted = path.basename(filepath).toLowerCase()
  const suggestions = yield* fs.readDirectory(parent).pipe(
    Effect.map((entries) => {
      const matches: string[] = []
      for (const entry of entries) {
        if (matches.length === 3) break
        const candidate = entry.toLowerCase()
        if (candidate.includes(wanted) || wanted.includes(candidate)) {
          matches.push(path.join(parent, entry))
        }
      }
      return matches
    }),
    // Listing the parent may itself fail (e.g. it does not exist) — fall
    // back to no suggestions rather than masking the original miss.
    Effect.catch(() => Effect.succeed([] as string[])),
  )
  const message =
    suggestions.length > 0
      ? `File not found: ${filepath}\n\nDid you mean one of these?\n${suggestions.join("\n")}`
      : `File not found: ${filepath}`
  return yield* Effect.fail(new Error(message))
})
// List a directory's entries, appending "/" to directories (including
// symlinks that resolve to directories) and sorting by locale order.
const list = Effect.fn("ReadTool.list")(function* (filepath: string) {
  const entries = yield* fs.readDirectoryEntries(filepath)
  const names: string[] = yield* Effect.forEach(
    entries,
    Effect.fnUntraced(function* (entry) {
      switch (entry.type) {
        case "directory":
          return entry.name + "/"
        case "symlink": {
          // Follow the link so symlinked directories also get the suffix;
          // a failed stat (dangling link) leaves the bare name.
          const info = yield* fs.stat(path.join(filepath, entry.name)).pipe(Effect.catch(() => Effect.void))
          return info?.type === "Directory" ? entry.name + "/" : entry.name
        }
        default:
          return entry.name
      }
    }),
    { concurrency: "unbounded" },
  )
  return names.sort((a, b) => a.localeCompare(b))
})
// Fire-and-forget: touch the file in the LSP so diagnostics start loading
// in the background; any failure is ignored.
const warm = Effect.fn("ReadTool.warm")(function* (filepath: string) {
  yield* Effect.ignore(lsp.touchFile(filepath)).pipe(Effect.forkIn(scope))
})
// Read at most `sampleSize` bytes from the head of the file — used for mime
// sniffing and binary detection. Empty files short-circuit without opening.
const readSample = Effect.fn("ReadTool.readSample")(function* (
  filepath: string,
  fileSize: number,
  sampleSize: number,
) {
  const empty = new Uint8Array()
  if (fileSize === 0) return empty
  const want = Math.min(sampleSize, fileSize)
  return yield* Effect.scoped(
    Effect.gen(function* () {
      const handle = yield* fs.open(filepath, { flag: "r" })
      const chunk = yield* handle.readAlloc(want)
      return Option.getOrElse(chunk, () => empty)
    }),
  )
})
// Stream the file as UTF-8 text and collect up to `opts.limit` lines starting
// at 1-indexed `opts.offset`, stopping early once MAX_BYTES of content has
// accumulated. Returns:
//   raw    — the selected (possibly per-line truncated) lines
//   count  — lines seen before stopping (the file's total line count unless
//            the byte cap cut the scan short)
//   cut    — true when the byte cap was hit
//   more   — true when lines remain beyond what was returned
const lines = Effect.fn("ReadTool.lines")(function* (filepath: string, opts: { limit: number; offset: number }) {
  const start = opts.offset - 1
  const raw: string[] = []
  const flags = { bytes: 0, count: 0, cut: false, more: false, done: false }
  // Note: prefer manual TextDecoder over Stream.decodeText — when the source stream
  // ends without flushing, decodeText drops the final unterminated line. We also
  // avoid Stream.runForEachWhile (it currently swallows the final unterminated
  // line of the upstream splitLines pipeline) and use a tagged error to stop the
  // upstream file stream as soon as the byte cap is reached.
  // NOTE(review): decoder.decode(..., { stream: true }) is never flushed with a
  // final argument-less decode(), so a file whose last chunk ends mid multi-byte
  // codepoint could silently drop those trailing bytes — presumably acceptable
  // for text files; confirm.
  const decoder = new TextDecoder("utf-8")
  yield* fs.stream(filepath).pipe(
    Stream.map((bytes) => decoder.decode(bytes, { stream: true })),
    Stream.splitLines,
    Stream.runForEach((text) =>
      Effect.gen(function* () {
        if (flags.done) return yield* new ReadStop()
        flags.count += 1
        // Lines before the requested offset are counted but not collected.
        if (flags.count <= start) return
        // Past the line limit: keep scanning so `count` reflects the full
        // file length for the "Showing X of Y" footer.
        if (raw.length >= opts.limit) {
          flags.more = true
          return
        }
        const line = text.length > MAX_LINE_LENGTH ? text.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX : text
        // +1 accounts for the newline that will join this line to the previous.
        const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0)
        if (flags.bytes + size <= MAX_BYTES) {
          raw.push(line)
          flags.bytes += size
          return
        }
        // Byte cap reached: mark and abort the stream via ReadStop.
        flags.cut = true
        flags.more = true
        flags.done = true
        return yield* new ReadStop()
      }),
    ),
    // ReadStop is internal control flow, not a real failure.
    Effect.catchTag("ReadStop", () => Effect.void),
  )
  return { raw, count: flags.count, cut: flags.cut, more: flags.more, offset: opts.offset }
})
// Extensions that are always treated as binary, regardless of content.
const BINARY_EXTENSIONS = new Set([
  ".zip", ".tar", ".gz", ".exe", ".dll", ".so", ".class", ".jar", ".war",
  ".7z", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".odt", ".ods",
  ".odp", ".bin", ".dat", ".obj", ".o", ".a", ".lib", ".wasm", ".pyc", ".pyo",
])
// Heuristic binary detection: known binary extensions short-circuit to true;
// otherwise the sampled bytes are scanned — any NUL byte, or more than 30%
// control characters outside \t \n \v \f \r, marks the file binary.
const isBinaryFile = (filepath: string, bytes: Uint8Array) => {
  if (BINARY_EXTENSIONS.has(path.extname(filepath).toLowerCase())) return true
  if (bytes.length === 0) return false
  let suspicious = 0
  for (const byte of bytes) {
    if (byte === 0) return true
    // Control characters below TAB (9) or between CR (13) and space (32).
    if (byte < 9 || (byte > 13 && byte < 32)) suspicious++
  }
  return suspicious / bytes.length > 0.3
}
// Tool entry point: resolve and authorize the path, then dispatch on what it
// points at — directory listing, image/PDF attachment, binary rejection, or a
// numbered plain-text read with truncation notices.
const run = Effect.fn("ReadTool.execute")(function* (
  params: Schema.Schema.Type<typeof Parameters>,
  ctx: Tool.Context,
) {
  const instance = yield* InstanceState.context
  let filepath = params.filePath
  // Relative paths are resolved against the instance working directory.
  if (!path.isAbsolute(filepath)) {
    filepath = path.resolve(instance.directory, filepath)
  }
  if (process.platform === "win32") {
    filepath = AppFileSystem.normalizePath(filepath)
  }
  yield* reference.ensure(filepath)
  const title = path.relative(instance.worktree, filepath)
  // A missing file is not a failure yet — the permission checks below run
  // first, and `miss` then produces the suggestion-bearing error.
  const stat = yield* fs.stat(filepath).pipe(
    Effect.catchIf(
      (err) => "reason" in err && err.reason._tag === "NotFound",
      () => Effect.succeed(undefined),
    ),
  )
  // Block reads outside the working tree unless explicitly bypassed or the
  // path is an already-known reference.
  yield* assertExternalDirectoryEffect(ctx, filepath, {
    bypass: Boolean(ctx.extra?.["bypassCwdCheck"]) || (yield* reference.contains(filepath)),
    kind: stat?.type === "Directory" ? "directory" : "file",
  })
  yield* ctx.ask({
    permission: "read",
    patterns: [path.relative(instance.worktree, filepath)],
    always: ["*"],
    metadata: {},
  })
  if (!stat) return yield* miss(filepath)
  // Directory: return a sorted entry listing, paged via offset/limit.
  if (stat.type === "Directory") {
    const items = yield* list(filepath)
    const limit = params.limit ?? DEFAULT_READ_LIMIT
    const offset = params.offset || 1
    const start = offset - 1
    const sliced = items.slice(start, start + limit)
    const truncated = start + sliced.length < items.length
    return {
      title,
      output: [
        `<path>${filepath}</path>`,
        `<type>directory</type>`,
        `<entries>`,
        sliced.join("\n"),
        truncated
          ? `\n(Showing ${sliced.length} of ${items.length} entries. Use 'offset' parameter to read beyond entry ${offset + sliced.length})`
          : `\n(${items.length} entries)`,
        `</entries>`,
      ].join("\n"),
      metadata: {
        preview: sliced.slice(0, 20).join("\n"),
        truncated,
        loaded: [] as string[],
      },
    }
  }
  const loaded = yield* instruction.resolve(ctx.messages, filepath, ctx.messageID)
  // Sniff the head of the file to decide how to represent it.
  const sample = yield* readSample(filepath, Number(stat.size), SAMPLE_BYTES)
  const mime = sniffAttachmentMime(sample, AppFileSystem.mimeType(filepath))
  const isImage = SUPPORTED_IMAGE_MIMES.has(mime)
  // Images and PDFs are returned whole as base64 data-URL attachments.
  if (isImage || isPdfAttachment(mime)) {
    const bytes = yield* fs.readFile(filepath)
    const msg = isPdfAttachment(mime) ? "PDF read successfully" : "Image read successfully"
    return {
      title,
      output: msg,
      metadata: {
        preview: msg,
        truncated: false,
        loaded: loaded.map((item) => item.filepath),
      },
      attachments: [
        {
          type: "file" as const,
          mime,
          url: `data:${mime};base64,${Buffer.from(bytes).toString("base64")}`,
        },
      ],
    }
  }
  if (isBinaryFile(filepath, sample)) {
    return yield* Effect.fail(new Error(`Cannot read binary file: ${filepath}`))
  }
  // Plain text: stream the requested line window.
  const file = yield* lines(filepath, { limit: params.limit ?? DEFAULT_READ_LIMIT, offset: params.offset || 1 })
  // Reading an empty file from offset 1 is allowed; any other offset past the
  // number of lines seen is an error.
  if (file.count < file.offset && !(file.count === 0 && file.offset === 1)) {
    return yield* Effect.fail(
      new Error(`Offset ${file.offset} is out of range for this file (${file.count} lines)`),
    )
  }
  let output = [`<path>${filepath}</path>`, `<type>file</type>`, "<content>\n"].join("\n")
  // Number lines using the caller's 1-indexed offset.
  output += file.raw.map((line, i) => `${i + file.offset}: ${line}`).join("\n")
  const last = file.offset + file.raw.length - 1
  const next = last + 1
  const truncated = file.more || file.cut
  // Footer explains why output stopped: byte cap, line limit, or end of file.
  if (file.cut) {
    output += `\n\n(Output capped at ${MAX_BYTES_LABEL}. Showing lines ${file.offset}-${last}. Use offset=${next} to continue.)`
  } else if (file.more) {
    output += `\n\n(Showing lines ${file.offset}-${last} of ${file.count}. Use offset=${next} to continue.)`
  } else {
    output += `\n\n(End of file - total ${file.count} lines)`
  }
  output += "\n</content>"
  // Warm the LSP in the background after a successful text read.
  yield* warm(filepath)
  // Append any instruction content resolved for this file as a reminder.
  if (loaded.length > 0) {
    output += `\n\n<system-reminder>\n${loaded.map((item) => item.content).join("\n\n")}\n</system-reminder>`
  }
  return {
    title,
    output,
    metadata: {
      preview: file.raw.slice(0, 20).join("\n"),
      truncated,
      loaded: loaded.map((item) => item.filepath),
    },
  }
})
return {
description: DESCRIPTION,
parameters: Parameters,
execute: (params: Schema.Schema.Type<typeof Parameters>, ctx: Tool.Context) =>
run(params, ctx).pipe(Effect.orDie),
}
}),
)