openclaw/src/context-engine/types.ts
Josh Lehman 75e7fc97f8
fix: preserve runtime token budget in deferred context-engine maintenance (#66820)
* fix(context-engine): pass deferred maintenance token budget

Thread tokenBudget through the after-turn runtime context so background context-engine maintenance reuses the real model context window instead of falling back to 128k. Also pass through a best-effort currentTokenCount from the latest call total and make the runtime context type explicit about both fields.

Regeneration-Prompt: |
  OpenClaw already passed the real context token budget into direct context-engine calls like afterTurn and assemble, but deferred maintain() reused only the runtimeContext object and that object did not carry tokenBudget. Lossless Claw therefore fell back to 128k during background maintenance, which made budget-trigger fire much more aggressively than the live model context warranted. Thread the real contextTokenBudget into buildAfterTurnRuntimeContext so deferred maintenance receives the same budget, and pass a straightforward best-effort currentTokenCount from the latest call total while the relevant data is already in scope. Keep the change additive, update the runtime-context type, and cover the background maintenance/runtime-context behavior with focused tests.

* fix(context-engine): use prompt usage for deferred maintenance
2026-04-14 15:30:37 -07:00

297 lines
9.4 KiB
TypeScript

import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { MemoryCitationsMode } from "../config/types.memory.js";
// Result types
/** Value resolved by a context engine's `assemble` call. */
export type AssembleResult = {
  /** Messages, in order, that should be used as the model context. */
  messages: AgentMessage[];
  /** Estimated token total of the assembled context. */
  estimatedTokens: number;
  /**
   * Optional instructions supplied by the context engine; they are prepended
   * to the runtime system prompt.
   */
  systemPromptAddition?: string;
};
/** Value resolved by a context engine's `compact` call. */
export type CompactResult = {
  /** NOTE(review): presumably false when the compaction attempt failed — confirm with implementers. */
  ok: boolean;
  /** NOTE(review): presumably true only when context was actually compacted — confirm. */
  compacted: boolean;
  /** Optional free-form explanation accompanying the outcome. */
  reason?: string;
  /**
   * Optional detail payload. When it is present, `tokensBefore` is the only
   * mandatory field; everything else may be omitted.
   */
  result?: {
    summary?: string;
    firstKeptEntryId?: string;
    tokensBefore: number;
    tokensAfter?: number;
    /** Opaque engine-specific data; typed `unknown`, so narrow before use. */
    details?: unknown;
  };
};
/** Value resolved by a context engine's single-message `ingest` call. */
export type IngestResult = {
  /** True when the message was ingested; false for a duplicate or a no-op. */
  ingested: boolean;
};
/** Value resolved by a context engine's `ingestBatch` call. */
export type IngestBatchResult = {
  /** How many messages from the supplied batch were ingested. */
  ingestedCount: number;
};
/** Value resolved by a context engine's `bootstrap` call. */
export type BootstrapResult = {
  /** True when bootstrap ran and initialized the engine's store. */
  bootstrapped: boolean;
  /** Count of historical messages imported, when that applies. */
  importedMessages?: number;
  /** Optional explanation for why bootstrap was skipped. */
  reason?: string;
};
/** Identifier and descriptive metadata exposed by a context engine via `info`. */
export type ContextEngineInfo = {
  id: string;
  name: string;
  version?: string;
  /** Set to true when the engine manages its own compaction lifecycle. */
  ownsCompaction?: boolean;
  /**
   * Selects how maintenance triggered by a turn should be executed.
   *
   * Engines stay compatible by default; background turn maintenance only
   * applies when the host explicitly opts into it.
   */
  turnMaintenanceMode?: "foreground" | "background";
};
/** Handle that `prepareSubagentSpawn` may resolve with after pre-spawn setup. */
export type SubagentSpawnPreparation = {
  /**
   * Undoes the pre-spawn setup when launching the subagent fails.
   * May complete synchronously or return a promise.
   */
  rollback: () => Promise<void> | void;
};
/** Reason value delivered to `ContextEngine.onSubagentEnded` when a subagent lifecycle ends. */
export type SubagentEndReason =
  | "deleted"
  | "completed"
  | "swept"
  | "released";
/** One entry-level substitution inside a transcript rewrite request. */
export type TranscriptRewriteReplacement = {
  /** Id of the existing transcript entry, on the active branch, that gets replaced. */
  entryId: string;
  /** Message content that takes the place of that entry. */
  message: AgentMessage;
};
/** Payload accepted by the runtime's transcript rewrite helper. */
export type TranscriptRewriteRequest = {
  /** Entry replacements that are applied together in a single branch-and-reappend pass. */
  replacements: TranscriptRewriteReplacement[];
};
/** Summary of what a transcript rewrite did (or did not do). */
export type TranscriptRewriteResult = {
  /** True when the active branch was changed. */
  changed: boolean;
  /** Estimated number of bytes removed from message payloads on the active branch. */
  bytesFreed: number;
  /** How many transcript message entries were rewritten. */
  rewrittenEntries: number;
  /** Optional explanation for why no rewrite took place. */
  reason?: string;
};
/** Value resolved by `ContextEngine.maintain`; a plain alias of `TranscriptRewriteResult`. */
export type ContextEngineMaintenanceResult = TranscriptRewriteResult;
/**
 * Prompt-cache retention values that can be reported through
 * `ContextEnginePromptCacheInfo.retention`.
 */
export type ContextEnginePromptCacheRetention =
  | "none"
  | "short"
  | "long"
  | "in_memory"
  | "24h";
/**
 * Usage counters attached to prompt-cache telemetry; every field is optional.
 *
 * NOTE(review): the numbers are presumably token counts — confirm against the
 * runtime code that populates them.
 */
export type ContextEnginePromptCacheUsage = {
  input?: number;
  output?: number;
  cacheRead?: number;
  cacheWrite?: number;
  total?: number;
};
/** Category codes used to label a single prompt-cache observation change. */
export type ContextEnginePromptCacheObservationChangeCode =
  | "cacheRetention"
  | "model"
  | "streamStrategy"
  | "systemPrompt"
  | "tools"
  | "transport";
/** A single change entry listed on a prompt-cache observation. */
export type ContextEnginePromptCacheObservationChange = {
  /** Category of the change. */
  code: ContextEnginePromptCacheObservationChangeCode;
  /** Free-form text accompanying the code. */
  detail: string;
};
/**
 * Prompt-cache observation record surfaced through
 * `ContextEnginePromptCacheInfo.observation`.
 */
export type ContextEnginePromptCacheObservation = {
  /** NOTE(review): presumably true when the prompt cache was judged to have been invalidated — confirm. */
  broke: boolean;
  previousCacheRead?: number;
  cacheRead?: number;
  /** Optional list of changes attached to this observation. */
  changes?: ContextEnginePromptCacheObservationChange[];
};
/** Optional prompt-cache telemetry made available to cache-aware engines. */
export type ContextEnginePromptCacheInfo = {
  /** Retention as resolved by the runtime for the actual provider/model/request path. */
  retention?: ContextEnginePromptCacheRetention;
  /**
   * Usage of the most recent API call only; this is not an accumulated total
   * across retries or tool loops.
   */
  lastCallUsage?: ContextEnginePromptCacheUsage;
  /** Outcome of the runtime's prompt-cache observability heuristic. */
  observation?: ContextEnginePromptCacheObservation;
  /** Most recent cache-touch timestamp known from the runtime's cache-TTL bookkeeping. */
  lastCacheTouchAt?: number;
  /** Cache expiry time, provided when the runtime can source it confidently. */
  expiresAt?: number;
};
/**
 * Runtime-owned context object handed to engine hooks (`maintain`,
 * `afterTurn`, `compact`).
 *
 * The type is open-ended: any string key with an `unknown` value is allowed
 * next to the well-known optional fields declared below.
 */
export type ContextEngineRuntimeContext = Record<string, unknown> & {
  /**
   * Set to true when the host has explicitly opted this maintenance run into
   * consuming deferred compaction debt.
   */
  allowDeferredCompactionExecution?: boolean;
  /** Context window budget, as resolved by the runtime, for the active model call. */
  tokenBudget?: number;
  /** Best-effort estimate of the current prompt/context token count for this turn. */
  currentTokenCount?: number;
  /** Prompt-cache telemetry, when available, for cache-aware engines. */
  promptCache?: ContextEnginePromptCacheInfo;
  /**
   * Runtime-implemented helper for rewriting transcript entries safely.
   *
   * The engine decides which rewrites are safe; the runtime is responsible
   * for how the session DAG gets updated on disk.
   */
  rewriteTranscriptEntries?: (
    request: TranscriptRewriteRequest,
  ) => Promise<TranscriptRewriteResult>;
};
/**
 * Pluggable contract for context management.
 *
 * `info`, `ingest`, `assemble`, and `compact` are required and describe a
 * generic lifecycle. Every other member is optional so that engines can offer
 * extra capabilities (retrieval, lineage, etc.) without forcing them on all
 * implementations.
 */
export interface ContextEngine {
  /** Identifier and descriptive metadata for this engine. */
  readonly info: ContextEngineInfo;

  /**
   * Set up engine state for a session; historical context may be imported
   * as part of this step.
   */
  bootstrap?(params: {
    sessionId: string;
    sessionKey?: string;
    sessionFile: string;
  }): Promise<BootstrapResult>;

  /**
   * Perform transcript maintenance. Runs after bootstrap, after successful
   * turns, or after compaction.
   *
   * To rewrite the transcript, engines can call
   * runtimeContext.rewriteTranscriptEntries(), which performs safe
   * branch-and-reappend rewrites without a dependency on Pi internals.
   */
  maintain?(params: {
    sessionId: string;
    sessionKey?: string;
    sessionFile: string;
    runtimeContext?: ContextEngineRuntimeContext;
  }): Promise<ContextEngineMaintenanceResult>;

  /**
   * Store one message in the engine.
   */
  ingest(params: {
    sessionId: string;
    sessionKey?: string;
    message: AgentMessage;
    /** Set when the message is part of a heartbeat run. */
    isHeartbeat?: boolean;
  }): Promise<IngestResult>;

  /**
   * Store a completed turn's messages as one unit.
   */
  ingestBatch?(params: {
    sessionId: string;
    sessionKey?: string;
    messages: AgentMessage[];
    /** Set when the batch is part of a heartbeat run. */
    isHeartbeat?: boolean;
  }): Promise<IngestBatchResult>;

  /**
   * Optional post-turn hook, invoked once a run attempt has completed.
   * Suitable for persisting canonical context and for triggering background
   * compaction decisions.
   */
  afterTurn?(params: {
    sessionId: string;
    sessionKey?: string;
    sessionFile: string;
    messages: AgentMessage[];
    /** How many messages existed before the prompt was sent. */
    prePromptMessageCount: number;
    /** Auto-compaction summary emitted by the runtime, if any. */
    autoCompactionSummary?: string;
    /** Set when this turn is part of a heartbeat run. */
    isHeartbeat?: boolean;
    /** Model context token budget, if known, for proactive compaction. */
    tokenBudget?: number;
    /** Runtime-owned context for engines that need caller state. */
    runtimeContext?: ContextEngineRuntimeContext;
  }): Promise<void>;

  /**
   * Build the model context within a token budget.
   * Resolves with an ordered list of messages that is ready for the model.
   */
  assemble(params: {
    sessionId: string;
    sessionKey?: string;
    messages: AgentMessage[];
    tokenBudget?: number;
    /** Names of the tools available for this run, so prompt guidance can match runtime tool access. */
    availableTools?: Set<string>;
    /** Active memory citation mode, for engines that want to mirror memory prompt guidance. */
    citationsMode?: MemoryCitationsMode;
    /**
     * Identifier of the current model (e.g. "claude-opus-4", "gpt-4o", "qwen2.5-7b").
     * Lets context engine plugins adapt their formatting per model.
     */
    model?: string;
    /** Incoming user prompt for this turn (useful for retrieval-oriented engines). */
    prompt?: string;
  }): Promise<AssembleResult>;

  /**
   * Reduce token usage by compacting context.
   * Implementations may create summaries, prune old turns, etc.
   */
  compact(params: {
    sessionId: string;
    sessionKey?: string;
    sessionFile: string;
    tokenBudget?: number;
    /** Compact even when usage is below the default trigger threshold. */
    force?: boolean;
    /** Live token estimate from the caller's active context, if available. */
    currentTokenCount?: number;
    /** Convergence target to aim for; "budget" is the default. */
    compactionTarget?: "budget" | "threshold";
    customInstructions?: string;
    /** Runtime-owned context for engines that need caller state. */
    runtimeContext?: ContextEngineRuntimeContext;
  }): Promise<CompactResult>;

  /**
   * Set up context-engine-managed subagent state before the child run starts.
   *
   * An implementation may resolve with a rollback handle; that handle is
   * invoked when the spawn fails after preparation has succeeded.
   */
  prepareSubagentSpawn?(params: {
    parentSessionKey: string;
    childSessionKey: string;
    ttlMs?: number;
  }): Promise<SubagentSpawnPreparation | undefined>;

  /**
   * Tell the context engine that a subagent lifecycle has ended.
   */
  onSubagentEnded?(params: {
    childSessionKey: string;
    reason: SubagentEndReason;
  }): Promise<void>;

  /**
   * Release any resources the engine is holding.
   */
  dispose?(): Promise<void>;
}