Pulse/internal/ai/chat/agentic.go
2026-04-03 19:00:40 +01:00

1588 lines
56 KiB
Go

package chat
import (
"context"
"encoding/json"
"fmt"
"runtime/debug"
"strings"
"sync"
"time"
"github.com/google/uuid"
"github.com/rcourtman/pulse-go-rewrite/internal/ai/approval"
"github.com/rcourtman/pulse-go-rewrite/internal/ai/providers"
"github.com/rcourtman/pulse-go-rewrite/internal/ai/tools"
"github.com/rs/zerolog/log"
)
// parallelToolResult holds the result of a single tool execution during
// parallel tool execution in the agentic loop. Defined at package level
// because the executeWithTools function's `tools` parameter shadows the
// tools package import, preventing inline type references.
type parallelToolResult struct {
Result tools.CallToolResult
Err error
}
func isRetryableProviderStreamError(err error) bool {
if err == nil {
return false
}
msg := strings.ToLower(strings.TrimSpace(err.Error()))
if msg == "" {
return false
}
retryable := []string{
"timeout",
"timed out",
"context deadline exceeded",
"connection reset",
"connection refused",
"connection aborted",
"broken pipe",
"eof",
"unexpected eof",
"network is unreachable",
"no such host",
"dial tcp",
"temporarily unavailable",
"http2: client connection lost",
"stream error",
"upstream",
"502",
"503",
"504",
}
for _, token := range retryable {
if strings.Contains(msg, token) {
return true
}
}
return false
}
func fallbackProviderStreamErrorMessage(err error) string {
const defaultMessage = "AI response stream interrupted before completion. Please retry."
if err == nil {
return defaultMessage
}
msg := strings.TrimSpace(err.Error())
if msg == "" {
return defaultMessage
}
msgLower := strings.ToLower(msg)
switch {
case strings.Contains(msgLower, "context canceled"):
return "AI response was canceled before completion."
case strings.Contains(msgLower, "timeout"),
strings.Contains(msgLower, "timed out"),
strings.Contains(msgLower, "deadline exceeded"):
return "AI response timed out before completion. Please retry."
}
const maxLen = 220
if len(msg) > maxLen {
msg = msg[:maxLen] + "..."
}
return "AI response stream interrupted: " + msg
}
func (a *AgenticLoop) executeToolSafely(ctx context.Context, name string, input map[string]interface{}) (result tools.CallToolResult, err error) {
defer func() {
if recovered := recover(); recovered != nil {
panicErr := fmt.Errorf("tool panic in %s: %v", name, recovered)
log.Error().
Str("tool", name).
Interface("panic", recovered).
Bytes("stack", debug.Stack()).
Msg("[AgenticLoop] Recovered tool panic")
result = tools.NewErrorResult(panicErr)
err = panicErr
}
}()
return a.executor.ExecuteTool(ctx, name, input)
}
// AgenticLoop handles the tool-calling loop with streaming
type AgenticLoop struct {
provider providers.StreamingProvider
executor *tools.PulseToolExecutor
tools []providers.Tool
baseSystemPrompt string // Base prompt without mode context
maxTurns int
orgID string
executionID string
// Provider info for telemetry (e.g., "anthropic", "claude-3-sonnet")
providerName string
modelName string
// Token accumulation across all turns
totalInputTokens int
totalOutputTokens int
// State for ongoing executions
mu sync.Mutex
aborted map[string]bool // sessionID -> aborted
pendingQs map[string]chan []QuestionAnswer // questionID -> answer channel
autonomousMode bool // When true, don't wait for approvals (for investigations)
// Per-session FSMs for workflow enforcement (set before each execution)
sessionFSM *SessionFSM
// Knowledge accumulator for fact extraction across turns
knowledgeAccumulator *KnowledgeAccumulator
// Budget checker called after each turn to enforce token spending limits
budgetChecker func() error
}
// NewAgenticLoop creates a new agentic loop
func NewAgenticLoop(provider providers.StreamingProvider, executor *tools.PulseToolExecutor, systemPrompt string) *AgenticLoop {
// Convert MCP tools to provider format
mcpTools := executor.ListTools()
providerTools := ConvertMCPToolsToProvider(mcpTools)
providerTools = append(providerTools, userQuestionTool())
return &AgenticLoop{
provider: provider,
executor: executor,
tools: providerTools,
baseSystemPrompt: systemPrompt,
maxTurns: MaxAgenticTurns,
orgID: approval.DefaultOrgID,
aborted: make(map[string]bool),
pendingQs: make(map[string]chan []QuestionAnswer),
}
}
// UpdateTools refreshes the tool list from the executor
func (a *AgenticLoop) UpdateTools() {
mcpTools := a.executor.ListTools()
tools := ConvertMCPToolsToProvider(mcpTools)
a.tools = append(tools, userQuestionTool())
}
// SetOrgID sets the org scope used when validating approval decisions.
func (a *AgenticLoop) SetOrgID(orgID string) {
a.mu.Lock()
defer a.mu.Unlock()
a.orgID = approval.NormalizeOrgID(orgID)
}
// Execute runs the agentic loop with streaming
func (a *AgenticLoop) Execute(ctx context.Context, sessionID string, messages []Message, callback StreamCallback) ([]Message, error) {
return a.executeWithTools(ctx, sessionID, messages, a.tools, callback)
}
// ExecuteWithTools runs the agentic loop with a filtered tool set
func (a *AgenticLoop) ExecuteWithTools(ctx context.Context, sessionID string, messages []Message, tools []providers.Tool, callback StreamCallback) ([]Message, error) {
return a.executeWithTools(ctx, sessionID, messages, tools, callback)
}
func (a *AgenticLoop) executeWithTools(ctx context.Context, sessionID string, messages []Message, tools []providers.Tool, callback StreamCallback) ([]Message, error) {
// Snapshot maxTurns under the lock — callers may override via SetMaxTurns
// before calling ExecuteWithTools, and this avoids races with concurrent sessions.
a.mu.Lock()
maxTurns := a.maxTurns
a.aborted[sessionID] = false
a.mu.Unlock()
defer func() {
a.mu.Lock()
delete(a.aborted, sessionID)
a.mu.Unlock()
}()
// Convert our messages to provider format
messagesForModel := pruneMessagesForModel(messages)
providerMessages := convertToProviderMessages(messagesForModel)
if tools == nil {
tools = a.tools
}
// Cache tool definition token estimate - tools don't change during the loop.
cachedToolTokens := EstimateToolsTokens(tools)
var resultMessages []Message
turn := 0
toolsSucceededThisEpisode := false // Track if any tool executed successfully this episode
writeCompletedLastTurn := false // When true, force text-only response on next turn
toolBlockedLastTurn := false // When true, force text-only response after budget/loop block
preferredToolName := ""
preferredToolRetried := false
singleToolRequested := isSingleToolRequest(providerMessages)
singleToolEnforced := false
// Fresh-data intent: if the user's latest message indicates they want
// fresh/updated data, bypass the knowledge gate so tools re-execute.
userWantsFresh := detectFreshDataIntent(messages)
// Loop detection: track identical tool calls (name + serialized input).
// After maxIdenticalCalls identical invocations, the next one is blocked.
const maxIdenticalCalls = 3
recentCallCounts := make(map[string]int)
// Track where each turn's messages begin in providerMessages for compaction.
// We keep the last N turns' tool results in full; older ones get compacted.
const compactionKeepTurns = 2 // Keep last 2 turns' tool results in full (KA preserves key facts)
const compactionMinChars = 300 // Only compact results longer than this
currentTurnStartIndex := len(providerMessages) // Initial messages are never compacted
// Wrap-up nudge: after this many cumulative tool calls, hint the model to start wrapping up.
const wrapUpNudgeAfterCalls = 12
const wrapUpEscalateAfterCalls = 18
const maxConsecutiveToolOnlyTurns = 4
totalToolCalls := 0
wrapUpNudgeFired := false
wrapUpEscalateFired := false
consecutiveToolOnlyTurns := 0
consecutiveAllErrorTurns := 0
for turn < maxTurns {
// === CONTEXT COMPACTION: Compact old tool results to prevent context blowout ===
if turn > 0 {
compactOldToolResults(providerMessages, currentTurnStartIndex, compactionKeepTurns, compactionMinChars, a.knowledgeAccumulator)
}
// Check if aborted
a.mu.Lock()
if a.aborted[sessionID] {
a.mu.Unlock()
return resultMessages, fmt.Errorf("session aborted")
}
providerName := a.providerName
modelName := a.modelName
a.mu.Unlock()
// Record telemetry for loop iteration
if metrics := GetAIMetrics(); metrics != nil {
metrics.RecordAgenticIteration(providerName, modelName)
}
// Check context
select {
case <-ctx.Done():
return resultMessages, ctx.Err()
default:
}
log.Info().
Int("turn", turn).
Int("messages", len(providerMessages)).
Int("tools", len(tools)).
Int("total_input_tokens", a.totalInputTokens).
Int("total_output_tokens", a.totalOutputTokens).
Int("estimated_context_tokens", EstimateMessagesTokens(providerMessages)).
Str("session_id", sessionID).
Msg("[AgenticLoop] Starting turn")
// Build the request with dynamic system prompt (includes current mode)
systemPrompt := a.getSystemPrompt()
req := providers.ChatRequest{
Messages: providerMessages,
System: systemPrompt,
Tools: tools,
ExecutionID: a.executionID,
}
// Determine tool_choice based on turn, intent, and explicit tool requests.
// We only force tool use when:
// 1. Tools are available
// 2. It's the first turn
// 3. The user's message indicates they need live data or an action
// This prevents forcing tool calls on conceptual questions like "What is TCP?"
forcedTextOnly := false
if turn >= maxTurns-1 {
// Last turn before hitting the limit — force a text-only response so
// the model summarizes its findings instead of silently stopping.
req.ToolChoice = &providers.ToolChoice{Type: providers.ToolChoiceNone}
forcedTextOnly = true
log.Warn().
Int("turn", turn).
Int("max_turns", maxTurns).
Str("session_id", sessionID).
Msg("[AgenticLoop] Approaching max turns — forcing text-only response for summary")
} else if writeCompletedLastTurn {
// A write action completed successfully on the previous turn.
// Force text-only response so the model summarizes the result instead of
// making more tool calls (which often return stale cached data and cause loops).
req.ToolChoice = &providers.ToolChoice{Type: providers.ToolChoiceNone}
forcedTextOnly = true
writeCompletedLastTurn = false
log.Debug().
Str("session_id", sessionID).
Msg("[AgenticLoop] Write completed last turn — forcing text-only response")
} else if toolBlockedLastTurn {
// Tool calls were blocked last turn (budget exceeded or loop detected).
// The model already has the data it gathered — force it to produce a text
// response instead of continuing to call tools that will just be blocked again.
req.ToolChoice = &providers.ToolChoice{Type: providers.ToolChoiceNone}
forcedTextOnly = true
toolBlockedLastTurn = false
log.Debug().
Str("session_id", sessionID).
Msg("[AgenticLoop] Tool calls blocked last turn — forcing text-only response")
} else if len(tools) > 0 {
if preferredToolName == "" {
preferredToolName = getPreferredTool(providerMessages, tools)
}
if preferredToolName != "" {
req.ToolChoice = &providers.ToolChoice{Type: providers.ToolChoiceTool, Name: preferredToolName}
if singleToolRequested {
singleToolEnforced = true
}
log.Debug().
Str("session_id", sessionID).
Str("tool", preferredToolName).
Msg("[AgenticLoop] Explicit tool request - forcing tool")
} else if turn == 0 && requiresToolUse(providerMessages) {
// First turn with action intent: force the model to use a tool
req.ToolChoice = &providers.ToolChoice{Type: providers.ToolChoiceAny}
log.Debug().
Str("session_id", sessionID).
Msg("[AgenticLoop] First turn with action intent - forcing tool use")
} else {
req.ToolChoice = &providers.ToolChoice{Type: providers.ToolChoiceAuto}
if turn == 0 {
log.Debug().
Str("session_id", sessionID).
Msg("[AgenticLoop] First turn appears conceptual - using auto tool choice")
}
}
}
// Pre-request context validation: catch overflow from message history growth.
// Phase 2 handles first-turn overflow via seed budget; this catches
// subsequent turns where tool results accumulate beyond the model's limit.
{
estimatedTokens := EstimateTokens(req.System) + EstimateMessagesTokens(req.Messages) + cachedToolTokens
modelID := ""
a.mu.Lock()
modelID = a.providerName + ":" + a.modelName
a.mu.Unlock()
contextLimit := providers.ContextWindowTokens(modelID)
if estimatedTokens > contextLimit {
log.Warn().
Int("estimated_tokens", estimatedTokens).
Int("context_limit", contextLimit).
Str("model", modelID).
Str("session_id", sessionID).
Msg("[AgenticLoop] Pre-request context overflow detected - emergency compaction")
// Emergency: aggressively compact ALL old tool results (keepTurns=0, minChars=100)
compactOldToolResults(providerMessages, currentTurnStartIndex, 0, 100, a.knowledgeAccumulator)
// Re-build request with compacted messages
req.Messages = providerMessages
// Re-estimate after compaction
estimatedTokens = EstimateTokens(req.System) + EstimateMessagesTokens(req.Messages) + cachedToolTokens
if estimatedTokens > contextLimit {
log.Warn().
Int("estimated_tokens", estimatedTokens).
Int("context_limit", contextLimit).
Str("session_id", sessionID).
Msg("[AgenticLoop] Still over limit after compaction - dropping tools for this turn")
req.Tools = nil
req.ToolChoice = &providers.ToolChoice{Type: providers.ToolChoiceNone}
forcedTextOnly = true
// Final check: if system + messages alone still exceed limit,
// prune old messages as last resort.
estimatedTokens = EstimateTokens(req.System) + EstimateMessagesTokens(req.Messages)
if estimatedTokens > contextLimit {
log.Warn().
Int("estimated_tokens", estimatedTokens).
Int("context_limit", contextLimit).
Str("session_id", sessionID).
Msg("[AgenticLoop] Still over limit after dropping tools - pruning old messages")
// Keep the first message (original user prompt) and the most recent half.
// This preserves the question context and recent investigation state.
if len(providerMessages) > 4 {
keepRecent := len(providerMessages) / 2
if keepRecent < 2 {
keepRecent = 2
}
pruned := make([]providers.Message, 0, 1+keepRecent)
pruned = append(pruned, providerMessages[0])
pruned = append(pruned, providerMessages[len(providerMessages)-keepRecent:]...)
providerMessages = pruned
req.Messages = providerMessages
estimatedTokens = EstimateTokens(req.System) + EstimateMessagesTokens(req.Messages)
log.Warn().
Int("messages_after_prune", len(providerMessages)).
Int("estimated_tokens_after_prune", estimatedTokens).
Str("session_id", sessionID).
Msg("[AgenticLoop] Pruned old messages to fit context window")
}
// If still over after all interventions, abort gracefully
if estimatedTokens > contextLimit {
log.Error().
Int("estimated_tokens", estimatedTokens).
Int("context_limit", contextLimit).
Str("session_id", sessionID).
Msg("[AgenticLoop] Cannot fit request in context window - aborting turn")
break
}
}
}
}
}
// Collect streaming response
var contentBuilder strings.Builder
var thinkingBuilder strings.Builder
var toolCalls []providers.ToolCall
log.Debug().
Str("session_id", sessionID).
Int("system_prompt_len", len(systemPrompt)).
Msg("[AgenticLoop] Calling provider.ChatStream")
const maxProviderAttempts = 2
err := error(nil)
for attempt := 1; attempt <= maxProviderAttempts; attempt++ {
attemptSawDone := false
attemptEmittedVisibleEvents := false
var attemptErrorMessages []string
err = a.provider.ChatStream(ctx, req, func(event providers.StreamEvent) {
switch event.Type {
case "content":
if data, ok := event.Data.(providers.ContentEvent); ok {
attemptEmittedVisibleEvents = true
// Check for tool call marker leakage - if detected, stop streaming this chunk.
// These markers indicate the model is outputting internal tool call
// formatting instead of using the proper tool calling API.
if containsToolCallMarker(data.Text) {
// Don't append or stream this content
return
}
// Also check if the accumulated content already has the marker
// (in case it arrived in a previous chunk)
if containsToolCallMarker(contentBuilder.String()) {
return
}
contentBuilder.WriteString(data.Text)
// Forward to callback - send ContentData struct
jsonData, _ := json.Marshal(ContentData{Text: data.Text})
callback(StreamEvent{Type: "content", Data: jsonData})
}
case "thinking":
if data, ok := event.Data.(providers.ThinkingEvent); ok {
attemptEmittedVisibleEvents = true
thinkingBuilder.WriteString(data.Text)
// Forward to callback - send ThinkingData struct
jsonData, _ := json.Marshal(ThinkingData{Text: data.Text})
callback(StreamEvent{Type: "thinking", Data: jsonData})
}
case "tool_start":
if data, ok := event.Data.(providers.ToolStartEvent); ok {
attemptEmittedVisibleEvents = true
// pulse_question is rendered as a dedicated "question" card; suppress tool UI events.
if data.Name == pulseQuestionToolName {
return
}
// Format input for frontend display
// For control tools, show a human-readable summary instead of raw JSON to avoid "hallucination" look
inputStr, rawInput := formatToolInputForFrontend(data.Name, data.Input, false)
jsonData, _ := json.Marshal(ToolStartData{
ID: data.ID,
Name: data.Name,
Input: inputStr,
RawInput: rawInput,
})
callback(StreamEvent{Type: "tool_start", Data: jsonData})
}
case "done":
if data, ok := event.Data.(providers.DoneEvent); ok {
attemptSawDone = true
toolCalls = data.ToolCalls
a.totalInputTokens += data.InputTokens
a.totalOutputTokens += data.OutputTokens
log.Info().
Int("turn", turn).
Int("input_tokens_this_turn", data.InputTokens).
Int("output_tokens_this_turn", data.OutputTokens).
Int("total_input_tokens", a.totalInputTokens).
Int("total_output_tokens", a.totalOutputTokens).
Int("tool_calls", len(data.ToolCalls)).
Str("session_id", sessionID).
Msg("[AgenticLoop] Turn completed")
}
case "error":
if data, ok := event.Data.(providers.ErrorEvent); ok {
if msg := strings.TrimSpace(data.Message); msg != "" {
attemptErrorMessages = append(attemptErrorMessages, msg)
}
}
}
})
effectiveErr := err
if effectiveErr == nil && len(attemptErrorMessages) > 0 {
effectiveErr = fmt.Errorf("stream error: %s", attemptErrorMessages[0])
}
if effectiveErr == nil {
break
}
if attemptSawDone {
// Some providers can return a transport error after already emitting
// a terminal done event. The turn is complete; keep the response.
log.Warn().
Int("attempt", attempt).
Err(effectiveErr).
Str("session_id", sessionID).
Msg("[AgenticLoop] Provider returned error after done event; ignoring")
err = nil
break
}
if attempt < maxProviderAttempts && !attemptEmittedVisibleEvents && isRetryableProviderStreamError(effectiveErr) && ctx.Err() == nil {
backoff := time.Duration(200*attempt) * time.Millisecond
log.Warn().
Int("attempt", attempt).
Err(effectiveErr).
Dur("backoff", backoff).
Str("session_id", sessionID).
Msg("[AgenticLoop] Provider stream failed before events; retrying turn")
select {
case <-time.After(backoff):
case <-ctx.Done():
err = ctx.Err()
break
}
continue
}
if len(attemptErrorMessages) > 0 {
// Defer emitting error events until retries are exhausted so transient
// provider blips don't leak to the client stream.
jsonData, _ := json.Marshal(ErrorData{Message: attemptErrorMessages[0]})
callback(StreamEvent{Type: "error", Data: jsonData})
} else {
// Transport-level failures may not include an explicit provider error event.
// Emit a fallback error so clients can render a clear failure state.
jsonData, _ := json.Marshal(ErrorData{Message: fallbackProviderStreamErrorMessage(effectiveErr)})
callback(StreamEvent{Type: "error", Data: jsonData})
}
err = effectiveErr
break
}
log.Debug().
Str("session_id", sessionID).
Err(err).
Int("content_len", contentBuilder.Len()).
Int("tool_calls", len(toolCalls)).
Msg("[AgenticLoop] provider.ChatStream returned")
if err != nil {
log.Error().
Err(err).
Str("session_id", sessionID).
Msg("[AgenticLoop] Provider error")
return resultMessages, fmt.Errorf("provider error: %w", err)
}
// Guard: if we forced text-only but the model still returned tool calls
// (some providers like Gemini can hallucinate function calls from conversation
// history even when tools are not offered in the request), strip them so the
// model's text content is treated as the final response.
if forcedTextOnly && len(toolCalls) > 0 {
log.Warn().
Str("session_id", sessionID).
Int("stripped_tool_calls", len(toolCalls)).
Msg("[AgenticLoop] Model returned tool calls despite ToolChoiceNone — stripping them")
toolCalls = nil
}
// Check mid-run budget after each turn completes
if a.budgetChecker != nil {
if budgetErr := a.budgetChecker(); budgetErr != nil {
log.Warn().Err(budgetErr).Int("turn", turn).Str("session_id", sessionID).
Msg("[AgenticLoop] Budget exceeded mid-run, stopping")
return resultMessages, fmt.Errorf("budget exceeded: %w", budgetErr)
}
}
// Create assistant message
// Clean tool call artifacts from the content before storing.
cleanedContent := cleanToolCallArtifacts(contentBuilder.String())
assistantMsg := Message{
ID: uuid.New().String(),
Role: "assistant",
Content: cleanedContent,
ReasoningContent: thinkingBuilder.String(),
Timestamp: time.Now(),
}
if len(toolCalls) > 0 {
assistantMsg.ToolCalls = make([]ToolCall, len(toolCalls))
for i, tc := range toolCalls {
assistantMsg.ToolCalls[i] = ToolCall{
ID: tc.ID,
Name: tc.Name,
Input: tc.Input,
ThoughtSignature: tc.ThoughtSignature,
}
}
}
resultMessages = append(resultMessages, assistantMsg)
// Convert to provider format for next turn
providerAssistant := providers.Message{
Role: "assistant",
Content: assistantMsg.Content,
ReasoningContent: assistantMsg.ReasoningContent,
}
for _, tc := range toolCalls {
providerAssistant.ToolCalls = append(providerAssistant.ToolCalls, providers.ToolCall{
ID: tc.ID,
Name: tc.Name,
Input: tc.Input,
ThoughtSignature: tc.ThoughtSignature,
})
}
providerMessages = append(providerMessages, providerAssistant)
// Track turns where the model emitted tool calls but no user-facing text.
// In chat mode this can otherwise spiral into long tool-only chains.
if len(toolCalls) > 0 && strings.TrimSpace(assistantMsg.Content) == "" {
consecutiveToolOnlyTurns++
} else {
consecutiveToolOnlyTurns = 0
}
// If no tool calls, we're done - but first check FSM and phantom execution
if len(toolCalls) == 0 {
// No tool calls breaks the "consecutive all-error tool turns" streak.
consecutiveAllErrorTurns = 0
// If the user explicitly requested a tool and the model didn't comply, retry once.
if preferredToolName != "" && !preferredToolRetried {
preferredToolRetried = true
retryPrompt := fmt.Sprintf("Tool required: use %s for this request.", preferredToolName)
if len(resultMessages) > 0 {
resultMessages[len(resultMessages)-1].Content = retryPrompt
}
turn++
continue
}
// === FSM ENFORCEMENT GATE 2: Check if final answer is allowed ===
a.mu.Lock()
fsm := a.sessionFSM
a.mu.Unlock()
if fsm != nil {
if fsmErr := fsm.CanFinalAnswer(); fsmErr != nil {
log.Warn().
Str("session_id", sessionID).
Str("state", string(fsm.State)).
Bool("wrote_this_episode", fsm.WroteThisEpisode).
Bool("read_after_write", fsm.ReadAfterWrite).
Msg("[AgenticLoop] FSM blocked final answer - must verify write first")
// Record telemetry for FSM final answer block
if metrics := GetAIMetrics(); metrics != nil {
metrics.RecordFSMFinalBlock(fsm.State)
}
// Inject a minimal, factual constraint - not a narrative or example.
// This tells the model what is required, not how to do it.
verifyPrompt := fmt.Sprintf(
"Verification required: perform a read or status check on %s before responding.",
fsm.LastWriteTool,
)
// Update the last assistant message to include verification constraint
if len(resultMessages) > 0 {
resultMessages[len(resultMessages)-1].Content = verifyPrompt
}
// Note: verification constraint is injected into resultMessages above (for the model).
// We intentionally do NOT emit this to the user callback — it's an internal protocol
// prompt that would appear as spam in the chat output.
// Mark that we completed verification (the next read will set ReadAfterWrite)
// and continue the loop to force a verification read
turn++
continue
}
// If we're completing successfully and there was a write, mark verification complete
if fsm.State == StateVerifying && fsm.ReadAfterWrite {
fsm.CompleteVerification()
log.Debug().
Str("session_id", sessionID).
Str("new_state", string(fsm.State)).
Msg("[AgenticLoop] FSM verification complete, transitioning to READING")
}
}
// Detect phantom execution: model claims to have done something without tool calls
// This is especially important for providers that can't force tool use (e.g., Ollama)
// IMPORTANT: Skip this check if tools already succeeded this episode - the model is
// legitimately summarizing tool results, not hallucinating.
log.Debug().
Bool("toolsSucceededThisEpisode", toolsSucceededThisEpisode).
Bool("hasPhantomExecution", hasPhantomExecution(assistantMsg.Content)).
Str("content_preview", truncateForLog(assistantMsg.Content, 200)).
Msg("[AgenticLoop] Phantom detection check")
if !toolsSucceededThisEpisode && hasPhantomExecution(assistantMsg.Content) {
log.Warn().
Str("session_id", sessionID).
Str("content_preview", truncateForLog(assistantMsg.Content, 200)).
Msg("[AgenticLoop] Phantom execution detected - model claims action without tool call")
// Record telemetry for phantom detection
if metrics := GetAIMetrics(); metrics != nil {
metrics.RecordPhantomDetected(providerName, modelName)
}
// Replace the response with a safe failure message
safeResponse := "I apologize, but I wasn't able to access the infrastructure tools needed to complete that request. This can happen when:\n\n" +
"1. The tools aren't available right now\n" +
"2. There was a connection issue\n" +
"3. The model I'm running on doesn't support function calling\n\n" +
"Please try again, or let me know if you have a question I can answer without checking live infrastructure."
// Update the last result message
if len(resultMessages) > 0 {
resultMessages[len(resultMessages)-1].Content = safeResponse
}
// Send corrected content to callback
jsonData, _ := json.Marshal(ContentData{Text: "\n\n---\n" + safeResponse})
callback(StreamEvent{Type: "content", Data: jsonData})
}
log.Debug().Msg("agentic loop complete - no tool calls")
resultMessages = a.ensureFinalTextResponse(ctx, sessionID, resultMessages, providerMessages, callback)
return resultMessages, nil
}
// === Execute tool calls (three-phase pipeline) ===
// Phase 1: Pre-check (sequential) — FSM, loop detection, knowledge gate
// Phase 2: Execute (parallel) — actual tool calls via goroutines
// Phase 3: Post-process (sequential) — streaming, FSM transitions, KA extraction
if len(toolCalls) > 0 && preferredToolName != "" {
// Clear preferred tool once the model has used any tool.
preferredToolName = ""
}
firstToolResultText := ""
budgetBlockedThisTurn := 0
anyToolSucceededThisTurn := false
// --- Phase 1: Pre-check all tool calls sequentially ---
// Pre-checks share mutable state (FSM, loop counts) so must be sequential.
a.mu.Lock()
if a.aborted[sessionID] {
a.mu.Unlock()
return resultMessages, fmt.Errorf("session aborted")
}
fsm := a.sessionFSM
a.mu.Unlock()
type pendingToolExec struct {
tc providers.ToolCall
toolKind ToolKind
}
var pendingExec []pendingToolExec
// pulse_question is interactive and must not run in parallel with other tools.
// If the provider emits multiple tool calls alongside pulse_question, skip the
// others and let the model retry after receiving the user's answer.
hasPulseQuestion := false
for _, tc := range toolCalls {
if tc.Name == pulseQuestionToolName {
hasPulseQuestion = true
break
}
}
if hasPulseQuestion {
for _, tc := range toolCalls {
log.Debug().
Str("tool", tc.Name).
Str("id", tc.ID).
Msg("Processing interactive tool call set (pulse_question present)")
toolKind := ClassifyToolCall(tc.Name, tc.Input)
// FSM enforcement still applies (even if we're skipping execution).
if fsm != nil {
if fsmErr := fsm.CanExecuteTool(toolKind, tc.Name); fsmErr != nil {
log.Warn().
Str("tool", tc.Name).
Str("kind", toolKind.String()).
Str("state", string(fsm.State)).
Err(fsmErr).
Msg("[AgenticLoop] FSM blocked tool execution (interactive set)")
fsmBlockedErr, ok := fsmErr.(*FSMBlockedError)
if ok && fsmBlockedErr.Recoverable {
fsm.TrackPendingRecovery("FSM_BLOCKED", tc.Name)
if metrics := GetAIMetrics(); metrics != nil {
metrics.RecordAutoRecoveryAttempt("FSM_BLOCKED", tc.Name)
}
}
inputStr, rawInput := formatToolInputForFrontend(tc.Name, tc.Input, true)
jsonData, _ := json.Marshal(ToolEndData{
ID: tc.ID,
Name: tc.Name,
Input: inputStr,
RawInput: rawInput,
Output: fsmErr.Error(),
Success: false,
})
callback(StreamEvent{Type: "tool_end", Data: jsonData})
toolResultMsg := Message{
ID: uuid.New().String(),
Role: "user",
Timestamp: time.Now(),
ToolResult: &ToolResult{
ToolUseID: tc.ID,
Content: fsmErr.Error(),
IsError: true,
},
}
resultMessages = append(resultMessages, toolResultMsg)
providerMessages = append(providerMessages, providers.Message{
Role: "user",
ToolResult: &providers.ToolResult{
ToolUseID: tc.ID,
Content: truncateToolResultForModel(fsmErr.Error()),
IsError: true,
},
})
continue
}
}
// LOOP DETECTION
callKey := toolCallKey(tc.Name, tc.Input)
recentCallCounts[callKey]++
if recentCallCounts[callKey] > maxIdenticalCalls {
loopMsg := fmt.Sprintf("LOOP_DETECTED: You have called %s with the same arguments %d times. This call is blocked. Try a different tool or approach.", tc.Name, recentCallCounts[callKey])
inputStr, rawInput := formatToolInputForFrontend(tc.Name, tc.Input, true)
jsonData, _ := json.Marshal(ToolEndData{
ID: tc.ID,
Name: tc.Name,
Input: inputStr,
RawInput: rawInput,
Output: loopMsg,
Success: false,
})
callback(StreamEvent{Type: "tool_end", Data: jsonData})
toolResultMsg := Message{
ID: uuid.New().String(),
Role: "user",
Timestamp: time.Now(),
ToolResult: &ToolResult{
ToolUseID: tc.ID,
Content: loopMsg,
IsError: true,
},
}
resultMessages = append(resultMessages, toolResultMsg)
providerMessages = append(providerMessages, providers.Message{
Role: "user",
ToolResult: &providers.ToolResult{
ToolUseID: tc.ID,
Content: truncateToolResultForModel(loopMsg),
IsError: true,
},
})
continue
}
// Skip non-question tools in this turn; the model must retry after user input.
if tc.Name != pulseQuestionToolName {
skipMsg := fmt.Sprintf("SKIPPED: %s was requested this turn. Wait for the user's answer, then re-issue this tool call with the clarified inputs.", pulseQuestionToolName)
inputStr, rawInput := formatToolInputForFrontend(tc.Name, tc.Input, true)
jsonData, _ := json.Marshal(ToolEndData{
ID: tc.ID,
Name: tc.Name,
Input: inputStr,
RawInput: rawInput,
Output: skipMsg,
Success: false,
})
callback(StreamEvent{Type: "tool_end", Data: jsonData})
toolResultMsg := Message{
ID: uuid.New().String(),
Role: "user",
Timestamp: time.Now(),
ToolResult: &ToolResult{
ToolUseID: tc.ID,
Content: skipMsg,
IsError: true,
},
}
resultMessages = append(resultMessages, toolResultMsg)
providerMessages = append(providerMessages, providers.Message{
Role: "user",
ToolResult: &providers.ToolResult{
ToolUseID: tc.ID,
Content: truncateToolResultForModel(skipMsg),
IsError: true,
},
})
continue
}
// Execute the interactive question tool synchronously (blocks until user answers).
resultText, isError := a.executeQuestionTool(ctx, sessionID, tc, callback)
// For pulse_question we suppress tool_end on success (UI uses the question card).
if isError {
inputStr, rawInput := formatToolInputForFrontend(tc.Name, tc.Input, true)
jsonData, _ := json.Marshal(ToolEndData{
ID: tc.ID,
Name: tc.Name,
Input: inputStr,
RawInput: rawInput,
Output: resultText,
Success: false,
})
callback(StreamEvent{Type: "tool_end", Data: jsonData})
}
if fsm != nil && !isError {
fsm.OnToolSuccess(toolKind, tc.Name)
}
toolResultMsg := Message{
ID: uuid.New().String(),
Role: "user",
Timestamp: time.Now(),
ToolResult: &ToolResult{
ToolUseID: tc.ID,
Content: resultText,
IsError: isError,
},
}
resultMessages = append(resultMessages, toolResultMsg)
providerMessages = append(providerMessages, providers.Message{
Role: "user",
ToolResult: &providers.ToolResult{
ToolUseID: tc.ID,
Content: truncateToolResultForModel(resultText),
IsError: isError,
},
})
}
// Mark the start of the next turn's messages for compaction tracking.
currentTurnStartIndex = len(providerMessages)
turn++
continue
}
for _, tc := range toolCalls {
log.Debug().
Str("tool", tc.Name).
Str("id", tc.ID).
Msg("Pre-checking tool call")
// === FSM ENFORCEMENT GATE 1: Check if tool is allowed in current state ===
toolKind := ClassifyToolCall(tc.Name, tc.Input)
if fsm != nil {
if fsmErr := fsm.CanExecuteTool(toolKind, tc.Name); fsmErr != nil {
log.Warn().
Str("tool", tc.Name).
Str("kind", toolKind.String()).
Str("state", string(fsm.State)).
Err(fsmErr).
Msg("[AgenticLoop] FSM blocked tool execution")
// Record telemetry for FSM tool block
if metrics := GetAIMetrics(); metrics != nil {
metrics.RecordFSMToolBlock(fsm.State, tc.Name, toolKind)
}
// Return the FSM error as a tool result so the model can self-correct
fsmBlockedErr, ok := fsmErr.(*FSMBlockedError)
if ok && fsmBlockedErr.Recoverable {
// Track pending recovery for success correlation
fsm.TrackPendingRecovery("FSM_BLOCKED", tc.Name)
// Record auto-recovery attempt (model gets a chance to self-correct)
if metrics := GetAIMetrics(); metrics != nil {
metrics.RecordAutoRecoveryAttempt("FSM_BLOCKED", tc.Name)
}
}
// Send tool_end event with error
jsonData, _ := json.Marshal(ToolEndData{
ID: tc.ID,
Name: tc.Name,
Input: "",
Output: fsmErr.Error(),
Success: false,
})
callback(StreamEvent{Type: "tool_end", Data: jsonData})
// Create tool result message with the error
toolResultMsg := Message{
ID: uuid.New().String(),
Role: "user",
Timestamp: time.Now(),
ToolResult: &ToolResult{
ToolUseID: tc.ID,
Content: fsmErr.Error(),
IsError: true,
},
}
resultMessages = append(resultMessages, toolResultMsg)
// Add to provider messages for next turn
providerMessages = append(providerMessages, providers.Message{
Role: "user",
ToolResult: &providers.ToolResult{
ToolUseID: tc.ID,
Content: fsmErr.Error(),
IsError: true,
},
})
// Skip execution but continue the loop to process other tool calls
continue
}
}
// === LOOP DETECTION: Block identical repeated tool calls ===
callKey := toolCallKey(tc.Name, tc.Input)
recentCallCounts[callKey]++
if recentCallCounts[callKey] > maxIdenticalCalls {
log.Warn().
Str("tool", tc.Name).
Int("count", recentCallCounts[callKey]).
Str("session_id", sessionID).
Msg("[AgenticLoop] LOOP_DETECTED: blocking repeated identical tool call")
loopMsg := fmt.Sprintf("LOOP_DETECTED: You have called %s with the same arguments %d times. This call is blocked. Try a different tool or approach.", tc.Name, recentCallCounts[callKey])
jsonData, _ := json.Marshal(ToolEndData{
ID: tc.ID,
Name: tc.Name,
Input: "",
Output: loopMsg,
Success: false,
})
callback(StreamEvent{Type: "tool_end", Data: jsonData})
toolResultMsg := Message{
ID: uuid.New().String(),
Role: "user",
Timestamp: time.Now(),
ToolResult: &ToolResult{
ToolUseID: tc.ID,
Content: loopMsg,
IsError: true,
},
}
resultMessages = append(resultMessages, toolResultMsg)
providerMessages = append(providerMessages, providers.Message{
Role: "user",
ToolResult: &providers.ToolResult{
ToolUseID: tc.ID,
Content: loopMsg,
IsError: true,
},
})
budgetBlockedThisTurn++
continue
}
// === KNOWLEDGE GATE: Return cached facts for redundant tool calls ===
// Skip gate on first turn if the user explicitly asked for fresh data.
if a.knowledgeAccumulator != nil && !(userWantsFresh && turn == 0) {
if keys := PredictFactKeys(tc.Name, tc.Input); len(keys) > 0 {
var cachedParts []string
for _, key := range keys {
if value, found := a.knowledgeAccumulator.Lookup(key); found {
cachedParts = append(cachedParts, value)
}
}
if len(cachedParts) > 0 {
// Enrich marker-based cache hits with related per-resource facts
for _, key := range keys {
if prefix, ok := MarkerExpansions[key]; ok {
if related := a.knowledgeAccumulator.RelatedFacts(prefix); related != "" {
cachedParts = append(cachedParts, related)
}
}
}
cachedResult := fmt.Sprintf("Already known (from earlier investigation): %s. If you need fresh data, use a different query or approach.", strings.Join(cachedParts, "; "))
anyToolSucceededThisTurn = true
log.Info().
Str("tool", tc.Name).
Str("session_id", sessionID).
Strs("matched_keys", keys).
Int("cached_parts", len(cachedParts)).
Msg("[AgenticLoop] Knowledge gate: returning cached fact instead of re-executing tool")
jsonData, _ := json.Marshal(ToolEndData{
ID: tc.ID,
Name: tc.Name,
Input: "",
Output: cachedResult,
Success: true,
})
callback(StreamEvent{Type: "tool_end", Data: jsonData})
toolResultMsg := Message{
ID: uuid.New().String(),
Role: "user",
Timestamp: time.Now(),
ToolResult: &ToolResult{
ToolUseID: tc.ID,
Content: cachedResult,
IsError: false,
},
}
resultMessages = append(resultMessages, toolResultMsg)
providerMessages = append(providerMessages, providers.Message{
Role: "user",
ToolResult: &providers.ToolResult{
ToolUseID: tc.ID,
Content: cachedResult,
IsError: false,
},
})
continue
}
}
}
// Tool passed all pre-checks — queue for execution
pendingExec = append(pendingExec, pendingToolExec{tc: tc, toolKind: toolKind})
}
// --- Phase 2: Execute pending tools (parallel if multiple) ---
// Tool execution is stateless I/O — safe to parallelize.
// Cap concurrency at 4 to avoid overwhelming infrastructure.
execResults := make([]parallelToolResult, len(pendingExec))
if len(pendingExec) > 1 {
log.Info().
Int("tool_count", len(pendingExec)).
Str("session_id", sessionID).
Msg("[AgenticLoop] Executing multiple tools in parallel")
var wg sync.WaitGroup
sem := make(chan struct{}, 4)
for j, pe := range pendingExec {
wg.Add(1)
go func(idx int, tc providers.ToolCall) {
defer wg.Done()
sem <- struct{}{}
defer func() { <-sem }()
r, e := a.executeToolSafely(ctx, tc.Name, tc.Input)
execResults[idx] = parallelToolResult{Result: r, Err: e}
}(j, pe.tc)
}
wg.Wait()
} else if len(pendingExec) == 1 {
r, e := a.executeToolSafely(ctx, pendingExec[0].tc.Name, pendingExec[0].tc.Input)
execResults[0] = parallelToolResult{Result: r, Err: e}
}
// --- Phase 3: Post-process results in original order (sequential) ---
// Streaming events, FSM transitions, KA extraction, approval flow
// must all be sequential and in the original tool call order.
for j, pe := range pendingExec {
tc := pe.tc
toolKind := pe.toolKind
result := execResults[j].Result
err := execResults[j].Err
var resultText string
var isError bool
if err != nil {
resultText = fmt.Sprintf("Error: %v", err)
isError = true
} else {
resultText = FormatToolResult(result)
isError = result.IsError
if !isError {
toolsSucceededThisEpisode = true // Tool executed successfully
log.Debug().
Str("tool", tc.Name).
Msg("[AgenticLoop] Tool succeeded - toolsSucceededThisEpisode set to true")
}
// Extract and accumulate knowledge facts from both success and structured error responses
if a.knowledgeAccumulator != nil {
a.knowledgeAccumulator.SetTurn(turn)
facts := ExtractFacts(tc.Name, tc.Input, resultText)
for _, f := range facts {
a.knowledgeAccumulator.AddFactForTool(tc.ID, f.Category, f.Key, f.Value)
log.Debug().
Str("tool", tc.Name).
Str("fact_key", f.Key).
Int("value_len", len(f.Value)).
Msg("[AgenticLoop] Stored knowledge fact")
}
// If no facts were extracted but we predicted keys, store negative markers
// to prevent the gate from re-executing the same tool call.
if len(facts) == 0 && !isError {
if predictedKeys := PredictFactKeys(tc.Name, tc.Input); len(predictedKeys) > 0 {
summary := summarizeForNegativeMarker(resultText)
for _, key := range predictedKeys {
if _, found := a.knowledgeAccumulator.Lookup(key); !found {
cat := categoryForPredictedKey(key)
a.knowledgeAccumulator.AddFactForTool(tc.ID, cat, key, fmt.Sprintf("checked: %s", summary))
log.Debug().
Str("tool", tc.Name).
Str("fact_key", key).
Msg("[AgenticLoop] Stored negative marker (text response)")
}
}
}
}
}
}
if firstToolResultText == "" {
firstToolResultText = resultText
}
// Track pending recovery for strict resolution blocks
// (FSM blocks are tracked above; strict resolution blocks come from the executor)
if isError && fsm != nil && strings.Contains(resultText, "STRICT_RESOLUTION") {
fsm.TrackPendingRecovery("STRICT_RESOLUTION", tc.Name)
log.Debug().
Str("tool", tc.Name).
Msg("[AgenticLoop] Tracking pending recovery for strict resolution block")
}
// === AUTO-RECOVERY FOR STRUCTURED BLOCKS ===
// If a tool returns a blocked response with auto_recoverable=true and a
// governed recovery plan, automatically apply that plan and retry once.
// Note: err == nil means executor didn't throw, isError means the tool result indicates error/block
if err == nil && isError && strings.Contains(resultText, `"auto_recoverable":true`) {
// Result is a blocked response (not a hard error)
if recoveryPlan, recoveryAttempted := tryAutoRecovery(result, tc, a.executor, ctx); recoveryPlan != nil && !recoveryAttempted {
// This is a fresh recoverable block - attempt auto-recovery
log.Info().
Str("tool", tc.Name).
Str("recovery_tool", recoveryPlan.ToolName).
Interface("recovery_input", recoveryPlan.Input).
Msg("[AgenticLoop] Attempting structured auto-recovery")
// Record auto-recovery attempt
if metrics := GetAIMetrics(); metrics != nil {
metrics.RecordAutoRecoveryAttempt(recoveryPlan.ErrorCode, tc.Name)
}
retryResult, retryErr := a.executeToolSafely(ctx, recoveryPlan.ToolName, recoveryPlan.Input)
if retryErr != nil {
log.Warn().
Err(retryErr).
Str("tool", tc.Name).
Msg("[AgenticLoop] Auto-recovery retry failed with error")
} else if !retryResult.IsError {
// Recovery succeeded!
log.Info().
Str("tool", tc.Name).
Msg("[AgenticLoop] Auto-recovery succeeded")
if metrics := GetAIMetrics(); metrics != nil {
metrics.RecordAutoRecoverySuccess(recoveryPlan.ErrorCode, tc.Name)
}
// Use the successful result
result = retryResult
resultText = FormatToolResult(result)
isError = false
} else {
log.Warn().
Str("tool", tc.Name).
Str("retry_error", FormatToolResult(retryResult)).
Msg("[AgenticLoop] Auto-recovery retry still blocked")
// Keep original error but note the failed recovery attempt
resultText = resultText + "\n\n[Auto-recovery attempted but failed. Follow the recovery hint manually.]"
}
}
}
// Check if this is an approval request
if strings.HasPrefix(resultText, "APPROVAL_REQUIRED:") {
// Parse approval request
approvalJSON := strings.TrimPrefix(resultText, "APPROVAL_REQUIRED:")
approvalJSON = strings.TrimSpace(approvalJSON)
var approvalData struct {
ApprovalID string `json:"approval_id"`
Command string `json:"command"`
Risk string `json:"risk"`
Description string `json:"description"`
}
if err := json.Unmarshal([]byte(approvalJSON), &approvalData); err != nil {
log.Error().Err(err).Str("data", approvalJSON).Msg("failed to parse approval request")
resultText = "Error: failed to parse approval request"
isError = true
} else {
// Send approval_needed event
jsonData, _ := json.Marshal(ApprovalNeededData{
ApprovalID: approvalData.ApprovalID,
ToolID: tc.ID,
ToolName: tc.Name,
Command: approvalData.Command,
RunOnHost: true, // Control commands run on host
TargetHost: "", // Will be filled from approval context if available
Risk: approvalData.Risk,
Description: approvalData.Description,
})
callback(StreamEvent{Type: "approval_needed", Data: jsonData})
// In autonomous mode (investigations), don't wait for approval.
// Instead, return with approval info so the orchestrator can queue it.
a.mu.Lock()
isAutonomous := a.autonomousMode
loopOrgID := approval.NormalizeOrgID(a.orgID)
a.mu.Unlock()
if isAutonomous {
log.Debug().
Str("approval_id", approvalData.ApprovalID).
Str("command", approvalData.Command).
Msg("[AgenticLoop] Autonomous mode: returning approval request without waiting")
resultText = fmt.Sprintf("FIX_QUEUED: This action requires user approval. The fix has been queued for review. Approval ID: %s, Command: %s", approvalData.ApprovalID, approvalData.Command)
isError = false
} else {
// Wait for approval decision (poll with timeout)
store := approval.GetStore()
if store != nil {
approvalCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
decision, waitErr := waitForApprovalDecision(approvalCtx, store, approvalData.ApprovalID, loopOrgID)
cancel()
if waitErr != nil {
resultText = fmt.Sprintf("Approval timeout or error: %v", waitErr)
isError = true
} else if decision.Status == approval.StatusApproved {
// Re-execute the tool with approval granted
// Add approval_id to input so tool knows this is pre-approved
inputWithApproval := make(map[string]interface{})
for k, v := range tc.Input {
inputWithApproval[k] = v
}
inputWithApproval["_approval_id"] = approvalData.ApprovalID
result, err = a.executeToolSafely(ctx, tc.Name, inputWithApproval)
if err != nil {
resultText = fmt.Sprintf("Error after approval: %v", err)
isError = true
} else {
resultText = FormatToolResult(result)
isError = result.IsError
}
} else {
resultText = fmt.Sprintf("Command denied: %s", decision.DenyReason)
isError = false
}
} else {
resultText = "Approval system not available"
isError = true
}
}
}
}
if !isError {
anyToolSucceededThisTurn = true
}
// Send tool_end event
// Convert input to JSON string for frontend display
inputStr, rawInput := formatToolInputForFrontend(tc.Name, tc.Input, true)
jsonData, _ := json.Marshal(ToolEndData{
ID: tc.ID,
Name: tc.Name,
Input: inputStr,
RawInput: rawInput,
Output: resultText,
Success: !isError,
})
callback(StreamEvent{Type: "tool_end", Data: jsonData})
// === FSM STATE TRANSITION: Update FSM after successful tool execution ===
if fsm != nil && !isError {
fsm.OnToolSuccess(toolKind, tc.Name)
// If we just completed verification (read after write in VERIFYING), transition to READING
// This allows subsequent writes to proceed without being blocked
// CRITICAL: Must call this IMMEDIATELY after OnToolSuccess, not just when model gives final answer
if fsm.State == StateVerifying && fsm.ReadAfterWrite {
fsm.CompleteVerification()
log.Debug().
Str("tool", tc.Name).
Str("new_state", string(fsm.State)).
Msg("[AgenticLoop] FSM verification complete after read, transitioning to READING")
}
// If a write tool includes self-verification evidence, we can satisfy
// the "verify after write" invariant without requiring a separate read
// tool call (which may be stale depending on reporting cadence).
//
// Verification evidence is a structured field in the tool output:
// { "verification": { "ok": true, ... } }
if toolKind == ToolKindWrite && toolResultHasVerificationOK(resultText) {
fsm.OnToolSuccess(ToolKindRead, "self_verify")
if fsm.State == StateVerifying && fsm.ReadAfterWrite {
fsm.CompleteVerification()
}
writeCompletedLastTurn = true
log.Info().
Str("tool", tc.Name).
Str("new_state", string(fsm.State)).
Msg("[AgenticLoop] Write tool provided verification evidence; FSM verification satisfied")
}
log.Debug().
Str("tool", tc.Name).
Str("kind", toolKind.String()).
Str("new_state", string(fsm.State)).
Bool("wrote_this_episode", fsm.WroteThisEpisode).
Bool("read_after_write", fsm.ReadAfterWrite).
Msg("[AgenticLoop] FSM state transition after tool success")
// Check if this success resolves a pending recovery
if pr := fsm.CheckRecoverySuccess(tc.Name); pr != nil {
log.Info().
Str("tool", tc.Name).
Str("error_code", pr.ErrorCode).
Str("recovery_id", pr.RecoveryID).
Msg("[AgenticLoop] Auto-recovery succeeded")
if metrics := GetAIMetrics(); metrics != nil {
metrics.RecordAutoRecoverySuccess(pr.ErrorCode, pr.Tool)
}
}
}
// Compute model-facing result text AFTER auto-verify may have appended data.
// This ensures the model sees the verification result and task-completion signal.
modelResultText := truncateToolResultForModel(resultText)
// Create tool result message
toolResultMsg := Message{
ID: uuid.New().String(),
Role: "user", // Tool results are sent as user messages
Timestamp: time.Now(),
ToolResult: &ToolResult{
ToolUseID: tc.ID,
Content: resultText,
IsError: isError,
},
}
resultMessages = append(resultMessages, toolResultMsg)
// Add to provider messages for next turn
providerMessages = append(providerMessages, providers.Message{
Role: "user",
ToolResult: &providers.ToolResult{
ToolUseID: tc.ID,
Content: modelResultText,
IsError: isError,
},
})
}
// Track consecutive turns where ALL tool calls failed/were blocked.
// This catches stuck models that vary arguments to bypass identical-call detection.
{
if anyToolSucceededThisTurn {
consecutiveAllErrorTurns = 0
} else {
consecutiveAllErrorTurns++
if consecutiveAllErrorTurns >= 3 {
toolBlockedLastTurn = true
log.Warn().
Int("consecutive_all_error_turns", consecutiveAllErrorTurns).
Int("turn", turn).
Str("session_id", sessionID).
Msg("[AgenticLoop] All tool calls failed for 3 consecutive turns — forcing text-only response")
}
}
}
// If any tool call this turn was budget-blocked or loop-detected, force
// the model to produce text on the next turn. It already has the data from
// earlier successful calls — making more tool calls will just waste tokens.
if budgetBlockedThisTurn > 0 {
toolBlockedLastTurn = true
log.Warn().
Int("blocked", budgetBlockedThisTurn).
Int("total_calls", len(toolCalls)).
Int("turn", turn).
Str("session_id", sessionID).
Msg("[AgenticLoop] Tool calls blocked this turn — will force text-only next turn")
}
// Guardrail: in interactive chat mode, force wrap-up after repeated
// tool-only turns to avoid long no-answer chains.
a.mu.Lock()
autonomousMode := a.autonomousMode
a.mu.Unlock()
if !autonomousMode && !singleToolRequested && consecutiveToolOnlyTurns >= maxConsecutiveToolOnlyTurns {
toolBlockedLastTurn = true
log.Warn().
Int("consecutive_tool_only_turns", consecutiveToolOnlyTurns).
Int("turn", turn).
Str("session_id", sessionID).
Msg("[AgenticLoop] Consecutive tool-only turns exceeded threshold — forcing text-only response")
}
if singleToolEnforced && len(toolCalls) > 0 {
// Single tool request completed - ensure we have a proper response
// Don't just return raw tool output, let ensureFinalTextResponse synthesize if needed
resultMessages = a.ensureFinalTextResponse(ctx, sessionID, resultMessages, providerMessages, callback)
return resultMessages, nil
}
// Track cumulative tool calls and inject wrap-up nudge/escalation if threshold exceeded
totalToolCalls += len(toolCalls)
if !wrapUpNudgeFired && totalToolCalls >= wrapUpNudgeAfterCalls {
maybeInjectWrapUpNudge(providerMessages, totalToolCalls, maxTurns, turn, wrapUpNudgeAfterCalls)
wrapUpNudgeFired = true
} else if wrapUpNudgeFired && !wrapUpEscalateFired && totalToolCalls >= wrapUpEscalateAfterCalls {
maybeInjectWrapUpEscalation(providerMessages, totalToolCalls)
wrapUpEscalateFired = true
}
// Mark the start of the next turn's messages for compaction tracking
currentTurnStartIndex = len(providerMessages)
turn++
}
log.Warn().Int("max_turns", maxTurns).Str("session_id", sessionID).Msg("agentic loop hit max turns limit")
resultMessages = a.ensureFinalTextResponse(ctx, sessionID, resultMessages, providerMessages, callback)
return resultMessages, nil
}