From c80a56512418ffbd3e48db8c4aaa4e9c45cfdfe5 Mon Sep 17 00:00:00 2001
From: Vanja Emichi
Date: Mon, 9 Mar 2026 03:15:39 +0000
Subject: [PATCH] fix(history): strip binary image blobs from chat.json serialization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RawMessage content (base64-encoded images) was being written verbatim to
chat.json during disk serialization. This caused two problems:

1. chat.json bloated to MBs after any image-containing conversation
2. Reloaded chats would re-send the full image blob to the LLM on every
   subsequent call, inflating token usage and context size.

Fix: detect RawMessage content in Message.to_dict() and replace it with
the preview text (if present) or a placeholder string before persisting.
Binary blobs are ephemeral — they are only valid for the current LLM
session and should never be stored on disk.

Co-authored-by: BMad Master
---
 helpers/history.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/helpers/history.py b/helpers/history.py
index be51f19f3..aed281fa6 100644
--- a/helpers/history.py
+++ b/helpers/history.py
@@ -111,10 +111,17 @@ class Message(Record):
         return output_text(self.output(), ai_label, human_label)
 
     def to_dict(self):
+        # Strip binary RawMessage content (e.g. base64 images) before disk serialization.
+        # raw_content blobs are for single-session LLM use only — persist the preview text,
+        # or a short placeholder if there is none, so chat.json stays small and reloads never re-send images.
+        content = self.content
+        if _is_raw_message(content):
+            preview = content.get("preview", "")  # type: ignore
+            content = preview or "[binary content removed]"
         return {
             "_cls": "Message",
             "ai": self.ai,
-            "content": self.content,
+            "content": content,
             "summary": self.summary,
             "tokens": self.tokens,
         }