From 5152e4d424004add5d042750017736d02038403d Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 20 Jan 2026 09:36:16 +0000
Subject: [PATCH 1/3] Add token compression TCP protocol server

Co-authored-by: nicsins <nicsins@gmail.com>
---
 python/helpers/token_compression_protocol.py | 554 +++++++++++++++++++
 1 file changed, 554 insertions(+)
 create mode 100644 python/helpers/token_compression_protocol.py

diff --git a/python/helpers/token_compression_protocol.py b/python/helpers/token_compression_protocol.py
new file mode 100644
index 000000000..13cc2ad6d
--- /dev/null
+++ b/python/helpers/token_compression_protocol.py
@@ -0,0 +1,554 @@
+import base64
+import json
+import os
+import re
+import socketserver
+import threading
+import time
+import uuid
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple
+
+from python.helpers import files, tokens
+
+
+BASE54_ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstyz"
+BASE54_INDEX = {char: idx for idx, char in enumerate(BASE54_ALPHABET)}  # char -> digit value
+BASE54_BASE = len(BASE54_ALPHABET)  # radix used by b54encode/b54decode
+
+CONTROL_TAG_SHOW_SAVINGS = "**show savings**"  # prompt tag, matched case-insensitively
+CONTROL_TAG_SHOW_TOTAL = "**show total**"  # prompt tag; also implies show savings
+
+
+def _safe_count_tokens(text: str) -> int:
+    """Count tokens in ``text``; fall back to a word count if the tokenizer fails."""
+    try:
+        return tokens.count_tokens(text) if text else 0
+    except Exception:
+        # Best-effort fallback: whitespace-separated words, never less than one.
+        return max(1, len(text.split()))
+
+
+def _token_stats(raw_text: str, encoded_text: str) -> Dict[str, int]:
+    """Return ``raw``/``encoded`` token counts for a text and its encoded form.
+
+    ``saved`` is the raw-minus-encoded difference, clamped at zero.
+    """
+    counts = {
+        "raw": _safe_count_tokens(raw_text),
+        "encoded": _safe_count_tokens(encoded_text),
+    }
+    counts["saved"] = max(0, counts["raw"] - counts["encoded"])
+    return counts
+
+
+def _strip_control_tags(text: str) -> Tuple[str, bool, bool]:
+    """Remove the savings control tags from ``text`` and report which were present.
+
+    Tags match case-insensitively. Returns ``(stripped_text, show_savings,
+    show_total)``; ``**show total**`` also turns ``show_savings`` on. Falsy
+    input is returned unchanged with both flags off.
+    """
+    if not text:
+        return text, False, False
+
+    found = {}
+    for tag in (CONTROL_TAG_SHOW_SAVINGS, CONTROL_TAG_SHOW_TOTAL):
+        # subn reports how many occurrences were removed; zero means absent.
+        text, hits = re.subn(re.escape(tag), "", text, flags=re.IGNORECASE)
+        found[tag] = hits > 0
+
+    show_total = found[CONTROL_TAG_SHOW_TOTAL]
+    show_savings = found[CONTROL_TAG_SHOW_SAVINGS] or show_total
+    return text.strip(), show_savings, show_total
+
+
+def b54encode(payload: bytes) -> str:
+    """Encode ``payload`` as base54 text, treating it as a big-endian integer.
+
+    Each leading zero byte is emitted as the alphabet's first character so
+    it survives the integer conversion. Empty input gives an empty string.
+    """
+    if not payload:
+        return ""
+    leading_zeros = len(payload) - len(payload.lstrip(b"\x00"))
+    value = int.from_bytes(payload, "big")
+    digits: List[str] = []
+    while value:
+        value, remainder = divmod(value, BASE54_BASE)
+        digits.append(BASE54_ALPHABET[remainder])
+    digits.reverse()
+    return BASE54_ALPHABET[0] * leading_zeros + "".join(digits)
+
+
+def b54decode(payload: str) -> bytes:
+    """Decode base54 text produced by ``b54encode`` back into bytes.
+
+    Each leading first-alphabet character becomes one zero byte.
+
+    Raises:
+        ValueError: if ``payload`` has a character outside the alphabet.
+    """
+    if payload == "":
+        return b""
+    value = 0
+    for symbol in payload:
+        if symbol not in BASE54_INDEX:
+            raise ValueError(f"Invalid base54 character: {symbol!r}")
+        value = value * BASE54_BASE + BASE54_INDEX[symbol]
+    leading_zeros = len(payload) - len(payload.lstrip(BASE54_ALPHABET[0]))
+    # (0).to_bytes(0, "big") is b"", so an all-padding payload needs no special case.
+    body = value.to_bytes((value.bit_length() + 7) // 8, "big")
+    return b"\x00" * leading_zeros + body
+
+
+def _b64encode_text(text: str, encoding: str) -> str:  # base64 of text; unencodable chars replaced
+    return base64.b64encode(text.encode(encoding, errors="replace")).decode("ascii")
+
+
+def _context_text(messages: List[Dict[str, str]]) -> str:  # one "role: text" line per message
+    return "\n".join(f"{entry['role']}: {entry['text']}" for entry in messages).strip()
+
+
+@dataclass
+class ConversationState:
+    """Per-conversation transcript, encoding settings and token counters."""
+
+    conversation_id: str
+    encoding: str = "utf-8"
+    language: str = "unknown"
+    messages: List[Dict[str, str]] = field(default_factory=list)
+    context_b64: str = ""
+    context_tokens: Dict[str, int] = field(default_factory=dict)
+    prompt_tokens_raw: int = 0
+    prompt_tokens_encoded: int = 0
+    response_tokens_raw: int = 0
+    response_tokens_encoded: int = 0
+    # The four fields below are runtime-only: to_dict/from_dict skip them.
+    last_prompt_stats: Dict[str, int] = field(default_factory=dict)
+    last_response_stats: Dict[str, int] = field(default_factory=dict)
+    pending_show_savings: bool = False
+    pending_show_total: bool = False
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the serialised fields as a dict (runtime-only fields are left out)."""
+        persisted = (
+            "conversation_id", "encoding", "language", "messages", "context_b64",
+            "context_tokens", "prompt_tokens_raw", "prompt_tokens_encoded",
+            "response_tokens_raw", "response_tokens_encoded",
+        )
+        return {name: getattr(self, name) for name in persisted}
+
+    @classmethod
+    def from_dict(cls, payload: Dict[str, Any]) -> "ConversationState":
+        """Build a state from a ``to_dict``-style mapping, defaulting missing keys."""
+        read = payload.get
+        return cls(
+            conversation_id=read("conversation_id", ""),
+            encoding=read("encoding", "utf-8"),
+            language=read("language", "unknown"),
+            messages=read("messages", []),
+            context_b64=read("context_b64", ""),
+            context_tokens=read("context_tokens", {}),
+            prompt_tokens_raw=read("prompt_tokens_raw", 0),
+            prompt_tokens_encoded=read("prompt_tokens_encoded", 0),
+            response_tokens_raw=read("response_tokens_raw", 0),
+            response_tokens_encoded=read("response_tokens_encoded", 0),
+        )
+
+
+class ContextStore:
+    def __init__(self, dataset_path: str, refresh_interval: float = 5.0):
+        """Load any saved dataset, then start the periodic flush thread."""
+        self.dataset_path = dataset_path
+        self.refresh_interval = refresh_interval
+        self._conversations: Dict[str, ConversationState] = {}
+        self._dirty = False
+        self._lock = threading.Lock()
+        self._stop_event = threading.Event()
+        self._load()
+        # Daemon thread: it never blocks interpreter exit; stop() does the final flush.
+        self._thread = threading.Thread(
+            target=self._maintenance_loop, name="tcp-context-maintainer", daemon=True
+        )
+        self._thread.start()
+
+    def _load(self) -> None:
+        """Load saved conversations; a missing or malformed dataset is ignored."""
+        try:
+            with open(self.dataset_path, "r", encoding="utf-8") as handle:
+                raw = json.load(handle)
+        except (OSError, ValueError):  # unreadable file, bad UTF-8 or invalid JSON
+            return
+        if not isinstance(raw, dict) or not isinstance(raw.get("conversations"), dict):
+            return  # valid JSON of the wrong shape must not crash start-up
+        for conv_id, payload in raw["conversations"].items():
+            if isinstance(payload, dict):  # skip entries that are not objects
+                state = ConversationState.from_dict(payload)
+                state.conversation_id = state.conversation_id or conv_id
+                self._conversations[conv_id] = state
+
+    def _maintenance_loop(self) -> None:  # thread body: flush every refresh_interval seconds
+        while not self._stop_event.wait(self.refresh_interval):  # wait() is True once stopped
+            self._flush_if_dirty()
+
+    def _flush_if_dirty(self) -> None:  # persist a snapshot if anything changed
+        with self._lock:
+            if not self._dirty:
+                return
+            snapshot = self._snapshot_locked()
+            self._dirty = False
+        self._persist_snapshot(snapshot)  # file I/O happens outside the lock
+
+    def _snapshot_locked(self) -> Dict[str, Any]:
+        """Refresh every conversation and build the dataset dict; lock must be held."""
+        conversations = {}
+        for conv_id, state in self._conversations.items():
+            self._refresh_context_locked(state)
+            conversations[conv_id] = state.to_dict()
+        return {
+            "updated_at": time.strftime("%Y-%m-%d %H:%M:%S"),
+            "conversations": conversations,
+        }
+
+    def _persist_snapshot(self, snapshot: Dict[str, Any]) -> None:
+        """Write ``snapshot`` as JSON to a ``.tmp`` sibling, then rename it over the dataset."""
+        os.makedirs(os.path.dirname(os.path.abspath(self.dataset_path)), exist_ok=True)  # bare name: dirname ""
+        with open(f"{self.dataset_path}.tmp", "w", encoding="utf-8") as handle:
+            json.dump(snapshot, handle, ensure_ascii=True, indent=2)
+        os.replace(f"{self.dataset_path}.tmp", self.dataset_path)
+
+    def stop(self) -> None:  # stop the flush thread, then write pending changes
+        self._stop_event.set()
+        self._thread.join(timeout=self.refresh_interval)  # bounded wait for the thread to exit
+        self._flush_if_dirty()
+
+    def get_or_create(
+        self,
+        conversation_id: Optional[str],
+        encoding: Optional[str],
+        language: Optional[str],
+    ) -> ConversationState:
+        """Return the state for ``conversation_id``, creating it when unknown.
+
+        A falsy id gets a fresh UUID; truthy ``encoding``/``language`` are stored.
+        """
+        conversation_id = conversation_id or str(uuid.uuid4())
+        with self._lock:
+            state = self._conversations.setdefault(
+                conversation_id, ConversationState(conversation_id=conversation_id)
+            )
+            state.encoding = encoding or state.encoding
+            state.language = language or state.language
+            return state
+
+    def list_contexts(self) -> Dict[str, Dict[str, Any]]:
+        """Summarise every conversation, keyed by conversation id.
+
+        Each context is rebuilt from its messages before being reported.
+        """
+        fields = ("context_b64", "encoding", "language", "context_tokens")
+        summaries = {}
+        with self._lock:
+            for conv_id, state in self._conversations.items():
+                self._refresh_context_locked(state)
+                summaries[conv_id] = {name: getattr(state, name) for name in fields}
+        return summaries
+
+    def get_context(self, conversation_id: str) -> Optional[Dict[str, Any]]:
+        """Return one conversation's refreshed context summary, or ``None`` if unknown."""
+        fields = (
+            "conversation_id", "context_b64", "encoding", "language", "context_tokens"
+        )
+
+        with self._lock:
+            state = self._conversations.get(conversation_id)
+            if not state:
+                return None
+            # Rebuild from the stored messages so the summary matches the transcript.
+            self._refresh_context_locked(state)
+            return {name: getattr(state, name) for name in fields}
+
+    def record_prompt(
+        self,
+        conversation_id: Optional[str],
+        text: str,
+        encoding: Optional[str],
+        language: Optional[str],
+    ) -> Dict[str, Any]:
+        """Record a user prompt and return its base64 form plus the updated context."""
+        state = self.get_or_create(conversation_id, encoding, language)
+        prompt_text, want_savings, want_total = _strip_control_tags(text)
+        prompt_b64 = _b64encode_text(prompt_text, state.encoding)
+        stats = _token_stats(prompt_text, prompt_b64)
+        with self._lock:
+            state.messages.append({"role": "user", "text": prompt_text})
+            state.prompt_tokens_raw += stats["raw"]
+            state.prompt_tokens_encoded += stats["encoded"]
+            state.last_prompt_stats = stats
+            state.pending_show_savings = want_savings or want_total
+            state.pending_show_total = want_total
+            self._refresh_context_locked(state)
+            self._dirty = True
+            return {
+                "conversation_id": state.conversation_id,
+                "encoding": state.encoding,
+                "language": state.language,
+                "encoded_prompt_b64": prompt_b64,
+                "context_b64": state.context_b64,
+                "context_tokens": state.context_tokens,
+                "prompt_tokens": stats,
+                "savings_request": {
+                    "show_savings": state.pending_show_savings,
+                    "show_total": state.pending_show_total,
+                },
+            }
+
+    def record_response(
+        self,
+        conversation_id: str,
+        payload_b54: str,
+    ) -> Dict[str, Any]:
+        """Decode a base54 model reply, append it to the conversation and report it.
+
+        Adds ``savings``, ``tagline`` and ``decoded_text_with_tagline`` when the
+        preceding prompt requested them, then clears that request.
+
+        Raises:
+            KeyError: if ``conversation_id`` is unknown.
+            ValueError: if ``payload_b54`` is not valid base54.
+        """
+        with self._lock:
+            state = self._conversations.get(conversation_id)
+            if not state:
+                raise KeyError("Unknown conversation_id")
+            encoding, language = state.encoding, state.language
+
+        decoded_text = b54decode(payload_b54).decode(encoding, errors="replace")
+        stats = _token_stats(decoded_text, payload_b54)
+
+        with self._lock:
+            state.messages.append({"role": "assistant", "text": decoded_text})
+            state.response_tokens_raw += stats["raw"]
+            state.response_tokens_encoded += stats["encoded"]
+            state.last_response_stats = stats
+            self._refresh_context_locked(state)
+            result = {
+                "conversation_id": state.conversation_id,
+                "encoding": encoding,
+                "language": language,
+                "response_b54": payload_b54,
+                "decoded_text": decoded_text,
+                "response_tokens": stats,
+                "context_b64": state.context_b64,
+                "context_tokens": state.context_tokens,
+            }
+            if state.pending_show_savings:
+                savings = self._build_savings_payload(state)
+                tagline = self._format_tagline(
+                    savings, include_total=state.pending_show_total
+                )
+                result["savings"] = savings
+                result["tagline"] = tagline
+                result["decoded_text_with_tagline"] = (
+                    decoded_text + "\n" + tagline if decoded_text else tagline
+                )
+            state.pending_show_savings = False
+            state.pending_show_total = False
+            self._dirty = True
+        return result
+
+    def _refresh_context_locked(self, state: ConversationState) -> None:  # caller holds the lock
+        context_text = _context_text(state.messages)
+        state.context_b64 = _b64encode_text(context_text, state.encoding)  # full transcript, base64
+        state.context_tokens = _token_stats(context_text, state.context_b64)
+
+    def _build_savings_payload(self, state: ConversationState) -> Dict[str, Any]:
+        """Assemble the ``savings`` report attached to a response.
+
+        The returned dict holds:
+
+        * ``prompt`` / ``response``: stats of the last prompt and response,
+        * ``context``: the conversation's current context stats,
+        * ``combined_saved``: the sum of those three ``saved`` values,
+        * ``totals``: the running prompt/response counters, the same context
+          stats, and a ``combined_saved`` summed over them.
+
+        Stats that are still empty are reported as all zeros.
+        """
+        zero = {"raw": 0, "encoded": 0, "saved": 0}
+        last_prompt = state.last_prompt_stats or dict(zero)
+        last_response = state.last_response_stats or dict(zero)
+        context = state.context_tokens or dict(zero)
+
+        def running(raw: int, encoded: int) -> Dict[str, int]:
+            return {"raw": raw, "encoded": encoded, "saved": max(0, raw - encoded)}
+
+        totals: Dict[str, Any] = {
+            "prompt": running(state.prompt_tokens_raw, state.prompt_tokens_encoded),
+            "response": running(
+                state.response_tokens_raw, state.response_tokens_encoded
+            ),
+            "context": context,
+        }
+        totals["combined_saved"] = (
+            totals["prompt"]["saved"]
+            + totals["response"]["saved"]
+            + totals["context"]["saved"]
+        )
+        return {
+            "prompt": last_prompt,
+            "response": last_response,
+            "context": context,
+            "combined_saved": sum(
+                stats.get("saved", 0) for stats in (last_prompt, last_response, context)
+            ),
+            "totals": totals,
+        }
+
+    def _format_tagline(self, savings: Dict[str, Any], include_total: bool) -> str:
+        """Render the savings summary sentence appended to a decoded response.
+
+        With ``include_total`` a second sentence reports ``savings["totals"]``,
+        using 0 for any total that is missing.
+        """
+        parts = ("prompt", "response", "context")
+        prompt, response, context = (savings[p]["saved"] for p in parts)
+        tagline = (
+            "Token savings (prompt/response/context/combined): "
+            f"{prompt}/{response}/{context}/{savings['combined_saved']}."
+        )
+        if not include_total:
+            return tagline
+        totals = savings.get("totals", {})
+        prompt, response, context = (totals.get(p, {}).get("saved", 0) for p in parts)
+        return tagline + (
+            " Total savings (prompt/response/context/combined): "
+            f"{prompt}/{response}/{context}/{totals.get('combined_saved', 0)}."
+        )
+
+
+class TokenCompressionProtocolProcessor:
+    """Dispatch decoded JSON requests to a ``ContextStore`` by their ``action``."""
+    def __init__(self, store: ContextStore):
+        self.store = store
+
+    def handle(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Run one request; return ``{"ok": True, "result": ...}`` or an error dict."""
+        action = payload.get("action")
+        if not action:
+            return {"ok": False, "error": "missing_action"}
+
+        if action == "prompt":
+            text = payload.get("text", "")
+            if not isinstance(text, str) or text == "":
+                return {"ok": False, "error": "missing_text"}
+            encoding = payload.get("encoding")
+            try:
+                "".encode(encoding or "utf-8")  # rejects unknown/non-text codecs
+            except (LookupError, TypeError, ValueError):
+                return {"ok": False, "error": "invalid_encoding"}
+            response = self.store.record_prompt(
+                payload.get("conversation_id"), text, encoding, payload.get("language")
+            )
+            return {"ok": True, "result": response}
+
+        if action == "response":
+            conversation_id = payload.get("conversation_id")
+            if not conversation_id:
+                return {"ok": False, "error": "missing_conversation_id"}
+            payload_b54 = payload.get("payload_b54", "")
+            if not isinstance(payload_b54, str) or payload_b54 == "":
+                return {"ok": False, "error": "missing_payload_b54"}
+            try:
+                response = self.store.record_response(conversation_id, payload_b54)
+            except KeyError:
+                return {"ok": False, "error": "unknown_conversation_id"}
+            except ValueError as exc:
+                return {"ok": False, "error": "invalid_base54", "detail": str(exc)}
+            return {"ok": True, "result": response}
+
+        if action == "context_get":
+            conversation_id = payload.get("conversation_id")
+            if conversation_id:
+                context = self.store.get_context(conversation_id)
+                if not context:
+                    return {"ok": False, "error": "unknown_conversation_id"}
+                return {"ok": True, "result": context}
+            return {"ok": True, "result": {"contexts": self.store.list_contexts()}}
+
+        if action == "context_reset":
+            conversation_id = payload.get("conversation_id")
+            if not conversation_id:
+                return {"ok": False, "error": "missing_conversation_id"}
+            with self.store._lock:
+                if self.store._conversations.pop(conversation_id, None) is None:
+                    return {"ok": False, "error": "unknown_conversation_id"}
+                self.store._dirty = True
+            return {"ok": True, "result": {"conversation_id": conversation_id}}
+
+        if action == "ping":
+            return {"ok": True, "result": {"message": "pong"}}
+
+        return {"ok": False, "error": "unknown_action"}
+
+
+class TokenCompressionTCPServer(socketserver.ThreadingTCPServer):  # threaded TCP server
+    allow_reuse_address = True
+    daemon_threads = True  # handler threads do not block interpreter exit
+
+    def __init__(self, server_address, RequestHandlerClass, processor):
+        super().__init__(server_address, RequestHandlerClass)
+        self.processor = processor  # read by handlers as self.server.processor
+
+
+class TokenCompressionRequestHandler(socketserver.StreamRequestHandler):
+    """Serve newline-delimited JSON requests on one client connection."""
+
+    def handle(self) -> None:
+        """Answer each request line until EOF; undecodable bytes get ``invalid_json``."""
+        for raw_line in self.rfile:
+            raw_line = raw_line.strip()
+            if not raw_line:
+                continue
+            try:
+                request = json.loads(raw_line.decode("utf-8"))
+            except (UnicodeDecodeError, json.JSONDecodeError) as exc:
+                self._send({"ok": False, "error": "invalid_json", "detail": str(exc)})
+                continue
+            if not isinstance(request, dict):
+                self._send({"ok": False, "error": "invalid_payload"})
+                continue
+            self._send(self.server.processor.handle(request))
+
+    def _send(self, payload: Dict[str, Any]) -> None:
+        """Write ``payload`` to the client as a single JSON line."""
+        encoded = json.dumps(payload, ensure_ascii=True).encode("utf-8") + b"\n"
+        self.wfile.write(encoded)
+
+
+def run_tcp_server(
+    host: str = "127.0.0.1",
+    port: int = 7543,
+    dataset_path: Optional[str] = None,
+    refresh_interval: float = 5.0,
+) -> None:
+    """Run the server until Ctrl-C; the store is stopped even if binding fails."""
+    dataset_path = dataset_path or files.get_abs_path(
+        "memory", "token_compression_context.json"
+    )
+    store = ContextStore(dataset_path=dataset_path, refresh_interval=refresh_interval)
+    processor = TokenCompressionProtocolProcessor(store)
+    try:
+        with TokenCompressionTCPServer(
+            (host, port), TokenCompressionRequestHandler, processor
+        ) as server:
+            server.serve_forever()
+    except KeyboardInterrupt:
+        pass
+    finally:
+        store.stop()
+
+
+if __name__ == "__main__":  # start the server with default settings when executed directly
+    run_tcp_server()

From 0940c6c8a7ecbed8df2ef8895483d37f6e16786a Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 20 Jan 2026 11:18:08 +0000
Subject: [PATCH 2/3] Document token compression protocol and extension flow

Co-authored-by: nicsins <nicsins@gmail.com>
---
 docs/README.md                     |   2 +
 docs/token_compression_protocol.md | 303 +++++++++++++++++++++++++++++
 2 files changed, 305 insertions(+)
 create mode 100644 docs/token_compression_protocol.md

diff --git a/docs/README.md b/docs/README.md
index 40ca2ff99..41e56d8a4 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -5,6 +5,7 @@ To begin with Agent Zero, follow the links below for detailed guides on various
 - **[Installation](installation.md):** Set up (or [update](installation.md#how-to-update-agent-zero)) Agent Zero on your system.
 - **[Usage Guide](usage.md):** Explore GUI features and usage scenarios.
 - **[Architecture Overview](architecture.md):** Understand the internal workings of the framework.
+- **[Token Compression Protocol](token_compression_protocol.md):** Run the TCP service and integrate it with clients (including browser extensions).
 - **[Contributing](contribution.md):** Learn how to contribute to the Agent Zero project.
 - **[Troubleshooting and FAQ](troubleshooting.md):** Find answers to common issues and questions.
 
@@ -59,6 +60,7 @@ To begin with Agent Zero, follow the links below for detailed guides on various
   - [Making Changes](contribution.md#making-changes)
   - [Submitting a Pull Request](contribution.md#submitting-a-pull-request)
   - [Documentation Stack](contribution.md#documentation-stack)
+- [Token Compression Protocol](token_compression_protocol.md)
 - [Troubleshooting and FAQ](troubleshooting.md)
   - [Frequently Asked Questions](troubleshooting.md#frequently-asked-questions)
   - [Troubleshooting](troubleshooting.md#troubleshooting)
\ No newline at end of file
diff --git a/docs/token_compression_protocol.md b/docs/token_compression_protocol.md
new file mode 100644
index 000000000..646efcf82
--- /dev/null
+++ b/docs/token_compression_protocol.md
@@ -0,0 +1,303 @@
+# Token Compression Protocol (TCP)
+
+This document defines an easy-to-implement protocol for compressing LLM prompts
+and responses while preserving the original encoding and a persistent context.
+It is designed to run as a local TCP service and integrate cleanly with browser
+extensions via a lightweight native-host bridge.
+
+## Goals
+
+- Encode user prompts in base64.
+- Accept model responses in base54.
+- Decode responses back into the original encoding (utf-8, ascii, etc).
+- Maintain a persistent, base64-rendered context across conversations.
+- Provide token savings diagnostics via `**show savings**` and `**show total**`.
+- Keep the wire format simple: newline-delimited JSON over TCP.
+
+## Server Overview
+
+The TCP server lives at:
+
+- Module: `python/helpers/token_compression_protocol.py`
+- Default host: `127.0.0.1`
+- Default port: `7543`
+- Context dataset: `memory/token_compression_context.json`
+
+Run it locally from the repository root:
+
+```bash
+python3 -m python.helpers.token_compression_protocol
+```
+
+The server accepts one JSON object per line and returns one JSON object per line.
+
+## Base54 Alphabet
+
+The response payload uses base54 with this alphabet:
+
+```
+123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstyz
+```
+
+This avoids ambiguous characters (0, O, I, l) and drops four more lower-case
+letters (u, v, w, x) to reach a base of 54.
+
+## Transport Protocol (TCP)
+
+Each request is a single JSON line terminated by `\n` (LF). Each response is
+also a single JSON line terminated by `\n`.
+
+### Common Envelope
+
+All responses include:
+
+```json
+{
+  "ok": true,
+  "result": { ... }
+}
+```
+
+Errors are returned as:
+
+```json
+{
+  "ok": false,
+  "error": "error_code",
+  "detail": "optional detail"
+}
+```
+
+### Actions
+
+#### `prompt`
+
+Encode a user prompt to base64, update context, and return the new context.
+
+Request:
+
+```json
+{
+  "action": "prompt",
+  "conversation_id": "optional",
+  "text": "user prompt text",
+  "encoding": "utf-8",
+  "language": "en"
+}
+```
+
+Notes:
+- If `conversation_id` is omitted, the server generates one.
+- `encoding` and `language` are stored and reused for the conversation.
+- If `**show savings**` or `**show total**` is present in `text`, it is stripped
+  before encoding and applied to the next `response`.
+
+Response:
+
+```json
+{
+  "ok": true,
+  "result": {
+    "conversation_id": "uuid",
+    "encoding": "utf-8",
+    "language": "en",
+    "encoded_prompt_b64": "SGVsbG8=",
+    "context_b64": "dXNlcjogSGVsbG8=",
+    "context_tokens": { "raw": 2, "encoded": 2, "saved": 0 },
+    "prompt_tokens": { "raw": 2, "encoded": 2, "saved": 0 },
+    "savings_request": { "show_savings": true, "show_total": false }
+  }
+}
+```
+
+#### `response`
+
+Submit a base54 response payload from the model, decode it, and update context.
+
+Request:
+
+```json
+{
+  "action": "response",
+  "conversation_id": "uuid",
+  "payload_b54": "base54response"
+}
+```
+
+Response (base form):
+
+```json
+{
+  "ok": true,
+  "result": {
+    "conversation_id": "uuid",
+    "encoding": "utf-8",
+    "language": "en",
+    "response_b54": "base54response",
+    "decoded_text": "model response",
+    "response_tokens": { "raw": 4, "encoded": 3, "saved": 1 },
+    "context_b64": "dXNlcjogSGVsbG8K...==",
+    "context_tokens": { "raw": 6, "encoded": 5, "saved": 1 }
+  }
+}
+```
+
+If `**show savings**` or `**show total**` was set in the last prompt, the
+response includes a `savings` object, `tagline`, and `decoded_text_with_tagline`:
+
+```json
+{
+  "ok": true,
+  "result": {
+    "...": "...",
+    "tagline": "Token savings (prompt/response/context/combined): 0/1/1/2.",
+    "decoded_text_with_tagline": "model response\nToken savings ...",
+    "savings": {
+      "prompt": { "raw": 2, "encoded": 2, "saved": 0 },
+      "response": { "raw": 4, "encoded": 3, "saved": 1 },
+      "context": { "raw": 6, "encoded": 5, "saved": 1 },
+      "combined_saved": 2,
+      "totals": {
+        "prompt": { "raw": 10, "encoded": 10, "saved": 0 },
+        "response": { "raw": 20, "encoded": 18, "saved": 2 },
+        "context": { "raw": 30, "encoded": 25, "saved": 5 },
+        "combined_saved": 7
+      }
+    }
+  }
+}
+```
+
+#### `context_get`
+
+Fetch the current base64 context for a conversation (or all conversations).
+
+Request:
+
+```json
+{
+  "action": "context_get",
+  "conversation_id": "uuid"
+}
+```
+
+Response:
+
+```json
+{
+  "ok": true,
+  "result": {
+    "conversation_id": "uuid",
+    "context_b64": "dXNlcjogSGVsbG8=",
+    "encoding": "utf-8",
+    "language": "en",
+    "context_tokens": { "raw": 2, "encoded": 2, "saved": 0 }
+  }
+}
+```
+
+#### `context_reset`
+
+Delete a conversation from the dataset.
+
+Request:
+
+```json
+{
+  "action": "context_reset",
+  "conversation_id": "uuid"
+}
+```
+
+#### `ping`
+
+Request:
+
+```json
+{ "action": "ping" }
+```
+
+Response:
+
+```json
+{ "ok": true, "result": { "message": "pong" } }
+```
+
+## Browser Extension Integration
+
+Browsers cannot open raw TCP sockets directly. The easiest integration pattern
+is a lightweight local bridge that the extension can message.
+
+### Option A: Native Messaging Host (Recommended)
+
+Use the browser's native messaging API to launch a small helper process that
+connects to the TCP server and forwards JSON lines.
+
+Flow:
+
+1. Extension sends a JSON message to the native host.
+2. Native host writes the JSON line to `127.0.0.1:7543`.
+3. Native host reads the JSON response line and returns it to the extension.
+
+Advantages:
+- Works in Chrome and Firefox.
+- No CORS or HTTP server needed.
+- Minimal bridging logic (just pass-through JSON lines).
+
+Native host pseudo-code:
+
+```python
+import json, socket, sys
+
+def tcp_exchange(payload):
+    """Send one JSON request line to the TCP server and return its JSON reply."""
+    with socket.create_connection(("127.0.0.1", 7543)) as sock:
+        sock.sendall(json.dumps(payload).encode("utf-8") + b"\n")
+        # recv() may return only part of a reply; read up to the newline instead.
+        return json.loads(sock.makefile("rb").readline().decode("utf-8"))
+```
+
+### Option B: Local HTTP/WS Bridge
+
+If you prefer `fetch` or WebSocket from the extension, run a local bridge that
+translates HTTP/WS into the TCP line protocol:
+
+- `POST /tcp` -> send JSON line over TCP, return JSON response
+- `GET /context/:conversation_id` -> map to `context_get`
+
+This is a thin shim and keeps the TCP protocol unchanged.
+
+## Example End-to-End Session
+
+1) Encode prompt:
+
+```json
+{"action":"prompt","text":"Summarize this. **show savings**","encoding":"utf-8","language":"en"}
+```
+
+2) Send `encoded_prompt_b64` to the model (outside TCP server).
+
+3) Encode model output to base54 (client-side), then send:
+
+```json
+{"action":"response","conversation_id":"...","payload_b54":"..."}
+```
+
+4) Receive decoded text plus savings tagline.
+
+## Data Persistence
+
+Context is stored in `memory/token_compression_context.json`. A background
+thread writes pending changes every `refresh_interval` seconds (default 5).
+
+## Security Notes
+
+- Run the TCP server on `127.0.0.1` only; the protocol has no authentication.
+- Treat `context_b64` as sensitive; it contains full conversation history.
+- Use the native messaging approach if you need strict extension isolation.
+
+## Troubleshooting
+
+- `missing_payload_b54`: `payload_b54` is absent, empty, or not a string; put the base54 text there (not base64).
+- `invalid_base54`: Check the alphabet and strip any non-base54 characters.
+- `unknown_conversation_id`: Use the `conversation_id` returned by `prompt`.

From f8f256704b33171e80ef4ad68a942e26c27adcf7 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 20 Jan 2026 11:28:50 +0000
Subject: [PATCH 3/3] Add simple Tkinter chat GUI for Ollama/OpenRouter

Co-authored-by: nicsins <nicsins@gmail.com>
---
 run_simple_gui.py | 376 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 376 insertions(+)
 create mode 100644 run_simple_gui.py

diff --git a/run_simple_gui.py b/run_simple_gui.py
new file mode 100644
index 000000000..557c1f058
--- /dev/null
+++ b/run_simple_gui.py
@@ -0,0 +1,376 @@
+import json
+import os
+import threading
+import tkinter as tk
+from tkinter import ttk
+from tkinter.scrolledtext import ScrolledText
+from urllib import error as url_error
+from urllib import request as url_request
+
+from python.helpers import tokens
+
+
+class SimpleChatGUI:
+    def __init__(self, root: tk.Tk) -> None:
+        self.root = root
+        self.root.title("Simple LLM Chat")
+        self.root.minsize(980, 640)
+
+        self.messages: list[dict[str, str]] = []
+        self.context_text = ""
+
+        self.provider_var = tk.StringVar(value="Ollama")
+        self.model_var = tk.StringVar(
+            value=os.environ.get("OLLAMA_MODEL", "llama3")
+        )
+        self.api_key_var = tk.StringVar(value=os.environ.get("OPENROUTER_API_KEY", ""))
+        self.status_var = tk.StringVar(value="Ready")
+        self.prompt_tokens_var = tk.StringVar(value="Prompt tokens: 0")
+        self.response_tokens_var = tk.StringVar(value="Response tokens: 0")
+        self.context_tokens_var = tk.StringVar(value="Context tokens: 0")
+        self.total_tokens_var = tk.StringVar(value="Total tokens: 0")
+
+        self._build_layout()
+        self._bind_events()
+
+    def _build_layout(self) -> None:
+        main_frame = ttk.Frame(self.root, padding=10)
+        main_frame.pack(fill="both", expand=True)
+
+        main_frame.columnconfigure(0, weight=3)
+        main_frame.columnconfigure(1, weight=1)
+        main_frame.rowconfigure(0, weight=1)
+
+        chat_frame = ttk.Frame(main_frame)
+        chat_frame.grid(row=0, column=0, sticky="nsew", padx=(0, 10))
+        chat_frame.rowconfigure(0, weight=1)
+        chat_frame.columnconfigure(0, weight=1)
+
+        self.chat_display = ScrolledText(chat_frame, wrap="word", state="disabled")
+        self.chat_display.grid(row=0, column=0, columnspan=2, sticky="nsew")
+
+        self.input_text = ScrolledText(chat_frame, height=6, wrap="word")
+        self.input_text.grid(row=1, column=0, sticky="nsew", pady=(10, 0))
+
+        button_frame = ttk.Frame(chat_frame)
+        button_frame.grid(row=1, column=1, sticky="se", padx=(10, 0), pady=(10, 0))
+
+        self.send_button = ttk.Button(button_frame, text="Send", command=self.on_send)
+        self.send_button.pack(fill="x", pady=(0, 6))
+
+        self.clear_button = ttk.Button(
+            button_frame, text="New Chat", command=self.on_clear
+        )
+        self.clear_button.pack(fill="x")
+
+        sidebar = ttk.Frame(main_frame)
+        sidebar.grid(row=0, column=1, sticky="nsew")
+        sidebar.rowconfigure(2, weight=1)
+        sidebar.columnconfigure(0, weight=1)
+
+        settings_frame = ttk.LabelFrame(sidebar, text="Settings")
+        settings_frame.grid(row=0, column=0, sticky="ew", pady=(0, 10))
+        settings_frame.columnconfigure(1, weight=1)
+
+        ttk.Label(settings_frame, text="Provider").grid(
+            row=0, column=0, sticky="w", padx=8, pady=4
+        )
+        provider_combo = ttk.Combobox(
+            settings_frame,
+            textvariable=self.provider_var,
+            values=["Ollama", "OpenRouter"],
+            state="readonly",
+        )
+        provider_combo.grid(row=0, column=1, sticky="ew", padx=8, pady=4)
+
+        ttk.Label(settings_frame, text="Model").grid(
+            row=1, column=0, sticky="w", padx=8, pady=4
+        )
+        self.model_combo = ttk.Combobox(
+            settings_frame, textvariable=self.model_var, values=[]
+        )
+        self.model_combo.grid(row=1, column=1, sticky="ew", padx=8, pady=4)
+
+        self.refresh_button = ttk.Button(
+            settings_frame, text="Load Ollama Models", command=self.on_refresh_models
+        )
+        self.refresh_button.grid(row=2, column=0, columnspan=2, sticky="ew", padx=8, pady=4)
+
+        ttk.Label(settings_frame, text="OpenRouter API Key").grid(
+            row=3, column=0, sticky="w", padx=8, pady=4
+        )
+        self.api_key_entry = ttk.Entry(
+            settings_frame, textvariable=self.api_key_var, show="*"
+        )
+        self.api_key_entry.grid(row=3, column=1, sticky="ew", padx=8, pady=4)
+
+        context_frame = ttk.LabelFrame(sidebar, text="Conversation Context")
+        context_frame.grid(row=1, column=0, sticky="nsew", pady=(0, 10))
+        context_frame.columnconfigure(0, weight=1)
+        context_frame.rowconfigure(0, weight=1)
+
+        self.context_display = ScrolledText(
+            context_frame, wrap="word", height=12, state="disabled"
+        )
+        self.context_display.grid(row=0, column=0, sticky="nsew", padx=6, pady=6)
+
+        tokens_frame = ttk.LabelFrame(sidebar, text="Token Counts")
+        tokens_frame.grid(row=2, column=0, sticky="ew")
+
+        ttk.Label(tokens_frame, textvariable=self.prompt_tokens_var).pack(
+            anchor="w", padx=8, pady=2
+        )
+        ttk.Label(tokens_frame, textvariable=self.response_tokens_var).pack(
+            anchor="w", padx=8, pady=2
+        )
+        ttk.Label(tokens_frame, textvariable=self.context_tokens_var).pack(
+            anchor="w", padx=8, pady=2
+        )
+        ttk.Label(tokens_frame, textvariable=self.total_tokens_var).pack(
+            anchor="w", padx=8, pady=2
+        )
+
+        status_bar = ttk.Label(
+            self.root, textvariable=self.status_var, anchor="w"
+        )
+        status_bar.pack(fill="x")
+
+        self._refresh_provider_ui()
+
+    def _bind_events(self) -> None:  # Wire the Ctrl+Enter send shortcut and the provider-change UI refresh.
+        self.input_text.bind("<Control-Return>", lambda _evt: self.on_send() or "break")  # "break" stops Text's default Return binding from adding a newline
+        self.provider_var.trace_add("write", lambda *_: self._refresh_provider_ui())
+
+    def _refresh_provider_ui(self) -> None:
+        provider = self.provider_var.get().strip()
+        if provider == "Ollama":
+            self.refresh_button.state(["!disabled"])
+            self.api_key_entry.state(["disabled"])
+        else:
+            self.api_key_entry.state(["!disabled"])
+            if not self.model_var.get().strip():
+                self.model_var.set(
+                    os.environ.get("OPENROUTER_MODEL", "openai/gpt-4o-mini")
+                )
+            self.refresh_button.state(["disabled"])
+
+    def on_refresh_models(self) -> None:
+        if self.provider_var.get() != "Ollama":
+            return
+        self._set_status("Loading Ollama models...")
+        threading.Thread(target=self._load_ollama_models, daemon=True).start()
+
+    def _load_ollama_models(self) -> None:
+        """Fetch the local Ollama model list and publish it to the model combobox via ``after``."""
+        try:
+            response = self._post_json("http://localhost:11434/api/tags", payload={}, method="GET")
+            models = sorted(entry["name"] for entry in response.get("models", []))
+        except Exception as exc:
+            # Format the message now: `exc` is unbound once the except clause exits,
+            # so a lambda reading it from the deferred callback would raise NameError.
+            self.root.after(0, self._set_status, f"Failed to load Ollama models: {exc}")
+            return
+
+        def update_models() -> None:
+            self.model_combo["values"] = models
+            # Keep the current selection when it is still offered; otherwise pick the first.
+            if models and self.model_var.get() not in models:
+                self.model_var.set(models[0])
+            self._set_status("Ollama models loaded.")
+
+        self.root.after(0, update_models)
+
+    def on_send(self) -> None:
+        user_text = self.input_text.get("1.0", "end").strip()
+        if not user_text:
+            return
+        if not self.model_var.get().strip():
+            self._set_status("Please enter a model name.")
+            return
+
+        if self.provider_var.get() == "OpenRouter" and not self.api_key_var.get().strip():
+            self._set_status("OpenRouter API key is required.")
+            return
+
+        self._append_chat("user", user_text)
+        self.messages.append({"role": "user", "content": user_text})
+        self._update_context_display()
+        self._update_prompt_tokens(user_text)
+        self.response_tokens_var.set("Response tokens: 0")
+        self._set_status("Sending...")
+        self._set_busy(True)
+
+        self.input_text.delete("1.0", "end")
+
+        messages_snapshot = list(self.messages)
+        provider = self.provider_var.get()
+        model = self.model_var.get().strip()
+        api_key = self.api_key_var.get().strip()
+
+        threading.Thread(
+            target=self._call_model,
+            args=(provider, model, api_key, messages_snapshot),
+            daemon=True,
+        ).start()
+
+    def _call_model(
+        self, provider: str, model: str, api_key: str, messages: list[dict[str, str]]
+    ) -> None:
+        """Query the selected provider and hand the reply or the error to the Tk event loop."""
+        try:
+            if provider == "Ollama":
+                response_text = self._call_ollama(model, messages)
+            else:
+                response_text = self._call_openrouter(model, api_key, messages)
+        except Exception as exc:
+            # Pass `exc` as an argument: the name is unbound once the except clause
+            # exits, so a lambda reading it from the deferred callback raises NameError.
+            self.root.after(0, self._handle_error, exc)
+            return
+        # Success path: deliver the reply through the same ``after`` hand-off.
+        self.root.after(0, self._handle_response, response_text)
+
+    def _handle_response(self, response_text: str) -> None:
+        self._append_chat("assistant", response_text)
+        self.messages.append({"role": "assistant", "content": response_text})
+        self._update_context_display()
+        self._update_token_counts(response_text)
+        self._set_busy(False)
+        self._set_status("Ready")
+
+    def _handle_error(self, exc: Exception) -> None:
+        self._append_chat("assistant", f"[Error] {exc}")
+        self._set_busy(False)
+        self._set_status("Error while calling model.")
+
+    def on_clear(self) -> None:
+        self.messages.clear()
+        self.context_text = ""
+        self.chat_display.configure(state="normal")
+        self.chat_display.delete("1.0", "end")
+        self.chat_display.configure(state="disabled")
+        self._append_chat("assistant", "[New conversation started]")
+        self._update_context_display()
+        self.prompt_tokens_var.set("Prompt tokens: 0")
+        self.response_tokens_var.set("Response tokens: 0")
+        self.context_tokens_var.set("Context tokens: 0")
+        self.total_tokens_var.set("Total tokens: 0")
+
+    def _append_chat(self, role: str, content: str) -> None:
+        self.chat_display.configure(state="normal")
+        label = "You" if role == "user" else "Assistant"
+        self.chat_display.insert("end", f"{label}: {content}\n\n")
+        self.chat_display.configure(state="disabled")
+        self.chat_display.see("end")
+
+    def _update_context_display(self) -> None:
+        self.context_text = "\n".join(
+            f"{message['role']}: {message['content']}" for message in self.messages
+        )
+        self.context_display.configure(state="normal")
+        self.context_display.delete("1.0", "end")
+        self.context_display.insert("end", self.context_text)
+        self.context_display.configure(state="disabled")
+        self._update_context_tokens()
+
+    def _update_token_counts(self, response_text: str) -> None:
+        prompt_text = self.messages[-2]["content"] if len(self.messages) >= 2 else ""
+        prompt_tokens = self._safe_count_tokens(prompt_text)
+        response_tokens = self._safe_count_tokens(response_text)
+        context_tokens = self._safe_count_tokens(self.context_text)
+        total_tokens = context_tokens
+
+        self.prompt_tokens_var.set(f"Prompt tokens: {prompt_tokens}")
+        self.response_tokens_var.set(f"Response tokens: {response_tokens}")
+        self.context_tokens_var.set(f"Context tokens: {context_tokens}")
+        self.total_tokens_var.set(f"Total tokens: {total_tokens}")
+
+    def _update_prompt_tokens(self, prompt_text: str) -> None:
+        prompt_tokens = self._safe_count_tokens(prompt_text)
+        self.prompt_tokens_var.set(f"Prompt tokens: {prompt_tokens}")
+
+    def _update_context_tokens(self) -> None:
+        context_tokens = self._safe_count_tokens(self.context_text)
+        self.context_tokens_var.set(f"Context tokens: {context_tokens}")
+        self.total_tokens_var.set(f"Total tokens: {context_tokens}")
+
+    def _set_status(self, message: str) -> None:
+        self.status_var.set(message)
+
+    def _set_busy(self, busy: bool) -> None:
+        if busy:
+            self.send_button.state(["disabled"])
+        else:
+            self.send_button.state(["!disabled"])
+
+    def _safe_count_tokens(self, text: str) -> int:
+        if not text:
+            return 0
+        try:
+            return tokens.count_tokens(text)
+        except Exception:
+            return len(text.split())
+
+    def _call_ollama(self, model: str, messages: list[dict[str, str]]) -> str:
+        payload = {"model": model, "messages": messages, "stream": False}
+        response = self._post_json("http://localhost:11434/api/chat", payload=payload)
+        message = response.get("message", {})
+        return str(message.get("content", "")).strip()
+
+    def _call_openrouter(
+        self, model: str, api_key: str, messages: list[dict[str, str]]
+    ) -> str:
+        payload = {
+            "model": model,
+            "messages": messages,
+            "temperature": 0.7,
+        }
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+            "HTTP-Referer": "http://localhost",
+            "X-Title": "Agent Zero Simple GUI",
+        }
+        response = self._post_json(
+            "https://openrouter.ai/api/v1/chat/completions",
+            payload=payload,
+            headers=headers,
+        )
+        choices = response.get("choices", [])
+        if not choices:
+            return ""
+        message = choices[0].get("message", {})
+        return str(message.get("content", "")).strip()
+
+    def _post_json(
+        self,
+        url: str,
+        payload: dict[str, object],
+        headers: dict[str, str] | None = None,
+        method: str = "POST",
+    ) -> dict[str, object]:
+        data = None if method == "GET" else json.dumps(payload).encode("utf-8")
+        req_headers = {"Content-Type": "application/json"}
+        if headers:
+            req_headers.update(headers)
+        req = url_request.Request(url, data=data, headers=req_headers, method=method)
+        try:
+            with url_request.urlopen(req, timeout=120) as resp:
+                body = resp.read().decode("utf-8")
+        except url_error.HTTPError as exc:
+            detail = exc.read().decode("utf-8", errors="replace")
+            raise RuntimeError(f"HTTP {exc.code}: {detail}") from exc
+        except url_error.URLError as exc:
+            raise RuntimeError(f"Connection error: {exc}") from exc
+        return json.loads(body) if body else {}
+
+
+def main() -> None:
+    root = tk.Tk()
+    app = SimpleChatGUI(root)
+    app._append_chat("assistant", "Welcome! Enter a prompt to begin.")
+    root.mainloop()
+
+
+if __name__ == "__main__":
+    main()