"""Token Compression Protocol (TCP) server.

Speaks newline-delimited JSON over a TCP socket.  User prompts are rendered
as base64, model responses are accepted as base54 and decoded back into the
conversation's text encoding, and a per-conversation context (the running
transcript, base64-rendered) is kept in memory and periodically persisted to
a JSON dataset file by a background thread.
"""

import base64
import json
import logging
import os
import re
import socketserver
import threading
import time
import uuid
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

try:
    from python.helpers import files, tokens
except ImportError:
    # NOTE(review): the absolute ``python.helpers`` import only resolves when
    # the repository root is importable; presumably that is not the case when
    # this file is executed directly as a script -- confirm.  Without the
    # helpers, token counts fall back to a whitespace estimate and
    # ``run_tcp_server`` needs an explicit ``dataset_path``.
    files = None
    tokens = None


logger = logging.getLogger(__name__)

BASE54_ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstyz"
BASE54_INDEX = {char: idx for idx, char in enumerate(BASE54_ALPHABET)}
BASE54_BASE = len(BASE54_ALPHABET)

CONTROL_TAG_SHOW_SAVINGS = "**show savings**"
CONTROL_TAG_SHOW_TOTAL = "**show total**"

# Compiled once; the tags are matched case-insensitively anywhere in a prompt.
_SHOW_SAVINGS_RE = re.compile(re.escape(CONTROL_TAG_SHOW_SAVINGS), re.IGNORECASE)
_SHOW_TOTAL_RE = re.compile(re.escape(CONTROL_TAG_SHOW_TOTAL), re.IGNORECASE)

DEFAULT_ENCODING = "utf-8"


def _safe_count_tokens(text: str) -> int:
    """Return the token count of *text*, never raising.

    Uses ``tokens.count_tokens`` when the project helper is available and
    falls back to a whitespace word count (minimum 1 for non-empty text)
    when it is missing or fails.
    """
    if not text:
        return 0
    if tokens is not None:
        try:
            return tokens.count_tokens(text)
        except Exception:
            # Best effort by design: a tokenizer failure must not break the
            # protocol, so fall through to the estimate below.
            logger.debug(
                "Token counting failed; using whitespace estimate", exc_info=True
            )
    return max(1, len(text.split()))


def _token_stats(raw_text: str, encoded_text: str) -> Dict[str, int]:
    """Return ``raw``/``encoded``/``saved`` token counts for a text pair.

    ``saved`` is clamped at zero, so an encoding that costs more tokens than
    the raw text is reported as saving nothing rather than a negative value.
    """
    raw_tokens = _safe_count_tokens(raw_text)
    encoded_tokens = _safe_count_tokens(encoded_text)
    return {
        "raw": raw_tokens,
        "encoded": encoded_tokens,
        "saved": max(0, raw_tokens - encoded_tokens),
    }


def _strip_control_tags(text: str) -> Tuple[str, bool, bool]:
    """Remove the savings control tags from *text*.

    Returns:
        ``(clean_text, show_savings, show_total)``.  ``show_total`` implies
        ``show_savings``.  Empty input is returned unchanged.
    """
    if not text:
        return text, False, False
    text, savings_hits = _SHOW_SAVINGS_RE.subn("", text)
    text, total_hits = _SHOW_TOTAL_RE.subn("", text)
    show_total = total_hits > 0
    show_savings = savings_hits > 0 or show_total
    return text.strip(), show_savings, show_total


def b54encode(payload: bytes) -> str:
    """Encode *payload* as base54 text.

    The bytes are read as one big-endian integer.  Each leading zero byte is
    preserved as one leading ``BASE54_ALPHABET[0]`` character, because zero
    bytes at the front do not contribute to the integer value.
    """
    if not payload:
        return ""
    num = int.from_bytes(payload, "big")
    digits: List[str] = []
    while num > 0:
        num, rem = divmod(num, BASE54_BASE)
        digits.append(BASE54_ALPHABET[rem])
    pad = len(payload) - len(payload.lstrip(b"\x00"))
    return BASE54_ALPHABET[0] * pad + "".join(reversed(digits))


def b54decode(payload: str) -> bytes:
    """Decode base54 text produced by :func:`b54encode`.

    Raises:
        ValueError: if *payload* contains a character outside the alphabet.
    """
    if payload == "":
        return b""
    num = 0
    for char in payload:
        try:
            digit = BASE54_INDEX[char]
        except KeyError:
            raise ValueError(f"Invalid base54 character: {char!r}") from None
        num = num * BASE54_BASE + digit
    # Leading zero-digits carry the leading zero bytes (see b54encode).
    pad = len(payload) - len(payload.lstrip(BASE54_ALPHABET[0]))
    decoded = num.to_bytes((num.bit_length() + 7) // 8, "big") if num else b""
    return b"\x00" * pad + decoded


def _validate_encoding(encoding: Any) -> str:
    """Return *encoding* if it names a usable text codec.

    The value comes straight from client requests, so it is probed before it
    is stored on a conversation: an unknown codec stored on the state would
    otherwise make every later context refresh raise ``LookupError``.

    Raises:
        ValueError: if *encoding* is not a string or cannot encode text.
    """
    if not isinstance(encoding, str):
        raise ValueError(f"Encoding must be a string, got {type(encoding).__name__}")
    try:
        "a".encode(encoding, errors="replace")
    except (LookupError, ValueError) as exc:
        raise ValueError(f"Unsupported text encoding: {encoding!r}") from exc
    return encoding


def _b64encode_text(text: str, encoding: str) -> str:
    """Encode *text* with *encoding* (unencodable characters replaced), then base64."""
    return base64.b64encode(text.encode(encoding, errors="replace")).decode("ascii")


def _context_text(messages: List[Dict[str, str]]) -> str:
    """Render the transcript as ``role: text`` lines joined by newlines."""
    return "\n".join(f"{entry['role']}: {entry['text']}" for entry in messages).strip()


@dataclass
class ConversationState:
    """In-memory state of a single conversation."""

    conversation_id: str
    encoding: str = DEFAULT_ENCODING
    language: str = "unknown"
    messages: List[Dict[str, str]] = field(default_factory=list)
    context_b64: str = ""
    context_tokens: Dict[str, int] = field(default_factory=dict)
    # Running totals over the whole conversation.
    prompt_tokens_raw: int = 0
    prompt_tokens_encoded: int = 0
    response_tokens_raw: int = 0
    response_tokens_encoded: int = 0
    # Stats of the most recent prompt/response; not persisted by to_dict().
    last_prompt_stats: Dict[str, int] = field(default_factory=dict)
    last_response_stats: Dict[str, int] = field(default_factory=dict)
    # Set by a prompt carrying a control tag, consumed by the next response;
    # not persisted by to_dict().
    pending_show_savings: bool = False
    pending_show_total: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Return the persisted subset of the state as a JSON-ready dict."""
        return {
            "conversation_id": self.conversation_id,
            "encoding": self.encoding,
            "language": self.language,
            "messages": self.messages,
            "context_b64": self.context_b64,
            "context_tokens": self.context_tokens,
            "prompt_tokens_raw": self.prompt_tokens_raw,
            "prompt_tokens_encoded": self.prompt_tokens_encoded,
            "response_tokens_raw": self.response_tokens_raw,
            "response_tokens_encoded": self.response_tokens_encoded,
        }

    @classmethod
    def from_dict(cls, payload: Dict[str, Any]) -> "ConversationState":
        """Rebuild a state from :meth:`to_dict` output, defaulting missing keys."""
        return cls(
            conversation_id=payload.get("conversation_id", ""),
            encoding=payload.get("encoding", DEFAULT_ENCODING),
            language=payload.get("language", "unknown"),
            messages=payload.get("messages", []),
            context_b64=payload.get("context_b64", ""),
            context_tokens=payload.get("context_tokens", {}),
            prompt_tokens_raw=payload.get("prompt_tokens_raw", 0),
            prompt_tokens_encoded=payload.get("prompt_tokens_encoded", 0),
            response_tokens_raw=payload.get("response_tokens_raw", 0),
            response_tokens_encoded=payload.get("response_tokens_encoded", 0),
        )


class ContextStore:
    """Lock-protected conversation registry with periodic JSON persistence.

    A daemon thread wakes every ``refresh_interval`` seconds and writes the
    dataset file when something changed since the last successful write.
    """

    def __init__(self, dataset_path: str, refresh_interval: float = 5.0):
        self.dataset_path = dataset_path
        self.refresh_interval = refresh_interval
        self._lock = threading.Lock()
        self._dirty = False
        self._conversations: Dict[str, ConversationState] = {}
        self._stop_event = threading.Event()
        self._load()
        self._thread = threading.Thread(
            target=self._maintenance_loop,
            name="tcp-context-maintainer",
            daemon=True,
        )
        self._thread.start()

    def _load(self) -> None:
        """Populate the registry from the dataset file, if it is usable.

        A missing, unreadable or malformed file leaves the store empty;
        entries that are not JSON objects are skipped.
        """
        if not os.path.exists(self.dataset_path):
            return
        try:
            with open(self.dataset_path, "r", encoding="utf-8") as handle:
                data = json.load(handle)
        except (OSError, json.JSONDecodeError):
            return
        conversations = data.get("conversations", {}) if isinstance(data, dict) else {}
        if not isinstance(conversations, dict):
            return
        for conv_id, payload in conversations.items():
            if not isinstance(payload, dict):
                continue
            state = ConversationState.from_dict(payload)
            if not state.conversation_id:
                state.conversation_id = conv_id
            try:
                _validate_encoding(state.encoding)
            except ValueError:
                # A codec we cannot use would break every context refresh.
                state.encoding = DEFAULT_ENCODING
            self._conversations[conv_id] = state

    def _maintenance_loop(self) -> None:
        """Flush pending changes every ``refresh_interval`` until stopped."""
        while not self._stop_event.wait(self.refresh_interval):
            try:
                self._flush_if_dirty()
            except Exception:
                # Top of a background thread: log and keep running so one
                # failed write does not end persistence for the process.
                # The dirty flag is still set, so the next tick retries.
                logger.warning(
                    "Failed to persist %s; will retry", self.dataset_path, exc_info=True
                )

    def _flush_if_dirty(self) -> None:
        """Write the dataset file if there are unsaved changes.

        Raises:
            OSError: if the write fails; the store is re-marked dirty first
                so a later flush retries.
        """
        with self._lock:
            if not self._dirty:
                return
            snapshot = self._snapshot_locked()
            self._dirty = False
        # File I/O happens outside the lock so requests are not blocked.
        try:
            self._persist_snapshot(snapshot)
        except OSError:
            with self._lock:
                self._dirty = True
            raise

    def _snapshot_locked(self) -> Dict[str, Any]:
        """Build the persisted document.  Caller must hold ``self._lock``."""
        for state in self._conversations.values():
            self._refresh_context_locked(state)
        return {
            "updated_at": time.strftime("%Y-%m-%d %H:%M:%S"),
            "conversations": {
                conv_id: state.to_dict()
                for conv_id, state in self._conversations.items()
            },
        }

    def _persist_snapshot(self, snapshot: Dict[str, Any]) -> None:
        """Write *snapshot* to a temp file and move it over the dataset file."""
        directory = os.path.dirname(self.dataset_path)
        if directory:  # a bare filename has no directory to create
            os.makedirs(directory, exist_ok=True)
        tmp_path = f"{self.dataset_path}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as handle:
            json.dump(snapshot, handle, ensure_ascii=True, indent=2)
        os.replace(tmp_path, self.dataset_path)

    def stop(self) -> None:
        """Stop the maintenance thread and flush any pending changes."""
        self._stop_event.set()
        self._thread.join(timeout=self.refresh_interval)
        self._flush_if_dirty()

    def get_or_create(
        self,
        conversation_id: Optional[str],
        encoding: Optional[str],
        language: Optional[str],
    ) -> ConversationState:
        """Return the conversation, creating it (with a new UUID if needed).

        Truthy *encoding* / *language* values overwrite the stored ones.

        Raises:
            ValueError: if *encoding* is given but is not a usable text codec.
                Nothing is created or modified in that case.
        """
        if encoding:
            _validate_encoding(encoding)
        with self._lock:
            if not conversation_id:
                conversation_id = str(uuid.uuid4())
            state = self._conversations.get(conversation_id)
            if state is None:
                state = ConversationState(conversation_id=conversation_id)
                self._conversations[conversation_id] = state
            if encoding:
                state.encoding = encoding
            if language:
                state.language = language
            return state

    def reset(self, conversation_id: str) -> bool:
        """Delete a conversation.  Return ``False`` if it does not exist."""
        with self._lock:
            if self._conversations.pop(conversation_id, None) is None:
                return False
            self._dirty = True
            return True

    def list_contexts(self) -> Dict[str, Dict[str, Any]]:
        """Return the refreshed context summary of every conversation."""
        with self._lock:
            contexts = {}
            for conv_id, state in self._conversations.items():
                self._refresh_context_locked(state)
                contexts[conv_id] = {
                    "context_b64": state.context_b64,
                    "encoding": state.encoding,
                    "language": state.language,
                    "context_tokens": state.context_tokens,
                }
            return contexts

    def get_context(self, conversation_id: str) -> Optional[Dict[str, Any]]:
        """Return one conversation's refreshed context, or ``None`` if unknown."""
        with self._lock:
            state = self._conversations.get(conversation_id)
            if not state:
                return None
            self._refresh_context_locked(state)
            return {
                "conversation_id": state.conversation_id,
                "context_b64": state.context_b64,
                "encoding": state.encoding,
                "language": state.language,
                "context_tokens": state.context_tokens,
            }

    def record_prompt(
        self,
        conversation_id: Optional[str],
        text: str,
        encoding: Optional[str],
        language: Optional[str],
    ) -> Dict[str, Any]:
        """Append a user prompt and return its base64 form plus the context.

        Control tags are stripped from *text* before encoding and remembered
        so the next recorded response carries the savings report.

        Raises:
            ValueError: if *encoding* is not a usable text codec.
        """
        state = self.get_or_create(conversation_id, encoding, language)
        clean_text, show_savings, show_total = _strip_control_tags(text)
        encoded_prompt = _b64encode_text(clean_text, state.encoding)
        prompt_stats = _token_stats(clean_text, encoded_prompt)
        with self._lock:
            state.messages.append({"role": "user", "text": clean_text})
            state.prompt_tokens_raw += prompt_stats["raw"]
            state.prompt_tokens_encoded += prompt_stats["encoded"]
            state.last_prompt_stats = prompt_stats
            state.pending_show_savings = show_savings or show_total
            state.pending_show_total = show_total
            self._refresh_context_locked(state)
            self._dirty = True
            response = {
                "conversation_id": state.conversation_id,
                "encoding": state.encoding,
                "language": state.language,
                "encoded_prompt_b64": encoded_prompt,
                "context_b64": state.context_b64,
                "context_tokens": state.context_tokens,
                "prompt_tokens": prompt_stats,
                "savings_request": {
                    "show_savings": state.pending_show_savings,
                    "show_total": state.pending_show_total,
                },
            }
        return response

    def record_response(
        self,
        conversation_id: str,
        payload_b54: str,
    ) -> Dict[str, Any]:
        """Decode a base54 model response and append it to the conversation.

        When the preceding prompt requested it, the result also carries
        ``savings``, ``tagline`` and ``decoded_text_with_tagline``.

        Raises:
            KeyError: if *conversation_id* is unknown.
            ValueError: if *payload_b54* is not valid base54.
        """
        with self._lock:
            state = self._conversations.get(conversation_id)
            if not state:
                raise KeyError("Unknown conversation_id")
            encoding = state.encoding
            language = state.language

        # Decoding and token counting run outside the lock.
        decoded_bytes = b54decode(payload_b54)
        decoded_text = decoded_bytes.decode(encoding, errors="replace")
        response_stats = _token_stats(decoded_text, payload_b54)

        with self._lock:
            state.messages.append({"role": "assistant", "text": decoded_text})
            state.response_tokens_raw += response_stats["raw"]
            state.response_tokens_encoded += response_stats["encoded"]
            state.last_response_stats = response_stats
            self._refresh_context_locked(state)
            savings_payload = None
            tagline = None
            decoded_text_with_tagline = None
            if state.pending_show_savings:
                savings_payload = self._build_savings_payload(state)
                tagline = self._format_tagline(
                    savings_payload,
                    include_total=state.pending_show_total,
                )
                decoded_text_with_tagline = (
                    decoded_text + "\n" + tagline if decoded_text else tagline
                )
            # The request is one-shot: it applies to this response only.
            state.pending_show_savings = False
            state.pending_show_total = False
            self._dirty = True
            response = {
                "conversation_id": state.conversation_id,
                "encoding": encoding,
                "language": language,
                "response_b54": payload_b54,
                "decoded_text": decoded_text,
                "response_tokens": response_stats,
                "context_b64": state.context_b64,
                "context_tokens": state.context_tokens,
            }
        if savings_payload:
            response["savings"] = savings_payload
        if tagline:
            response["tagline"] = tagline
            response["decoded_text_with_tagline"] = decoded_text_with_tagline
        return response

    def _refresh_context_locked(self, state: ConversationState) -> None:
        """Recompute the base64 context and its stats.  Caller holds the lock."""
        context_text = _context_text(state.messages)
        state.context_b64 = _b64encode_text(context_text, state.encoding)
        state.context_tokens = _token_stats(context_text, state.context_b64)

    def _build_savings_payload(self, state: ConversationState) -> Dict[str, Any]:
        """Return last-exchange savings plus conversation totals for *state*."""
        empty_stats = {"raw": 0, "encoded": 0, "saved": 0}
        prompt_stats = state.last_prompt_stats or dict(empty_stats)
        response_stats = state.last_response_stats or dict(empty_stats)
        context_stats = state.context_tokens or dict(empty_stats)
        combined_saved = (
            prompt_stats.get("saved", 0)
            + response_stats.get("saved", 0)
            + context_stats.get("saved", 0)
        )
        totals: Dict[str, Any] = {
            "prompt": {
                "raw": state.prompt_tokens_raw,
                "encoded": state.prompt_tokens_encoded,
                "saved": max(
                    0, state.prompt_tokens_raw - state.prompt_tokens_encoded
                ),
            },
            "response": {
                "raw": state.response_tokens_raw,
                "encoded": state.response_tokens_encoded,
                "saved": max(
                    0, state.response_tokens_raw - state.response_tokens_encoded
                ),
            },
            "context": context_stats,
        }
        totals["combined_saved"] = (
            totals["prompt"]["saved"]
            + totals["response"]["saved"]
            + totals["context"]["saved"]
        )
        return {
            "prompt": prompt_stats,
            "response": response_stats,
            "context": context_stats,
            "combined_saved": combined_saved,
            "totals": totals,
        }

    def _format_tagline(self, savings: Dict[str, Any], include_total: bool) -> str:
        """Format *savings* as the one-line summary appended to responses."""
        prompt_saved = savings["prompt"]["saved"]
        response_saved = savings["response"]["saved"]
        context_saved = savings["context"]["saved"]
        combined_saved = savings["combined_saved"]
        tagline = (
            "Token savings (prompt/response/context/combined): "
            f"{prompt_saved}/{response_saved}/{context_saved}/{combined_saved}."
        )
        if include_total:
            totals = savings.get("totals", {})
            totals_prompt = totals.get("prompt", {}).get("saved", 0)
            totals_response = totals.get("response", {}).get("saved", 0)
            totals_context = totals.get("context", {}).get("saved", 0)
            totals_combined = totals.get("combined_saved", 0)
            tagline += (
                " Total savings (prompt/response/context/combined): "
                f"{totals_prompt}/{totals_response}/{totals_context}/{totals_combined}."
            )
        return tagline


class TokenCompressionProtocolProcessor:
    """Map one decoded JSON request onto a :class:`ContextStore` operation."""

    def __init__(self, store: ContextStore):
        self.store = store

    def handle(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Execute ``payload["action"]`` and return the response envelope.

        Success is ``{"ok": True, "result": ...}``; failure is
        ``{"ok": False, "error": code}`` with an optional ``detail``.
        """
        action = payload.get("action")
        if not action:
            return {"ok": False, "error": "missing_action"}

        if action == "prompt":
            text = payload.get("text", "")
            if not isinstance(text, str) or text == "":
                return {"ok": False, "error": "missing_text"}
            try:
                response = self.store.record_prompt(
                    conversation_id=payload.get("conversation_id"),
                    text=text,
                    encoding=payload.get("encoding"),
                    language=payload.get("language"),
                )
            except ValueError as exc:
                return {"ok": False, "error": "invalid_encoding", "detail": str(exc)}
            return {"ok": True, "result": response}

        if action == "response":
            conversation_id = payload.get("conversation_id")
            if not conversation_id:
                return {"ok": False, "error": "missing_conversation_id"}
            payload_b54 = payload.get("payload_b54", "")
            if not isinstance(payload_b54, str) or payload_b54 == "":
                return {"ok": False, "error": "missing_payload_b54"}
            try:
                response = self.store.record_response(
                    conversation_id=conversation_id,
                    payload_b54=payload_b54,
                )
            except KeyError:
                return {"ok": False, "error": "unknown_conversation_id"}
            except ValueError as exc:
                return {"ok": False, "error": "invalid_base54", "detail": str(exc)}
            return {"ok": True, "result": response}

        if action == "context_get":
            conversation_id = payload.get("conversation_id")
            if conversation_id:
                context = self.store.get_context(conversation_id)
                if not context:
                    return {"ok": False, "error": "unknown_conversation_id"}
                return {"ok": True, "result": context}
            return {"ok": True, "result": {"contexts": self.store.list_contexts()}}

        if action == "context_reset":
            conversation_id = payload.get("conversation_id")
            if not conversation_id:
                return {"ok": False, "error": "missing_conversation_id"}
            if self.store.reset(conversation_id):
                return {"ok": True, "result": {"conversation_id": conversation_id}}
            return {"ok": False, "error": "unknown_conversation_id"}

        if action == "ping":
            return {"ok": True, "result": {"message": "pong"}}

        return {"ok": False, "error": "unknown_action"}


class TokenCompressionTCPServer(socketserver.ThreadingTCPServer):
    """Threaded TCP server that carries the shared protocol processor."""

    allow_reuse_address = True
    daemon_threads = True

    def __init__(self, server_address, RequestHandlerClass, processor):
        super().__init__(server_address, RequestHandlerClass)
        self.processor = processor


class TokenCompressionRequestHandler(socketserver.StreamRequestHandler):
    """Serve one connection: one JSON request per line, one JSON reply per line."""

    def handle(self) -> None:
        """Answer requests until the peer closes the connection."""
        for raw_line in iter(self.rfile.readline, b""):
            raw_line = raw_line.strip()
            if not raw_line:
                continue
            try:
                request = json.loads(raw_line.decode("utf-8"))
            except (UnicodeDecodeError, json.JSONDecodeError) as exc:
                # Bytes that are not UTF-8 are reported like any other
                # unparsable line instead of ending the connection.
                self._send({"ok": False, "error": "invalid_json", "detail": str(exc)})
                continue
            if not isinstance(request, dict):
                self._send({"ok": False, "error": "invalid_payload"})
                continue
            try:
                response = self.server.processor.handle(request)
            except Exception:
                # Request boundary: log the failure and keep the connection
                # usable for the client's next request.
                logger.exception("Unhandled error while processing request")
                response = {"ok": False, "error": "internal_error"}
            self._send(response)

    def _send(self, payload: Dict[str, Any]) -> None:
        """Write *payload* as a single ASCII-safe JSON line."""
        encoded = json.dumps(payload, ensure_ascii=True).encode("utf-8") + b"\n"
        self.wfile.write(encoded)


def run_tcp_server(
    host: str = "127.0.0.1",
    port: int = 7543,
    dataset_path: Optional[str] = None,
    refresh_interval: float = 5.0,
) -> None:
    """Run the server until interrupted, then flush the store.

    Args:
        host: Interface to bind.
        port: TCP port to bind.
        dataset_path: Dataset file; defaults to
            ``files.get_abs_path("memory", "token_compression_context.json")``.
        refresh_interval: Seconds between background persistence passes.

    Raises:
        RuntimeError: if no *dataset_path* is given and the project ``files``
            helper could not be imported.
    """
    if not dataset_path:
        if files is None:
            raise RuntimeError(
                "dataset_path is required when python.helpers.files is unavailable"
            )
        dataset_path = files.get_abs_path("memory", "token_compression_context.json")
    store = ContextStore(dataset_path=dataset_path, refresh_interval=refresh_interval)
    try:
        processor = TokenCompressionProtocolProcessor(store)
        # The context manager closes the listening socket on exit; the outer
        # ``finally`` stops the store even when binding the port fails.
        with TokenCompressionTCPServer(
            (host, port), TokenCompressionRequestHandler, processor
        ) as server:
            try:
                server.serve_forever()
            except KeyboardInterrupt:
                pass
    finally:
        store.stop()


if __name__ == "__main__":
    run_tcp_server()
0940c6c8a7ecbed8df2ef8895483d37f6e16786a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 20 Jan 2026 11:18:08 +0000 Subject: [PATCH 2/3] Document token compression protocol and extension flow Co-authored-by: nicsins --- docs/README.md | 2 + docs/token_compression_protocol.md | 303 +++++++++++++++++++++++++++++ 2 files changed, 305 insertions(+) create mode 100644 docs/token_compression_protocol.md diff --git a/docs/README.md b/docs/README.md index 40ca2ff99..41e56d8a4 100644 --- a/docs/README.md +++ b/docs/README.md @@ -5,6 +5,7 @@ To begin with Agent Zero, follow the links below for detailed guides on various - **[Installation](installation.md):** Set up (or [update](installation.md#how-to-update-agent-zero)) Agent Zero on your system. - **[Usage Guide](usage.md):** Explore GUI features and usage scenarios. - **[Architecture Overview](architecture.md):** Understand the internal workings of the framework. +- **[Token Compression Protocol](token_compression_protocol.md):** Run the TCP service and integrate it with clients (including browser extensions). - **[Contributing](contribution.md):** Learn how to contribute to the Agent Zero project. - **[Troubleshooting and FAQ](troubleshooting.md):** Find answers to common issues and questions. 
@@ -59,6 +60,7 @@ To begin with Agent Zero, follow the links below for detailed guides on various - [Making Changes](contribution.md#making-changes) - [Submitting a Pull Request](contribution.md#submitting-a-pull-request) - [Documentation Stack](contribution.md#documentation-stack) +- [Token Compression Protocol](token_compression_protocol.md) - [Troubleshooting and FAQ](troubleshooting.md) - [Frequently Asked Questions](troubleshooting.md#frequently-asked-questions) - [Troubleshooting](troubleshooting.md#troubleshooting) \ No newline at end of file diff --git a/docs/token_compression_protocol.md b/docs/token_compression_protocol.md new file mode 100644 index 000000000..646efcf82 --- /dev/null +++ b/docs/token_compression_protocol.md @@ -0,0 +1,303 @@ +# Token Compression Protocol (TCP) + +This document defines an easy-to-implement protocol for compressing LLM prompts +and responses while preserving the original encoding and a persistent context. +It is designed to run as a local TCP service and integrate cleanly with browser +extensions via a lightweight native-host bridge. + +## Goals + +- Encode user prompts in base64. +- Accept model responses in base54. +- Decode responses back into the original encoding (utf-8, ascii, etc). +- Maintain a persistent, base64-rendered context across conversations. +- Provide token savings diagnostics via `**show savings**` and `**show total**`. +- Keep the wire format simple: newline-delimited JSON over TCP. + +## Server Overview + +The TCP server lives at: + +- Module: `python/helpers/token_compression_protocol.py` +- Default host: `127.0.0.1` +- Default port: `7543` +- Context dataset: `memory/token_compression_context.json` + +Run it locally: + +```bash +python3 /workspace/python/helpers/token_compression_protocol.py +``` + +The server accepts one JSON object per line and returns one JSON object per line. 
+ +## Base54 Alphabet + +The response payload uses base54 with this alphabet: + +``` +123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstyz +``` + +This omits the visually ambiguous characters (0, O, I, l) and the lower-case +letters u, v, w, and x, which leaves exactly 54 symbols. + +## Transport Protocol (TCP) + +Each request is a single JSON line terminated by `\n` (LF). Each response is +also a single JSON line terminated by `\n`. + +### Common Envelope + +Successful responses use this envelope: + +```json +{ + "ok": true, + "result": { ... } +} +``` + +Errors are returned as: + +```json +{ + "ok": false, + "error": "error_code", + "detail": "optional detail" +} +``` + +### Actions + +#### `prompt` + +Encode a user prompt to base64, update context, and return the new context. + +Request: + +```json +{ + "action": "prompt", + "conversation_id": "optional", + "text": "user prompt text", + "encoding": "utf-8", + "language": "en" +} +``` + +Notes: +- If `conversation_id` is omitted, the server generates one. +- `encoding` and `language` are stored and reused for the conversation. +- If `**show savings**` or `**show total**` is present in `text`, it is stripped + before encoding and applied to the next `response`. + +Response: + +```json +{ + "ok": true, + "result": { + "conversation_id": "uuid", + "encoding": "utf-8", + "language": "en", + "encoded_prompt_b64": "SGVsbG8=", + "context_b64": "dXNlcjogSGVsbG8=", + "context_tokens": { "raw": 2, "encoded": 2, "saved": 0 }, + "prompt_tokens": { "raw": 2, "encoded": 2, "saved": 0 }, + "savings_request": { "show_savings": true, "show_total": false } + } +} +``` + +#### `response` + +Submit a base54 response payload from the model, decode it, and update context.
+ +Request: + +```json +{ + "action": "response", + "conversation_id": "uuid", + "payload_b54": "base54response" +} +``` + +Response (base form): + +```json +{ + "ok": true, + "result": { + "conversation_id": "uuid", + "encoding": "utf-8", + "language": "en", + "response_b54": "base54response", + "decoded_text": "model response", + "response_tokens": { "raw": 4, "encoded": 3, "saved": 1 }, + "context_b64": "dXNlcjogSGVsbG8K...==", + "context_tokens": { "raw": 6, "encoded": 5, "saved": 1 } + } +} +``` + +If `**show savings**` or `**show total**` was set in the last prompt, the +response includes a `savings` object, `tagline`, and `decoded_text_with_tagline`: + +```json +{ + "ok": true, + "result": { + "...": "...", + "tagline": "Token savings (prompt/response/context/combined): 0/1/1/2.", + "decoded_text_with_tagline": "model response\nToken savings ...", + "savings": { + "prompt": { "raw": 2, "encoded": 2, "saved": 0 }, + "response": { "raw": 4, "encoded": 3, "saved": 1 }, + "context": { "raw": 6, "encoded": 5, "saved": 1 }, + "combined_saved": 2, + "totals": { + "prompt": { "raw": 10, "encoded": 10, "saved": 0 }, + "response": { "raw": 20, "encoded": 18, "saved": 2 }, + "context": { "raw": 30, "encoded": 25, "saved": 5 }, + "combined_saved": 7 + } + } + } +} +``` + +#### `context_get` + +Fetch the current base64 context for a conversation (or all conversations). + +Request: + +```json +{ + "action": "context_get", + "conversation_id": "uuid" +} +``` + +Response: + +```json +{ + "ok": true, + "result": { + "conversation_id": "uuid", + "context_b64": "dXNlcjogSGVsbG8=", + "encoding": "utf-8", + "language": "en", + "context_tokens": { "raw": 2, "encoded": 2, "saved": 0 } + } +} +``` + +#### `context_reset` + +Delete a conversation from the dataset. 
+ +Request: + +```json +{ + "action": "context_reset", + "conversation_id": "uuid" +} +``` + +#### `ping` + +Request: + +```json +{ "action": "ping" } +``` + +Response: + +```json +{ "ok": true, "result": { "message": "pong" } } +``` + +## Browser Extension Integration + +Browsers cannot open raw TCP sockets directly. The easiest integration pattern +is a lightweight local bridge that the extension can message. + +### Option A: Native Messaging Host (Recommended) + +Use the browser's native messaging API to launch a small helper process that +connects to the TCP server and forwards JSON lines. + +Flow: + +1. Extension sends a JSON message to the native host. +2. Native host writes the JSON line to `127.0.0.1:7543`. +3. Native host reads the JSON response line and returns it to the extension. + +Advantages: +- Works in Chrome and Firefox. +- No CORS or HTTP server needed. +- Minimal bridging logic (just pass-through JSON lines). + +Native host pseudo-code: + +```python +import json, socket, sys + +def tcp_exchange(payload): + data = json.dumps(payload).encode("utf-8") + b"\n" + with socket.create_connection(("127.0.0.1", 7543)) as sock: + sock.sendall(data) + response = sock.makefile("rb").readline() + return json.loads(response.decode("utf-8")) +``` + +### Option B: Local HTTP/WS Bridge + +If you prefer `fetch` or WebSocket from the extension, run a local bridge that +translates HTTP/WS into the TCP line protocol: + +- `POST /tcp` -> send JSON line over TCP, return JSON response +- `GET /context/:conversation_id` -> map to `context_get` + +This is a thin shim and keeps the TCP protocol unchanged. + +## Example End-to-End Session + +1) Encode prompt: + +```json +{"action":"prompt","text":"Summarize this. **show savings**","encoding":"utf-8","language":"en"} +``` + +2) Send `encoded_prompt_b64` to the model (outside TCP server).
+ +3) Encode model output to base54 (client-side), then send: + +```json +{"action":"response","conversation_id":"...","payload_b54":"..."} +``` + +4) Receive decoded text plus savings tagline. + +## Data Persistence + +Context is stored in `memory/token_compression_context.json`. The server keeps a +background thread that refreshes and persists the context every few seconds. + +## Security Notes + +- Run the TCP server on `127.0.0.1` only. +- Treat `context_b64` as sensitive; it contains full conversation history. +- Use the native messaging approach if you need strict extension isolation. + +## Troubleshooting + +- `missing_payload_b54`: Ensure you send base54 for responses, not base64. +- `invalid_base54`: Check the alphabet and strip any non-base54 characters. +- `unknown_conversation_id`: Use the `conversation_id` returned by `prompt`. From f8f256704b33171e80ef4ad68a942e26c27adcf7 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 20 Jan 2026 11:28:50 +0000 Subject: [PATCH 3/3] Add simple Tkinter chat GUI for Ollama/OpenRouter Co-authored-by: nicsins --- run_simple_gui.py | 376 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 376 insertions(+) create mode 100644 run_simple_gui.py diff --git a/run_simple_gui.py b/run_simple_gui.py new file mode 100644 index 000000000..557c1f058 --- /dev/null +++ b/run_simple_gui.py @@ -0,0 +1,376 @@ +import json +import os +import threading +import tkinter as tk +from tkinter import ttk +from tkinter.scrolledtext import ScrolledText +from urllib import error as url_error +from urllib import request as url_request + +from python.helpers import tokens + + +class SimpleChatGUI: + def __init__(self, root: tk.Tk) -> None: + self.root = root + self.root.title("Simple LLM Chat") + self.root.minsize(980, 640) + + self.messages: list[dict[str, str]] = [] + self.context_text = "" + + self.provider_var = tk.StringVar(value="Ollama") + self.model_var = tk.StringVar( + value=os.environ.get("OLLAMA_MODEL", "llama3") + ) + 
self.api_key_var = tk.StringVar(value=os.environ.get("OPENROUTER_API_KEY", "")) + self.status_var = tk.StringVar(value="Ready") + self.prompt_tokens_var = tk.StringVar(value="Prompt tokens: 0") + self.response_tokens_var = tk.StringVar(value="Response tokens: 0") + self.context_tokens_var = tk.StringVar(value="Context tokens: 0") + self.total_tokens_var = tk.StringVar(value="Total tokens: 0") + + self._build_layout() + self._bind_events() + + def _build_layout(self) -> None: + main_frame = ttk.Frame(self.root, padding=10) + main_frame.pack(fill="both", expand=True) + + main_frame.columnconfigure(0, weight=3) + main_frame.columnconfigure(1, weight=1) + main_frame.rowconfigure(0, weight=1) + + chat_frame = ttk.Frame(main_frame) + chat_frame.grid(row=0, column=0, sticky="nsew", padx=(0, 10)) + chat_frame.rowconfigure(0, weight=1) + chat_frame.columnconfigure(0, weight=1) + + self.chat_display = ScrolledText(chat_frame, wrap="word", state="disabled") + self.chat_display.grid(row=0, column=0, columnspan=2, sticky="nsew") + + self.input_text = ScrolledText(chat_frame, height=6, wrap="word") + self.input_text.grid(row=1, column=0, sticky="nsew", pady=(10, 0)) + + button_frame = ttk.Frame(chat_frame) + button_frame.grid(row=1, column=1, sticky="se", padx=(10, 0), pady=(10, 0)) + + self.send_button = ttk.Button(button_frame, text="Send", command=self.on_send) + self.send_button.pack(fill="x", pady=(0, 6)) + + self.clear_button = ttk.Button( + button_frame, text="New Chat", command=self.on_clear + ) + self.clear_button.pack(fill="x") + + sidebar = ttk.Frame(main_frame) + sidebar.grid(row=0, column=1, sticky="nsew") + sidebar.rowconfigure(2, weight=1) + sidebar.columnconfigure(0, weight=1) + + settings_frame = ttk.LabelFrame(sidebar, text="Settings") + settings_frame.grid(row=0, column=0, sticky="ew", pady=(0, 10)) + settings_frame.columnconfigure(1, weight=1) + + ttk.Label(settings_frame, text="Provider").grid( + row=0, column=0, sticky="w", padx=8, pady=4 + ) + 
provider_combo = ttk.Combobox( + settings_frame, + textvariable=self.provider_var, + values=["Ollama", "OpenRouter"], + state="readonly", + ) + provider_combo.grid(row=0, column=1, sticky="ew", padx=8, pady=4) + + ttk.Label(settings_frame, text="Model").grid( + row=1, column=0, sticky="w", padx=8, pady=4 + ) + self.model_combo = ttk.Combobox( + settings_frame, textvariable=self.model_var, values=[] + ) + self.model_combo.grid(row=1, column=1, sticky="ew", padx=8, pady=4) + + self.refresh_button = ttk.Button( + settings_frame, text="Load Ollama Models", command=self.on_refresh_models + ) + self.refresh_button.grid(row=2, column=0, columnspan=2, sticky="ew", padx=8, pady=4) + + ttk.Label(settings_frame, text="OpenRouter API Key").grid( + row=3, column=0, sticky="w", padx=8, pady=4 + ) + self.api_key_entry = ttk.Entry( + settings_frame, textvariable=self.api_key_var, show="*" + ) + self.api_key_entry.grid(row=3, column=1, sticky="ew", padx=8, pady=4) + + context_frame = ttk.LabelFrame(sidebar, text="Conversation Context") + context_frame.grid(row=1, column=0, sticky="nsew", pady=(0, 10)) + context_frame.columnconfigure(0, weight=1) + context_frame.rowconfigure(0, weight=1) + + self.context_display = ScrolledText( + context_frame, wrap="word", height=12, state="disabled" + ) + self.context_display.grid(row=0, column=0, sticky="nsew", padx=6, pady=6) + + tokens_frame = ttk.LabelFrame(sidebar, text="Token Counts") + tokens_frame.grid(row=2, column=0, sticky="ew") + + ttk.Label(tokens_frame, textvariable=self.prompt_tokens_var).pack( + anchor="w", padx=8, pady=2 + ) + ttk.Label(tokens_frame, textvariable=self.response_tokens_var).pack( + anchor="w", padx=8, pady=2 + ) + ttk.Label(tokens_frame, textvariable=self.context_tokens_var).pack( + anchor="w", padx=8, pady=2 + ) + ttk.Label(tokens_frame, textvariable=self.total_tokens_var).pack( + anchor="w", padx=8, pady=2 + ) + + status_bar = ttk.Label( + self.root, textvariable=self.status_var, anchor="w" + ) + 
status_bar.pack(fill="x") + + self._refresh_provider_ui() + + def _bind_events(self) -> None: + self.input_text.bind("", lambda _evt: self.on_send()) + self.provider_var.trace_add("write", lambda *_: self._refresh_provider_ui()) + + def _refresh_provider_ui(self) -> None: + provider = self.provider_var.get().strip() + if provider == "Ollama": + self.refresh_button.state(["!disabled"]) + self.api_key_entry.state(["disabled"]) + else: + self.api_key_entry.state(["!disabled"]) + if not self.model_var.get().strip(): + self.model_var.set( + os.environ.get("OPENROUTER_MODEL", "openai/gpt-4o-mini") + ) + self.refresh_button.state(["disabled"]) + + def on_refresh_models(self) -> None: + if self.provider_var.get() != "Ollama": + return + self._set_status("Loading Ollama models...") + threading.Thread(target=self._load_ollama_models, daemon=True).start() + + def _load_ollama_models(self) -> None: + try: + response = self._post_json( + "http://localhost:11434/api/tags", payload={}, method="GET" + ) + models = sorted([entry["name"] for entry in response.get("models", [])]) + except Exception as exc: + self.root.after( + 0, lambda: self._set_status(f"Failed to load Ollama models: {exc}") + ) + return + + def update_models() -> None: + self.model_combo["values"] = models + if models and self.model_var.get() not in models: + self.model_var.set(models[0]) + self._set_status("Ollama models loaded.") + + self.root.after(0, update_models) + + def on_send(self) -> None: + user_text = self.input_text.get("1.0", "end").strip() + if not user_text: + return + if not self.model_var.get().strip(): + self._set_status("Please enter a model name.") + return + + if self.provider_var.get() == "OpenRouter" and not self.api_key_var.get().strip(): + self._set_status("OpenRouter API key is required.") + return + + self._append_chat("user", user_text) + self.messages.append({"role": "user", "content": user_text}) + self._update_context_display() + self._update_prompt_tokens(user_text) + 
self.response_tokens_var.set("Response tokens: 0") + self._set_status("Sending...") + self._set_busy(True) + + self.input_text.delete("1.0", "end") + + messages_snapshot = list(self.messages) + provider = self.provider_var.get() + model = self.model_var.get().strip() + api_key = self.api_key_var.get().strip() + + threading.Thread( + target=self._call_model, + args=(provider, model, api_key, messages_snapshot), + daemon=True, + ).start() + + def _call_model( + self, + provider: str, + model: str, + api_key: str, + messages: list[dict[str, str]], + ) -> None: + try: + if provider == "Ollama": + response_text = self._call_ollama(model, messages) + else: + response_text = self._call_openrouter(model, api_key, messages) + except Exception as exc: + self.root.after(0, lambda: self._handle_error(exc)) + return + self.root.after(0, lambda: self._handle_response(response_text)) + + def _handle_response(self, response_text: str) -> None: + self._append_chat("assistant", response_text) + self.messages.append({"role": "assistant", "content": response_text}) + self._update_context_display() + self._update_token_counts(response_text) + self._set_busy(False) + self._set_status("Ready") + + def _handle_error(self, exc: Exception) -> None: + self._append_chat("assistant", f"[Error] {exc}") + self._set_busy(False) + self._set_status("Error while calling model.") + + def on_clear(self) -> None: + self.messages.clear() + self.context_text = "" + self.chat_display.configure(state="normal") + self.chat_display.delete("1.0", "end") + self.chat_display.configure(state="disabled") + self._append_chat("assistant", "[New conversation started]") + self._update_context_display() + self.prompt_tokens_var.set("Prompt tokens: 0") + self.response_tokens_var.set("Response tokens: 0") + self.context_tokens_var.set("Context tokens: 0") + self.total_tokens_var.set("Total tokens: 0") + + def _append_chat(self, role: str, content: str) -> None: + self.chat_display.configure(state="normal") + label = 
"You" if role == "user" else "Assistant" + self.chat_display.insert("end", f"{label}: {content}\n\n") + self.chat_display.configure(state="disabled") + self.chat_display.see("end") + + def _update_context_display(self) -> None: + self.context_text = "\n".join( + f"{message['role']}: {message['content']}" for message in self.messages + ) + self.context_display.configure(state="normal") + self.context_display.delete("1.0", "end") + self.context_display.insert("end", self.context_text) + self.context_display.configure(state="disabled") + self._update_context_tokens() + + def _update_token_counts(self, response_text: str) -> None: + prompt_text = self.messages[-2]["content"] if len(self.messages) >= 2 else "" + prompt_tokens = self._safe_count_tokens(prompt_text) + response_tokens = self._safe_count_tokens(response_text) + context_tokens = self._safe_count_tokens(self.context_text) + total_tokens = context_tokens + + self.prompt_tokens_var.set(f"Prompt tokens: {prompt_tokens}") + self.response_tokens_var.set(f"Response tokens: {response_tokens}") + self.context_tokens_var.set(f"Context tokens: {context_tokens}") + self.total_tokens_var.set(f"Total tokens: {total_tokens}") + + def _update_prompt_tokens(self, prompt_text: str) -> None: + prompt_tokens = self._safe_count_tokens(prompt_text) + self.prompt_tokens_var.set(f"Prompt tokens: {prompt_tokens}") + + def _update_context_tokens(self) -> None: + context_tokens = self._safe_count_tokens(self.context_text) + self.context_tokens_var.set(f"Context tokens: {context_tokens}") + self.total_tokens_var.set(f"Total tokens: {context_tokens}") + + def _set_status(self, message: str) -> None: + self.status_var.set(message) + + def _set_busy(self, busy: bool) -> None: + if busy: + self.send_button.state(["disabled"]) + else: + self.send_button.state(["!disabled"]) + + def _safe_count_tokens(self, text: str) -> int: + if not text: + return 0 + try: + return tokens.count_tokens(text) + except Exception: + return len(text.split()) + 
+ def _call_ollama(self, model: str, messages: list[dict[str, str]]) -> str: + payload = {"model": model, "messages": messages, "stream": False} + response = self._post_json("http://localhost:11434/api/chat", payload=payload) + message = response.get("message", {}) + return str(message.get("content", "")).strip() + + def _call_openrouter( + self, model: str, api_key: str, messages: list[dict[str, str]] + ) -> str: + payload = { + "model": model, + "messages": messages, + "temperature": 0.7, + } + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + "HTTP-Referer": "http://localhost", + "X-Title": "Agent Zero Simple GUI", + } + response = self._post_json( + "https://openrouter.ai/api/v1/chat/completions", + payload=payload, + headers=headers, + ) + choices = response.get("choices", []) + if not choices: + return "" + message = choices[0].get("message", {}) + return str(message.get("content", "")).strip() + + def _post_json( + self, + url: str, + payload: dict[str, object], + headers: dict[str, str] | None = None, + method: str = "POST", + ) -> dict[str, object]: + data = None if method == "GET" else json.dumps(payload).encode("utf-8") + req_headers = {"Content-Type": "application/json"} + if headers: + req_headers.update(headers) + req = url_request.Request(url, data=data, headers=req_headers, method=method) + try: + with url_request.urlopen(req, timeout=120) as resp: + body = resp.read().decode("utf-8") + except url_error.HTTPError as exc: + detail = exc.read().decode("utf-8", errors="replace") + raise RuntimeError(f"HTTP {exc.code}: {detail}") from exc + except url_error.URLError as exc: + raise RuntimeError(f"Connection error: {exc}") from exc + return json.loads(body) if body else {} + + +def main() -> None: + root = tk.Tk() + app = SimpleChatGUI(root) + app._append_chat("assistant", "Welcome! Enter a prompt to begin.") + root.mainloop() + + +if __name__ == "__main__": + main()