Prep 0.14.0 (#34)

* tts agent first progress

* coqui support
voice lists

* orca-2

* tts tweaks

* switch to ux for audio gen

* some tweaks for the new audio queue

* fix error handling if llm fails to create a good world state on initial scene load

* loading creative mode for a new scene will now ask for confirmation if the current scene has unsaved progress

* local tts support

* fix voice list reloading when switching tts api
fix agent config ux to auto save on change, remove save / close buttons

* only do a delayed save on agent config on text input changes

* OrionStar

* don't allow scene loading when llm agents aren't correctly configured

* wire summarization to game loop, summarizer agent configs

* fix issues with time passage

* editor: fix narrator messages

* 0.14.0

* poetry lock

* requires_llm_client moved to cls property

* add additional config stubs

* tts still loads voices even if the agent is disabled

* fix bug that would keep losing voice selection for tts agent after backend restart

* update tts install requirements

* remove debug output
fiwo 2023-11-24 22:08:13 +02:00 committed by GitHub
parent b78fec3bac
commit 496eb469db
36 changed files with 2204 additions and 714 deletions


@ -162,7 +162,10 @@ Make sure you save the scene after the character is loaded as it can then be loa
## Further documentation
Please read the documents in the `docs` folder for more advanced configuration and usage.
- Creative mode (docs WIP)
- Prompt template overrides
- [Text-to-Speech (TTS)](docs/tts.md)
- [ChromaDB (long term memory)](docs/chromadb.md)
- Runpod Integration


@ -14,13 +14,32 @@ game:
gender: male
name: Elmer
## Long-term memory
#chromadb:
# embeddings: instructor
# instructor_device: cuda
# instructor_model: hkunlp/instructor-xl
## Remote LLMs
#openai:
# api_key: <API_KEY>
#runpod:
# api_key: <API_KEY>
## TTS (Text-to-Speech)
#elevenlabs:
# api_key: <API_KEY>
#coqui:
# api_key: <API_KEY>
#tts:
# device: cuda
# model: tts_models/multilingual/multi-dataset/xtts_v2
# voices:
# - label: <name>
# value: <path to .wav for voice sample>

docs/tts.md (new file, 84 lines)

@ -0,0 +1,84 @@
# Talemate Text-to-Speech (TTS) Configuration
Talemate supports Text-to-Speech (TTS) functionality, allowing users to convert text into spoken audio. This document outlines the steps required to configure TTS for Talemate using different providers, including ElevenLabs, Coqui, and a local TTS API.
## Configuring ElevenLabs TTS
To use ElevenLabs TTS with Talemate, follow these steps:
1. Visit [ElevenLabs](https://elevenlabs.com) and create an account if you don't already have one.
2. Click on your profile in the upper right corner of the ElevenLabs website to access your API key.
3. In the `config.yaml` file, under the `elevenlabs` section, set the `api_key` field with your ElevenLabs API key.
Example configuration snippet:
```yaml
elevenlabs:
api_key: <YOUR_ELEVENLABS_API_KEY>
```
## Configuring Coqui TTS
To use Coqui TTS with Talemate, follow these steps:
1. Visit [Coqui](https://app.coqui.ai) and sign up for an account.
2. Go to the [account page](https://app.coqui.ai/account) and scroll to the bottom to find your API key.
3. In the `config.yaml` file, under the `coqui` section, set the `api_key` field with your Coqui API key.
Example configuration snippet:
```yaml
coqui:
api_key: <YOUR_COQUI_API_KEY>
```
## Configuring Local TTS API
For running a local TTS API, Talemate requires specific dependencies to be installed.
### Windows Installation
Run `install-local-tts.bat` to install the necessary requirements.
### Linux Installation
Execute the following command:
```bash
pip install TTS
```
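The Windows installer pins the package version (`TTS>=0.21.1`), so you may want to apply the same pin on Linux:
```bash
pip install "TTS>=0.21.1"
```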
### Model and Device Configuration
1. Choose a TTS model from the [Coqui TTS model list](https://github.com/coqui-ai/TTS).
2. Decide whether to use `cuda` or `cpu` for the device setting.
3. The first time you run TTS through the local API, it will download the specified model. Please note that this may take some time, and the download progress will be visible in the Talemate backend output.
Example configuration snippet:
```yaml
tts:
device: cuda # or 'cpu'
model: tts_models/multilingual/multi-dataset/xtts_v2
```
### Voice Samples Configuration
Configure voice samples by setting the `value` field to the path of a .wav file voice sample. Official samples can be downloaded from [Coqui XTTS-v2 samples](https://huggingface.co/coqui/XTTS-v2/tree/main/samples).
Example configuration snippet:
```yaml
tts:
voices:
- label: English Male
value: path/to/english_male.wav
- label: English Female
value: path/to/english_female.wav
```
## Saving the Configuration
After configuring the `config.yaml` file, save your changes. Talemate will use the updated settings the next time it starts.
For more detailed information on configuring Talemate, refer to the `config.py` file in the Talemate source code and the `config.example.yaml` file for a bare-bones configuration example.
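For reference, a complete TTS-related section of `config.yaml` might look like the following (API keys and voice sample paths are placeholders):
```yaml
elevenlabs:
  api_key: <YOUR_ELEVENLABS_API_KEY>
coqui:
  api_key: <YOUR_COQUI_API_KEY>
tts:
  device: cuda # or 'cpu'
  model: tts_models/multilingual/multi-dataset/xtts_v2
  voices:
    - label: English Male
      value: path/to/english_male.wav
```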

install-local-tts.bat (new file, 4 lines)

@ -0,0 +1,4 @@
REM activate the virtual environment
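REM (assumes the talemate_env virtual environment already exists)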
call talemate_env\Scripts\activate
call pip install "TTS>=0.21.1"

poetry.lock (generated, 1629 lines)
File diff suppressed because it is too large.


@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"
[tool.poetry]
name = "talemate"
version = "0.13.2"
version = "0.14.0"
description = "AI-backed roleplay and narrative tools"
authors = ["FinalWombat"]
license = "GNU Affero General Public License v3.0"
@ -37,6 +37,7 @@ nest_asyncio = "^1.5.7"
isodate = ">=0.6.1"
thefuzz = ">=0.20.0"
tiktoken = ">=0.5.1"
nltk = ">=3.8.1"
# ChromaDB
chromadb = ">=0.4.17,<1"


@ -2,4 +2,4 @@ from .agents import Agent
from .client import TextGeneratorWebuiClient
from .tale_mate import *
VERSION = "0.13.2"
VERSION = "0.14.0"


@ -8,4 +8,5 @@ from .narrator import NarratorAgent
from .registry import AGENT_CLASSES, get_agent_class, register
from .summarize import SummarizeAgent
from .editor import EditorAgent
from .world_state import WorldStateAgent
from .world_state import WorldStateAgent
from .tts import TTSAgent


@ -23,16 +23,31 @@ __all__ = [
log = structlog.get_logger("talemate.agents.base")
class CallableConfigValue:
def __init__(self, fn):
self.fn = fn
def __str__(self):
return "CallableConfigValue"
def __repr__(self):
return "CallableConfigValue"
class AgentActionConfig(pydantic.BaseModel):
type: str
label: str
description: str = ""
value: Union[int, float, str, bool]
value: Union[int, float, str, bool, None]
default_value: Union[int, float, str, bool] = None
max: Union[int, float, None] = None
min: Union[int, float, None] = None
step: Union[int, float, None] = None
scope: str = "global"
choices: Union[list[dict[str, str]], None] = None
class Config:
arbitrary_types_allowed = True
class AgentAction(pydantic.BaseModel):
enabled: bool = True
@ -40,7 +55,6 @@ class AgentAction(pydantic.BaseModel):
description: str = ""
config: Union[dict[str, AgentActionConfig], None] = None
def set_processing(fn):
"""
decorator that emits the agent status as processing while the function
@ -70,6 +84,7 @@ class Agent(ABC):
agent_type = "agent"
verbose_name = None
set_processing = set_processing
requires_llm_client = True
@property
def agent_details(self):
@ -135,6 +150,7 @@ class Agent(ABC):
"enabled": agent.enabled if agent else True,
"has_toggle": agent.has_toggle if agent else False,
"experimental": agent.experimental if agent else False,
"requires_llm_client": cls.requires_llm_client,
}
actions = getattr(agent, "actions", None)


@ -406,7 +406,7 @@ class ConversationAgent(Agent):
context = await memory.multi_query(history, max_tokens=500, iterate=5)
self.current_memory_context = "\n".join(context)
self.current_memory_context = "\n\n".join(context)
return self.current_memory_context


@ -10,7 +10,7 @@ import talemate.emit.async_signals
from talemate.prompts import Prompt
from talemate.scene_message import DirectorMessage, TimePassageMessage
from .base import Agent, set_processing, AgentAction
from .base import Agent, set_processing, AgentAction, AgentActionConfig
from .registry import register
import structlog
@ -21,6 +21,7 @@ import re
if TYPE_CHECKING:
from talemate.tale_mate import Actor, Character, Scene
from talemate.agents.conversation import ConversationAgentEmission
from talemate.agents.narrator import NarratorAgentEmission
log = structlog.get_logger("talemate.agents.editor")
@ -40,7 +41,9 @@ class EditorAgent(Agent):
self.is_enabled = True
self.actions = {
"edit_dialogue": AgentAction(enabled=False, label="Edit dialogue", description="Will attempt to improve the quality of dialogue based on the character and scene. Runs automatically after each AI dialogue."),
"fix_exposition": AgentAction(enabled=True, label="Fix exposition", description="Will attempt to fix exposition and emotes, making sure they are displayed in italics. Runs automatically after each AI dialogue."),
"fix_exposition": AgentAction(enabled=True, label="Fix exposition", description="Will attempt to fix exposition and emotes, making sure they are displayed in italics. Runs automatically after each AI dialogue.", config={
"narrator": AgentActionConfig(type="bool", label="Fix narrator messages", description="Will attempt to fix exposition issues in narrator messages", value=True),
}),
"add_detail": AgentAction(enabled=False, label="Add detail", description="Will attempt to add extra detail and exposition to the dialogue. Runs automatically after each AI dialogue.")
}
@ -59,6 +62,7 @@ class EditorAgent(Agent):
def connect(self, scene):
super().connect(scene)
talemate.emit.async_signals.get("agent.conversation.generated").connect(self.on_conversation_generated)
talemate.emit.async_signals.get("agent.narrator.generated").connect(self.on_narrator_generated)
async def on_conversation_generated(self, emission:ConversationAgentEmission):
"""
@ -93,6 +97,24 @@ class EditorAgent(Agent):
emission.generation = edited
async def on_narrator_generated(self, emission:NarratorAgentEmission):
"""
Called when a narrator message is generated
"""
if not self.enabled:
return
log.info("editing narrator", emission=emission)
edited = []
for text in emission.generation:
edit = await self.fix_exposition_on_narrator(text)
edited.append(edit)
emission.generation = edited
@set_processing
async def edit_conversation(self, content:str, character:Character):
@ -127,12 +149,13 @@ class EditorAgent(Agent):
if not self.actions["fix_exposition"].enabled:
return content
#response = await Prompt.request("editor.fix-exposition", self.client, "edit_fix_exposition", vars={
# "content": content,
# "character": character,
# "scene": self.scene,
# "max_length": self.client.max_token_length
#})
if not character.is_player:
if '"' not in content and '*' not in content:
content = util.strip_partial_sentences(content)
character_prefix = f"{character.name}: "
message = content.split(character_prefix)[1]
content = f"{character_prefix}*{message.strip('*')}*"
return content
content = util.clean_dialogue(content, main_name=character.name)
content = util.strip_partial_sentences(content)
@ -140,6 +163,24 @@ class EditorAgent(Agent):
return content
@set_processing
async def fix_exposition_on_narrator(self, content:str):
if not self.actions["fix_exposition"].enabled:
return content
if not self.actions["fix_exposition"].config["narrator"].value:
return content
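# Example: plain narration such as 'The room was silent.' comes out as
# '*The room was silent.*'; narration that contains quoted dialogue is
# normalized via util.ensure_dialog_format instead.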
content = util.strip_partial_sentences(content)
if '"' not in content:
content = f"*{content.strip('*')}*"
else:
content = util.ensure_dialog_format(content)
return content
@set_processing
async def add_detail(self, content:str, character:Character):
"""


@ -206,6 +206,7 @@ from .registry import register
@register(condition=lambda: chromadb is not None)
class ChromaDBMemoryAgent(MemoryAgent):
requires_llm_client = False
@property
def ready(self):
@ -222,7 +223,7 @@ class ChromaDBMemoryAgent(MemoryAgent):
@property
def agent_details(self):
return f"ChromaDB: {self.embeddings}"
@property
def embeddings(self):
"""
@ -409,7 +410,7 @@ class ChromaDBMemoryAgent(MemoryAgent):
id = uid or f"__narrator__-{self.memory_tracker['__narrator__']}"
ids = [id]
log.debug("chromadb agent add", text=text, meta=meta, id=id)
#log.debug("chromadb agent add", text=text, meta=meta, id=id)
self.db.upsert(documents=[text], metadatas=metadatas, ids=ids)
@ -479,9 +480,10 @@ class ChromaDBMemoryAgent(MemoryAgent):
if distance < 1:
try:
log.debug("chromadb agent get", ts=ts, scene_ts=self.scene.ts)
date_prefix = util.iso8601_diff_to_human(ts, self.scene.ts)
except Exception:
log.error("chromadb agent", error="failed to get date prefix", ts=ts, scene_ts=self.scene.ts)
except Exception as e:
log.error("chromadb agent", error="failed to get date prefix", details=e, ts=ts, scene_ts=self.scene.ts)
date_prefix = None
if date_prefix:


@ -1,13 +1,14 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Callable, List, Optional, Union
import dataclasses
import structlog
import random
import talemate.util as util
from talemate.emit import emit
import talemate.emit.async_signals
from talemate.prompts import Prompt
from talemate.agents.base import set_processing, Agent, AgentAction, AgentActionConfig
from talemate.agents.base import set_processing as _set_processing, Agent, AgentAction, AgentActionConfig, AgentEmission
from talemate.agents.world_state import TimePassageEmission
from talemate.scene_message import NarratorMessage
from talemate.events import GameLoopActorIterEvent
@ -20,6 +21,33 @@ if TYPE_CHECKING:
log = structlog.get_logger("talemate.agents.narrator")
@dataclasses.dataclass
class NarratorAgentEmission(AgentEmission):
generation: list[str] = dataclasses.field(default_factory=list)
talemate.emit.async_signals.register(
"agent.narrator.generated"
)
def set_processing(fn):
"""
Custom decorator that emits the agent status as processing while the function
is running and then emits the result of the function as a NarratorAgentEmission
"""
@_set_processing
async def wrapper(self, *args, **kwargs):
response = await fn(self, *args, **kwargs)
emission = NarratorAgentEmission(
agent=self,
generation=[response],
)
await talemate.emit.async_signals.get("agent.narrator.generated").send(emission)
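# Subscribers (e.g. the editor agent's narrator hook) may modify
# emission.generation in place, so the returned text includes their edits.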
return emission.generation[0]
wrapper.__name__ = fn.__name__
return wrapper
@register()
class NarratorAgent(Agent):


@ -5,11 +5,13 @@ import traceback
from typing import TYPE_CHECKING, Callable, List, Optional, Union
import talemate.data_objects as data_objects
import talemate.emit.async_signals
import talemate.util as util
from talemate.prompts import Prompt
from talemate.scene_message import DirectorMessage, TimePassageMessage
from talemate.events import GameLoopEvent
from .base import Agent, set_processing
from .base import Agent, set_processing, AgentAction, AgentActionConfig
from .registry import register
import structlog
@ -34,14 +36,40 @@ class SummarizeAgent(Agent):
def __init__(self, client, **kwargs):
self.client = client
def on_history_add(self, event):
asyncio.ensure_future(self.build_archive(event.scene))
self.actions = {
"archive": AgentAction(
enabled=True,
label="Summarize to long-term memory archive",
description="Automatically summarize scene dialogue when the number of tokens in the history exceeds a threshold. This helps keep the context history from growing too large.",
config={
"threshold": AgentActionConfig(
type="number",
label="Token Threshold",
description="Will summarize when the number of tokens in the history exceeds this threshold",
min=512,
max=8192,
step=256,
value=1536,
)
}
)
}
def connect(self, scene):
super().connect(scene)
scene.signals["history_add"].connect(self.on_history_add)
talemate.emit.async_signals.get("game_loop").connect(self.on_game_loop)
async def on_game_loop(self, emission:GameLoopEvent):
"""
Called on each game loop iteration
"""
await self.build_archive(self.scene)
def clean_result(self, result):
if "#" in result:
result = result.split("#")[0]
@ -53,21 +81,31 @@ class SummarizeAgent(Agent):
return result
@set_processing
async def build_archive(self, scene, token_threshold:int=1500):
async def build_archive(self, scene):
end = None
if not self.actions["archive"].enabled:
return
if not scene.archived_history:
start = 0
recent_entry = None
else:
recent_entry = scene.archived_history[-1]
start = recent_entry.get("end", 0) + 1
if "end" not in recent_entry:
# permanent historical archive entry, not tied to any specific history entry
# meaning we are still at the beginning of the scene
start = 0
else:
start = recent_entry.get("end", 0)+1
tokens = 0
dialogue_entries = []
ts = "PT0S"
time_passage_termination = False
token_threshold = self.actions["archive"].config["threshold"].value
log.debug("build_archive", start=start, recent_entry=recent_entry)
if recent_entry:
@ -75,6 +113,9 @@ class SummarizeAgent(Agent):
for i in range(start, len(scene.history)):
dialogue = scene.history[i]
#log.debug("build_archive", idx=i, content=str(dialogue)[:64]+"...")
if isinstance(dialogue, DirectorMessage):
if i == start:
start += 1
@ -131,7 +172,7 @@ class SummarizeAgent(Agent):
break
adjusted_dialogue.append(line)
dialogue_entries = adjusted_dialogue
end = start + len(dialogue_entries)
end = start + len(dialogue_entries)-1
if dialogue_entries:
summarized = await self.summarize(

src/talemate/agents/tts.py (new file, 595 lines)

@ -0,0 +1,595 @@
from __future__ import annotations
from typing import Union
import asyncio
import httpx
import io
import os
import pydantic
import nltk
import tempfile
import base64
import uuid
import functools
from nltk.tokenize import sent_tokenize
import talemate.config as config
import talemate.emit.async_signals
from talemate.emit import emit
from talemate.events import GameLoopNewMessageEvent
from talemate.scene_message import CharacterMessage, NarratorMessage
from .base import Agent, set_processing, AgentAction, AgentActionConfig
from .registry import register
import structlog
import time
try:
from TTS.api import TTS
except ImportError:
TTS = None
log = structlog.get_logger("talemate.agents.tts")
if not TTS:
# TTS installation is massive and requires a lot of dependencies
# so we don't want to require it unless the user wants to use it
log.info("TTS (local) requires the TTS package, please install with `pip install TTS` if you want to use the local api")
nltk.download("punkt")
def parse_chunks(text):
text = text.replace("...", "__ellipsis__")
chunks = sent_tokenize(text)
cleaned_chunks = []
for chunk in chunks:
chunk = chunk.replace("*","")
if not chunk:
continue
cleaned_chunks.append(chunk)
for i, chunk in enumerate(cleaned_chunks):
chunk = chunk.replace("__ellipsis__", "...")
cleaned_chunks[i] = chunk
return cleaned_chunks
def rejoin_chunks(chunks:list[str], chunk_size:int=250):
"""
Will combine chunks split by punctuation into a single chunk until
max chunk size is reached
"""
joined_chunks = []
current_chunk = ""
for chunk in chunks:
if len(current_chunk) + len(chunk) > chunk_size:
joined_chunks.append(current_chunk)
current_chunk = ""
current_chunk += chunk
if current_chunk:
joined_chunks.append(current_chunk)
return joined_chunks
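# Example: parse_chunks("It was dark. The rain fell. Nobody spoke.") yields one
# chunk per sentence (emotes stripped, ellipses preserved); rejoin_chunks then
# concatenates consecutive chunks until adding another would exceed chunk_size.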
class Voice(pydantic.BaseModel):
value:str
label:str
class VoiceLibrary(pydantic.BaseModel):
api: str
voices: list[Voice] = pydantic.Field(default_factory=list)
last_synced: float = None
@register()
class TTSAgent(Agent):
"""
Text to speech agent
"""
agent_type = "tts"
verbose_name = "Text to speech"
requires_llm_client = False
@classmethod
def config_options(cls, agent=None):
config_options = super().config_options(agent=agent)
if agent:
config_options["actions"]["_config"]["config"]["voice_id"]["choices"] = [
voice.model_dump() for voice in agent.list_voices_sync()
]
return config_options
def __init__(self, **kwargs):
self.is_enabled = False
self.voices = {
"elevenlabs": VoiceLibrary(api="elevenlabs"),
"coqui": VoiceLibrary(api="coqui"),
"tts": VoiceLibrary(api="tts"),
}
self.config = config.load_config()
self.playback_done_event = asyncio.Event()
self.actions = {
"_config": AgentAction(
enabled=True,
label="Configure",
description="TTS agent configuration",
config={
"api": AgentActionConfig(
type="text",
choices=[
# TODO: add local TTS support
{"value": "tts", "label": "TTS (Local)"},
{"value": "elevenlabs", "label": "Eleven Labs"},
{"value": "coqui", "label": "Coqui Studio"},
],
value="tts",
label="API",
description="Which TTS API to use",
onchange="emit",
),
"voice_id": AgentActionConfig(
type="text",
value="default",
label="Narrator Voice",
description="Voice ID/Name to use for TTS",
choices=[]
),
"generate_for_player": AgentActionConfig(
type="bool",
value=False,
label="Generate for player",
description="Generate audio for player messages",
),
"generate_for_npc": AgentActionConfig(
type="bool",
value=True,
label="Generate for NPCs",
description="Generate audio for NPC messages",
),
"generate_for_narration": AgentActionConfig(
type="bool",
value=True,
label="Generate for narration",
description="Generate audio for narration messages",
),
"generate_chunks": AgentActionConfig(
type="bool",
value=True,
label="Split generation",
description="Generate audio chunks for each sentence - will be much more responsive but may loose context to inform inflection",
)
}
),
}
self.actions["_config"].model_dump()
@property
def enabled(self):
return self.is_enabled
@property
def has_toggle(self):
return True
@property
def experimental(self):
return False
@property
def not_ready_reason(self) -> str:
"""
Returns a string explaining why the agent is not ready
"""
if self.ready:
return ""
if self.api == "tts":
if not TTS:
return "TTS not installed"
elif self.requires_token and not self.token:
return "No API token"
elif not self.default_voice_id:
return "No voice selected"
@property
def agent_details(self):
suffix = ""
if not self.ready:
suffix = f" - {self.not_ready_reason}"
else:
suffix = f" - {self.voice_id_to_label(self.default_voice_id)}"
api = self.api
choices = self.actions["_config"].config["api"].choices
api_label = api
for choice in choices:
if choice["value"] == api:
api_label = choice["label"]
break
return f"{api_label}{suffix}"
@property
def api(self):
return self.actions["_config"].config["api"].value
@property
def token(self):
api = self.api
return self.config.get(api,{}).get("api_key")
@property
def default_voice_id(self):
return self.actions["_config"].config["voice_id"].value
@property
def requires_token(self):
return self.api != "tts"
@property
def ready(self):
if self.api == "tts":
if not TTS:
return False
return True
return (not self.requires_token or self.token) and self.default_voice_id
@property
def status(self):
if not self.enabled:
return "disabled"
if self.ready:
return "active" if not getattr(self, "processing", False) else "busy"
if self.requires_token and not self.token:
return "error"
if self.api == "tts":
if not TTS:
return "error"
@property
def max_generation_length(self):
if self.api == "elevenlabs":
return 1024
elif self.api == "coqui":
return 250
return 250
def apply_config(self, *args, **kwargs):
try:
api = kwargs["actions"]["_config"]["config"]["api"]["value"]
except KeyError:
api = self.api
api_changed = api != self.api
log.debug("apply_config", api=api, api_changed=api != self.api, current_api=self.api)
super().apply_config(*args, **kwargs)
if api_changed:
try:
self.actions["_config"].config["voice_id"].value = self.voices[api].voices[0].value
except IndexError:
self.actions["_config"].config["voice_id"].value = ""
def connect(self, scene):
super().connect(scene)
talemate.emit.async_signals.get("game_loop_new_message").connect(self.on_game_loop_new_message)
async def on_game_loop_new_message(self, emission:GameLoopNewMessageEvent):
"""
Called when a new message is added to the scene
"""
if not self.enabled:
return
if not isinstance(emission.message, (CharacterMessage, NarratorMessage)):
return
if isinstance(emission.message, NarratorMessage) and not self.actions["_config"].config["generate_for_narration"].value:
return
if isinstance(emission.message, CharacterMessage):
if emission.message.source == "player" and not self.actions["_config"].config["generate_for_player"].value:
return
elif emission.message.source == "ai" and not self.actions["_config"].config["generate_for_npc"].value:
return
if isinstance(emission.message, CharacterMessage):
character_prefix = emission.message.split(":", 1)[0]
else:
character_prefix = ""
log.info("reactive tts", message=emission.message, character_prefix=character_prefix)
await self.generate(str(emission.message).replace(character_prefix+": ", ""))
def voice(self, voice_id:str) -> Union[Voice, None]:
for voice in self.voices[self.api].voices:
if voice.value == voice_id:
return voice
return None
def voice_id_to_label(self, voice_id:str):
for voice in self.voices[self.api].voices:
if voice.value == voice_id:
return voice.label
return None
def list_voices_sync(self):
loop = asyncio.get_event_loop()
return loop.run_until_complete(self.list_voices())
async def list_voices(self):
if self.requires_token and not self.token:
return []
library = self.voices[self.api]
log.info("Listing voices", api=self.api, last_synced=library.last_synced)
# TODO: allow re-syncing voices
if library.last_synced:
return library.voices
list_fn = getattr(self, f"_list_voices_{self.api}")
log.info("Listing voices", api=self.api)
library.voices = await list_fn()
library.last_synced = time.time()
# if the current voice cannot be found, reset it
if not self.voice(self.default_voice_id):
self.actions["_config"].config["voice_id"].value = ""
# set loading to false
return library.voices
@set_processing
async def generate(self, text: str):
if not self.enabled or not self.ready or not text:
return
self.playback_done_event.set()
generate_fn = getattr(self, f"_generate_{self.api}")
if self.actions["_config"].config["generate_chunks"].value:
chunks = parse_chunks(text)
chunks = rejoin_chunks(chunks)
else:
chunks = parse_chunks(text)
chunks = rejoin_chunks(chunks, chunk_size=self.max_generation_length)
# Start generating audio chunks in the background
generation_task = asyncio.create_task(self.generate_chunks(generate_fn, chunks))
# Wait for the generation task to complete; audio is emitted to the frontend queue as each chunk finishes
await asyncio.gather(generation_task)
async def generate_chunks(self, generate_fn, chunks):
for chunk in chunks:
chunk = chunk.replace("*","").strip()
log.info("Generating audio", api=self.api, chunk=chunk)
audio_data = await generate_fn(chunk)
self.play_audio(audio_data)
def play_audio(self, audio_data):
# play audio through the python audio player
#play(audio_data)
emit("audio_queue", data={"audio_data": base64.b64encode(audio_data).decode("utf-8")})
self.playback_done_event.set() # Signal that playback is finished
# LOCAL
async def _generate_tts(self, text: str) -> Union[bytes, None]:
if not TTS:
return
tts_config = self.config.get("tts",{})
model = tts_config.get("model")
device = tts_config.get("device", "cpu")
log.debug("tts local", model=model, device=device)
if not hasattr(self, "tts_instance"):
self.tts_instance = TTS(model).to(device)
tts = self.tts_instance
loop = asyncio.get_event_loop()
voice = self.voice(self.default_voice_id)
with tempfile.TemporaryDirectory() as temp_dir:
file_path = os.path.join(temp_dir, f"tts-{uuid.uuid4()}.wav")
await loop.run_in_executor(None, functools.partial(tts.tts_to_file, text=text, speaker_wav=voice.value, language="en", file_path=file_path))
#tts.tts_to_file(text=text, speaker_wav=voice.value, language="en", file_path=file_path)
with open(file_path, "rb") as f:
return f.read()
async def _list_voices_tts(self) -> dict[str, str]:
return [Voice(**voice) for voice in self.config.get("tts",{}).get("voices",[])]
# ELEVENLABS
async def _generate_elevenlabs(self, text: str, chunk_size: int = 1024) -> Union[bytes, None]:
api_key = self.token
if not api_key:
return
async with httpx.AsyncClient() as client:
url = f"https://api.elevenlabs.io/v1/text-to-speech/{self.default_voice_id}"
headers = {
"Accept": "audio/mpeg",
"Content-Type": "application/json",
"xi-api-key": api_key,
}
data = {
"text": text,
"model_id": "eleven_monolingual_v1",
"voice_settings": {
"stability": 0.5,
"similarity_boost": 0.5
}
}
response = await client.post(url, json=data, headers=headers, timeout=300)
if response.status_code == 200:
bytes_io = io.BytesIO()
for chunk in response.iter_bytes(chunk_size=chunk_size):
if chunk:
bytes_io.write(chunk)
# Put the audio data in the queue for playback
return bytes_io.getvalue()
else:
log.error(f"Error generating audio: {response.text}")
async def _list_voices_elevenlabs(self) -> dict[str, str]:
url_voices = "https://api.elevenlabs.io/v1/voices"
voices = []
async with httpx.AsyncClient() as client:
headers = {
"Accept": "application/json",
"xi-api-key": self.token,
}
response = await client.get(url_voices, headers=headers, params={"per_page":1000})
speakers = response.json()["voices"]
voices.extend([Voice(value=speaker["voice_id"], label=speaker["name"]) for speaker in speakers])
# sort by name
voices.sort(key=lambda x: x.label)
return voices
# COQUI STUDIO
async def _generate_coqui(self, text: str) -> Union[bytes, None]:
api_key = self.token
if not api_key:
return
async with httpx.AsyncClient() as client:
url = "https://app.coqui.ai/api/v2/samples/xtts/render/"
headers = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}"
}
data = {
"voice_id": self.default_voice_id,
"text": text,
"language": "en" # Assuming English language for simplicity; this could be parameterized
}
# Make the POST request to Coqui API
response = await client.post(url, json=data, headers=headers, timeout=300)
if response.status_code in [200, 201]:
# Parse the JSON response to get the audio URL
response_data = response.json()
audio_url = response_data.get('audio_url')
if audio_url:
# Make a GET request to download the audio file
audio_response = await client.get(audio_url)
if audio_response.status_code == 200:
# delete the sample from Coqui Studio
# await self._cleanup_coqui(response_data.get('id'))
return audio_response.content
else:
log.error(f"Error downloading audio: {audio_response.text}")
else:
log.error("No audio URL in response")
else:
log.error(f"Error generating audio: {response.text}")
async def _cleanup_coqui(self, sample_id: str):
api_key = self.token
if not api_key or not sample_id:
return
async with httpx.AsyncClient() as client:
url = f"https://app.coqui.ai/api/v2/samples/xtts/{sample_id}"
headers = {
"Authorization": f"Bearer {api_key}"
}
# Make the DELETE request to Coqui API
response = await client.delete(url, headers=headers)
if response.status_code == 204:
log.info(f"Successfully deleted sample with ID: {sample_id}")
else:
log.error(f"Error deleting sample with ID: {sample_id}: {response.text}")
async def _list_voices_coqui(self) -> dict[str, str]:
url_speakers = "https://app.coqui.ai/api/v2/speakers"
url_custom_voices = "https://app.coqui.ai/api/v2/voices"
voices = []
async with httpx.AsyncClient() as client:
headers = {
"Authorization": f"Bearer {self.token}"
}
response = await client.get(url_speakers, headers=headers, params={"per_page":1000})
speakers = response.json()["result"]
voices.extend([Voice(value=speaker["id"], label=speaker["name"]) for speaker in speakers])
response = await client.get(url_custom_voices, headers=headers, params={"per_page":1000})
custom_voices = response.json()["result"]
voices.extend([Voice(value=voice["id"], label=voice["name"]) for voice in custom_voices])
# sort by name
voices.sort(key=lambda x: x.label)
return voices


@ -23,6 +23,7 @@ from .cmd_save_as import CmdSaveAs
from .cmd_save_characters import CmdSaveCharacters
from .cmd_setenv import CmdSetEnvironmentToScene, CmdSetEnvironmentToCreative
from .cmd_time_util import *
from .cmd_tts import *
from .cmd_world_state import CmdWorldState
from .cmd_run_helios_test import CmdHeliosTest
from .manager import Manager


@ -32,4 +32,5 @@ class CmdRebuildArchive(TalemateCommand):
if not more:
break
self.scene.sync_time()
await self.scene.commit_to_memory()


@ -0,0 +1,33 @@
import asyncio
import logging
from talemate.commands.base import TalemateCommand
from talemate.commands.manager import register
from talemate.prompts.base import set_default_sectioning_handler
from talemate.instance import get_agent
__all__ = [
"CmdTestTTS",
]
@register
class CmdTestTTS(TalemateCommand):
"""
Command class for the 'test_tts' command
"""
name = "test_tts"
description = "Test the TTS agent"
aliases = []
async def run(self):
tts_agent = get_agent("tts")
try:
last_message = str(self.scene.history[-1])
except IndexError:
last_message = "Welcome to talemate!"
await tts_agent.generate(last_message)
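# Assuming Talemate's usual "!" command prefix, this can be invoked from
# the scene's command input as: !test_tts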


@ -65,6 +65,21 @@ class OpenAIConfig(BaseModel):
class RunPodConfig(BaseModel):
api_key: Union[str,None]=None
class ElevenLabsConfig(BaseModel):
api_key: Union[str,None]=None
class CoquiConfig(BaseModel):
api_key: Union[str,None]=None
class TTSVoiceSamples(BaseModel):
label:str
value:str
class TTSConfig(BaseModel):
device:str = "cuda"
model:str = "tts_models/multilingual/multi-dataset/xtts_v2"
voices: list[TTSVoiceSamples] = pydantic.Field(default_factory=list)
class ChromaDB(BaseModel):
instructor_device: str="cpu"
@ -85,6 +100,12 @@ class Config(BaseModel):
chromadb: ChromaDB = ChromaDB()
elevenlabs: ElevenLabsConfig = ElevenLabsConfig()
coqui: CoquiConfig = CoquiConfig()
tts: TTSConfig = TTSConfig()
class Config:
extra = "ignore"


@ -24,6 +24,8 @@ CommandStatus = signal("command_status")
WorldState = signal("world_state")
ArchivedHistory = signal("archived_history")
AudioQueue = signal("audio_queue")
MessageEdited = signal("message_edited")
handlers = {
@ -46,4 +48,5 @@ handlers = {
"archived_history": ArchivedHistory,
"message_edited": MessageEdited,
"prompt_sent": PromptSent,
"audio_queue": AudioQueue,
}


@ -4,7 +4,7 @@ from dataclasses import dataclass
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from talemate.tale_mate import Scene, Actor
from talemate.tale_mate import Scene, Actor, SceneMessage
__all__ = [
"Event",
@ -46,4 +46,8 @@ class GameLoopStartEvent(GameLoopEvent):
@dataclass
class GameLoopActorIterEvent(GameLoopEvent):
actor: Actor
actor: Actor
@dataclass
class GameLoopNewMessageEvent(GameLoopEvent):
message: SceneMessage


@ -190,8 +190,11 @@ async def load_scene_from_data(
await scene.add_actor(actor)
if scene.environment != "creative":
await scene.world_state.request_update(initial_only=True)
try:
await scene.world_state.request_update(initial_only=True)
except Exception as e:
log.error("world_state.request_update", error=e)
# the scene has been saved before (since we just loaded it), so we set the saved flag to True
# as long as the scene has a memory_id.
scene.saved = "memory_id" in scene_data


@ -473,8 +473,6 @@ class Prompt:
# remove all duplicate whitespace
cleaned = re.sub(r"\s+", " ", cleaned)
print("set_json_response", cleaned)
return self.set_prepared_response(cleaned)


@ -110,7 +110,6 @@ async def websocket_endpoint(websocket, path):
elif action_type == "request_scenes_list":
query = data.get("query", "")
handler.request_scenes_list(query)
elif action_type == "configure_clients":
handler.configure_clients(data.get("clients"))
elif action_type == "configure_agents":


@ -0,0 +1,26 @@
import structlog
import talemate.instance as instance
log = structlog.get_logger("talemate.server.tts")
class TTSPlugin:
router = "tts"
def __init__(self, websocket_handler):
self.websocket_handler = websocket_handler
self.tts = None
async def handle(self, data:dict):
action = data.get("action")
if action == "test":
return await self.handle_test(data)
async def handle_test(self, data:dict):
tts_agent = instance.get_agent("tts")
await tts_agent.generate("Welcome to talemate!")
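# Assumed payload shape, based on the router name: messages routed to this
# plugin look like {"type": "tts", "action": "test"}.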


@ -91,7 +91,7 @@ class WebsocketHandler(Receiver):
for agent_typ, agent_config in self.agents.items():
try:
client = self.llm_clients.get(agent_config.get("client"))["client"]
except TypeError:
except TypeError as e:
client = None
if not client:
@ -222,16 +222,25 @@ class WebsocketHandler(Receiver):
def configure_agents(self, agents):
self.agents = {typ: {} for typ in instance.agent_types()}
log.debug("Configuring agents", agents=agents)
log.debug("Configuring agents")
for agent in agents:
name = agent["name"]
# special case for memory agent
if name == "memory":
if name == "memory" or name == "tts":
self.agents[name] = {
"name": name,
}
agent_instance = instance.get_agent(name, **self.agents[name])
if agent_instance.has_toggle:
self.agents[name]["enabled"] = agent["enabled"]
if getattr(agent_instance, "actions", None):
self.agents[name]["actions"] = agent.get("actions", {})
agent_instance.apply_config(**self.agents[name])
log.debug("Configured agent", name=name)
continue
if name not in self.agents:
@ -428,6 +437,14 @@ class WebsocketHandler(Receiver):
}
)
def handle_audio_queue(self, emission: Emission):
self.queue_put(
{
"type": "audio_queue",
"data": emission.data,
}
)
def handle_request_input(self, emission: Emission):
self.waiting_for_input = True


@ -46,7 +46,7 @@ log = structlog.get_logger("talemate")
async_signals.register("game_loop_start")
async_signals.register("game_loop")
async_signals.register("game_loop_actor_iter")
async_signals.register("game_loop_new_message")
class Character:
"""
@ -578,6 +578,7 @@ class Scene(Emitter):
"game_loop": async_signals.get("game_loop"),
"game_loop_start": async_signals.get("game_loop_start"),
"game_loop_actor_iter": async_signals.get("game_loop_actor_iter"),
"game_loop_new_message": async_signals.get("game_loop_new_message"),
}
self.setup_emitter(scene=self)
@ -704,6 +705,12 @@ class Scene(Emitter):
messages=messages,
)
)
loop = asyncio.get_event_loop()
for message in messages:
loop.run_until_complete(self.signals["game_loop_new_message"].send(
events.GameLoopNewMessageEvent(scene=self, event_type="game_loop_new_message", message=message)
))
def push_archive(self, entry: data_objects.ArchiveEntry):
@ -1177,7 +1184,7 @@ class Scene(Emitter):
},
)
self.log.debug("scene_status", scene=self.name, scene_time=self.ts, saved=self.saved)
self.log.debug("scene_status", scene=self.name, scene_time=self.ts, human_ts=util.iso8601_duration_to_human(self.ts, suffix=""), saved=self.saved)
def set_environment(self, environment: str):
"""
@ -1190,6 +1197,7 @@ class Scene(Emitter):
"""
Accepts an iso6801 duration string and advances the scene's world state by that amount
"""
log.debug("advance_time", ts=ts, scene_ts=self.ts, duration=isodate.parse_duration(ts), scene_duration=isodate.parse_duration(self.ts))
self.ts = isodate.duration_isoformat(
isodate.parse_duration(self.ts) + isodate.parse_duration(ts)
@ -1212,9 +1220,12 @@ class Scene(Emitter):
if self.archived_history[i].get("ts"):
self.ts = self.archived_history[i]["ts"]
break
end = self.archived_history[-1].get("end", 0)
else:
end = 0
for message in self.history:
for message in self.history[end:]:
if isinstance(message, TimePassageMessage):
self.advance_time(message.ts)


@ -490,30 +490,43 @@ def clean_attribute(attribute: str) -> str:
def duration_to_timedelta(duration):
"""Convert an isodate.Duration object to a datetime.timedelta object."""
"""Convert an isodate.Duration object or a datetime.timedelta object to a datetime.timedelta object."""
# Check if the duration is already a timedelta object
if isinstance(duration, datetime.timedelta):
return duration
# Check if the duration is an isodate.Duration object with a tdelta attribute
if hasattr(duration, 'tdelta'):
return duration.tdelta
# If it's an isodate.Duration object with separate year, month, day, hour, minute, second attributes
days = int(duration.years) * 365 + int(duration.months) * 30 + int(duration.days)
return datetime.timedelta(days=days)
seconds = int(duration.hours) * 3600 + int(duration.minutes) * 60 + int(duration.seconds)
return datetime.timedelta(days=days, seconds=seconds)
def timedelta_to_duration(delta):
"""Convert a datetime.timedelta object to an isodate.Duration object."""
# Extract days and convert to years, months, and days
days = delta.days
years = days // 365
days %= 365
months = days // 30
days %= 30
return isodate.duration.Duration(years=years, months=months, days=days)
# Convert remaining seconds to hours, minutes, and seconds
seconds = delta.seconds
hours = seconds // 3600
seconds %= 3600
minutes = seconds // 60
seconds %= 60
return isodate.Duration(years=years, months=months, days=days, hours=hours, minutes=minutes, seconds=seconds)
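# Example, using the 365-day-year / 30-day-month approximation above:
# timedelta_to_duration(datetime.timedelta(days=428, seconds=14706))
# -> isodate.Duration(years=1, months=2, days=3, hours=4, minutes=5, seconds=6)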
def parse_duration_to_isodate_duration(duration_str):
"""Parse ISO 8601 duration string and ensure the result is an isodate.Duration."""
parsed_duration = isodate.parse_duration(duration_str)
if isinstance(parsed_duration, datetime.timedelta):
days = parsed_duration.days
years = days // 365
days %= 365
months = days // 30
days %= 30
return isodate.duration.Duration(years=years, months=months, days=days)
return timedelta_to_duration(parsed_duration)
return parsed_duration
def iso8601_diff(duration_str1, duration_str2):
@ -533,40 +546,50 @@ def iso8601_diff(duration_str1, duration_str2):
return difference
def iso8601_duration_to_human(iso_duration, suffix:str=" ago"):
# Parse the ISO8601 duration string into an isodate duration object
def iso8601_duration_to_human(iso_duration, suffix: str = " ago"):
if isinstance(iso_duration, isodate.Duration):
duration = iso_duration
else:
# Parse the ISO8601 duration string into an isodate duration object
if not isinstance(iso_duration, isodate.Duration):
duration = isodate.parse_duration(iso_duration)
else:
duration = iso_duration
# Extract years, months, days, and the time part as seconds
years, months, days, hours, minutes, seconds = 0, 0, 0, 0, 0, 0
if isinstance(duration, isodate.Duration):
years = duration.years
months = duration.months
days = duration.days
seconds = duration.tdelta.total_seconds()
else:
years, months = 0, 0
hours = duration.tdelta.seconds // 3600
minutes = (duration.tdelta.seconds % 3600) // 60
seconds = duration.tdelta.seconds % 60
elif isinstance(duration, datetime.timedelta):
days = duration.days
seconds = duration.total_seconds() - days * 86400 # Extract time-only part
hours = duration.seconds // 3600
minutes = (duration.seconds % 3600) // 60
seconds = duration.seconds % 60
hours, seconds = divmod(seconds, 3600)
minutes, seconds = divmod(seconds, 60)
# Adjust for cases where duration is a timedelta object
# Convert days to weeks and days if applicable
weeks, days = divmod(days, 7)
# Build the human-readable components
components = []
if years:
components.append(f"{years} Year{'s' if years > 1 else ''}")
if months:
components.append(f"{months} Month{'s' if months > 1 else ''}")
if weeks:
components.append(f"{weeks} Week{'s' if weeks > 1 else ''}")
if days:
components.append(f"{days} Day{'s' if days > 1 else ''}")
if hours:
components.append(f"{int(hours)} Hour{'s' if hours > 1 else ''}")
components.append(f"{hours} Hour{'s' if hours > 1 else ''}")
if minutes:
components.append(f"{int(minutes)} Minute{'s' if minutes > 1 else ''}")
components.append(f"{minutes} Minute{'s' if minutes > 1 else ''}")
if seconds:
components.append(f"{int(seconds)} Second{'s' if seconds > 1 else ''}")
components.append(f"{seconds} Second{'s' if seconds > 1 else ''}")
# Construct the human-readable string
if len(components) > 1:
@ -576,7 +599,7 @@ def iso8601_duration_to_human(iso_duration, suffix:str=" ago"):
human_str = components[0]
else:
human_str = "Moments"
return f"{human_str}{suffix}"
def iso8601_diff_to_human(start, end):
@ -584,6 +607,7 @@ def iso8601_diff_to_human(start, end):
return ""
diff = iso8601_diff(start, end)
return iso8601_duration_to_human(diff)


@ -1,6 +1,7 @@
from pydantic import BaseModel
from talemate.emit import emit
import structlog
import traceback
from typing import Union
import talemate.instance as instance
@ -59,7 +60,8 @@ class WorldState(BaseModel):
world_state = await self.agent.request_world_state()
except Exception as e:
self.emit()
raise e
log.error("world_state.request_update", error=e, traceback=traceback.format_exc())
return
previous_characters = self.characters
previous_items = self.items


@ -7,11 +7,12 @@
size="14"></v-progress-circular>
<v-icon v-else-if="agent.status === 'uninitialized'" color="orange" size="14">mdi-checkbox-blank-circle</v-icon>
<v-icon v-else-if="agent.status === 'disabled'" color="grey-darken-2" size="14">mdi-checkbox-blank-circle</v-icon>
<v-icon v-else-if="agent.status === 'error'" color="red" size="14">mdi-checkbox-blank-circle</v-icon>
<v-icon v-else color="green" size="14">mdi-checkbox-blank-circle</v-icon>
<span class="ml-1" v-if="agent.label"> {{ agent.label }}</span>
<span class="ml-1" v-else> {{ agent.name }}</span>
</v-list-item-title>
<v-list-item-subtitle>
<v-list-item-subtitle class="text-caption">
{{ agent.client }}
</v-list-item-subtitle>
<v-chip class="mr-1" v-if="agent.status === 'disabled'" size="x-small">Disabled</v-chip>
@ -65,7 +66,10 @@ export default {
for(let i = 0; i < this.state.agents.length; i++) {
let agent = this.state.agents[i];
if(agent.status === 'warning' || agent.status === 'error') {
if(!agent.data.requires_llm_client)
continue
if(agent.status === 'warning' || agent.status === 'error' || agent.status === 'uninitialized') {
console.log("agents: configuration required (1)", agent.status)
return true;
}
@ -99,7 +103,6 @@ export default {
} else {
this.state.agents[index] = agent;
}
this.state.dialog = false;
this.$emit('agents-updated', this.state.agents);
},
editAgent(index) {


@ -10,7 +10,7 @@
</v-col>
<v-col cols="3" class="text-right">
<v-checkbox :label="enabledLabel()" hide-details density="compact" color="green" v-model="agent.enabled"
v-if="agent.data.has_toggle"></v-checkbox>
v-if="agent.data.has_toggle" @update:modelValue="save(false)"></v-checkbox>
</v-col>
</v-row>
@ -18,7 +18,7 @@
</v-card-title>
<v-card-text class="scrollable-content">
<v-select v-model="agent.client" :items="agent.data.client" label="Client"></v-select>
<v-select v-if="agent.data.requires_llm_client" v-model="agent.client" :items="agent.data.client" label="Client" @update:modelValue="save(false)"></v-select>
<v-alert type="warning" variant="tonal" density="compact" v-if="agent.data.experimental">
This agent is currently experimental and may significantly decrease performance and / or require
@ -27,27 +27,25 @@
<v-card v-for="(action, key) in agent.actions" :key="key" density="compact">
<v-card-subtitle>
<v-checkbox :label="agent.data.actions[key].label" hide-details density="compact" color="green" v-model="action.enabled"></v-checkbox>
<v-checkbox v-if="!actionAlwaysEnabled(key)" :label="agent.data.actions[key].label" hide-details density="compact" color="green" v-model="action.enabled" @update:modelValue="save(false)"></v-checkbox>
</v-card-subtitle>
<v-card-text>
{{ agent.data.actions[key].description }}
<div v-if="!actionAlwaysEnabled(key)">
{{ agent.data.actions[key].description }}
</div>
<div v-for="(action_config, config_key) in agent.data.actions[key].config" :key="config_key">
<div v-if="action.enabled">
<!-- render config widgets based on action_config.type (int, str, bool, float) -->
<v-text-field v-if="action_config.type === 'text'" v-model="action.config[config_key].value" :label="action_config.label" :hint="action_config.description" density="compact"></v-text-field>
<v-slider v-if="action_config.type === 'number' && action_config.step !== null" v-model="action.config[config_key].value" :label="action_config.label" :hint="action_config.description" :min="action_config.min" :max="action_config.max" :step="action_config.step" density="compact" thumb-label></v-slider>
<v-checkbox v-if="action_config.type === 'bool'" v-model="action.config[config_key].value" :label="action_config.label" :hint="action_config.description" density="compact"></v-checkbox>
<v-text-field v-if="action_config.type === 'text' && action_config.choices === null" v-model="action.config[config_key].value" :label="action_config.label" :hint="action_config.description" density="compact" @update:modelValue="save(true)"></v-text-field>
<v-autocomplete v-else-if="action_config.type === 'text' && action_config.choices !== null" v-model="action.config[config_key].value" :items="action_config.choices" :label="action_config.label" :hint="action_config.description" density="compact" item-title="label" item-value="value" @update:modelValue="save(false)"></v-autocomplete>
<v-slider v-if="action_config.type === 'number' && action_config.step !== null" v-model="action.config[config_key].value" :label="action_config.label" :hint="action_config.description" :min="action_config.min" :max="action_config.max" :step="action_config.step" density="compact" thumb-label @update:modelValue="save(true)"></v-slider>
<v-checkbox v-if="action_config.type === 'bool'" v-model="action.config[config_key].value" :label="action_config.label" :hint="action_config.description" density="compact" @update:modelValue="save(false)"></v-checkbox>
</div>
</div>
</v-card-text>
</v-card>
</v-card-text>
<v-card-actions>
<v-spacer></v-spacer>
<v-btn color="primary" @click="close">Close</v-btn>
<v-btn color="primary" @click="save">Save</v-btn>
</v-card-actions>
</v-card>
</v-dialog>
</template>
@ -58,9 +56,10 @@ export default {
dialog: Boolean,
formTitle: String
},
inject: ['state'],
inject: ['state', 'getWebsocket'],
data() {
return {
saveTimeout: null,
localDialog: this.state.dialog,
agent: { ...this.state.currentAgent }
};
@ -90,12 +89,32 @@ export default {
return 'Disabled';
}
},
actionAlwaysEnabled(action) {
if (action.charAt(0) === '_') {
return true;
} else {
return false;
}
},
close() {
this.$emit('update:dialog', false);
},
save() {
this.$emit('save', this.agent);
this.close();
save(delayed = false) {
console.log("save", delayed);
if(!delayed) {
this.$emit('save', this.agent);
return;
}
if(this.saveTimeout !== null)
clearTimeout(this.saveTimeout);
this.saveTimeout = setTimeout(() => {
this.$emit('save', this.agent);
}, 500);
//this.$emit('save', this.agent);
}
}
}


@ -0,0 +1,96 @@
<template>
<div class="audio-queue">
<span>{{ queue.length }} sound(s) queued</span>
<v-icon :color="isPlaying ? 'green' : ''" v-if="!isMuted" @click="toggleMute">mdi-volume-high</v-icon>
<v-icon :color="isPlaying ? 'red' : ''" v-else @click="toggleMute">mdi-volume-off</v-icon>
<v-icon class="ml-1" @click="stopAndClear">mdi-stop-circle-outline</v-icon>
</div>
</template>
<script>
export default {
name: 'AudioQueue',
data() {
return {
queue: [],
audioContext: null,
isPlaying: false,
isMuted: false,
currentSource: null
};
},
inject: ['getWebsocket', 'registerMessageHandler'],
created() {
this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
this.registerMessageHandler(this.handleMessage);
},
methods: {
handleMessage(data) {
if (data.type === 'audio_queue') {
console.log('Received audio queue message', data)
this.addToQueue(data.data.audio_data);
}
},
addToQueue(base64Sound) {
const soundBuffer = this.base64ToArrayBuffer(base64Sound);
this.queue.push(soundBuffer);
this.playNextSound();
},
base64ToArrayBuffer(base64) {
const binaryString = window.atob(base64);
const len = binaryString.length;
const bytes = new Uint8Array(len);
for (let i = 0; i < len; i++) {
bytes[i] = binaryString.charCodeAt(i);
}
return bytes.buffer;
},
playNextSound() {
if (this.isPlaying || this.queue.length === 0) {
return;
}
this.isPlaying = true;
const soundBuffer = this.queue.shift();
this.audioContext.decodeAudioData(soundBuffer, (buffer) => {
const source = this.audioContext.createBufferSource();
source.buffer = buffer;
this.currentSource = source;
if (!this.isMuted) {
source.connect(this.audioContext.destination);
}
source.onended = () => {
this.isPlaying = false;
this.playNextSound();
};
source.start(0);
}, (error) => {
console.error('Error with decoding audio data', error);
});
},
toggleMute() {
this.isMuted = !this.isMuted;
if (this.isMuted && this.currentSource) {
this.currentSource.disconnect(this.audioContext.destination);
} else if (this.currentSource) {
this.currentSource.connect(this.audioContext.destination);
}
},
stopAndClear() {
if (this.currentSource) {
this.currentSource.stop();
this.currentSource.disconnect();
this.currentSource = null;
}
this.queue = [];
this.isPlaying = false;
}
}
};
</script>
<style scoped>
.audio-queue {
display: flex;
align-items: center;
}
</style>


@ -80,6 +80,12 @@ export default {
this.getWebsocket().send(JSON.stringify({ type: 'request_scenes_list', query: this.sceneSearchInput }));
},
loadCreative() {
if(this.sceneSaved === false) {
if(!confirm("The current scene is not saved. Are you sure you want to load a new scene?")) {
return;
}
}
this.loading = true;
this.getWebsocket().send(JSON.stringify({ type: 'load_scene', file_path: "environment:creative" }));
},


@ -75,6 +75,8 @@
<span v-if="connecting" class="ml-1"><v-icon class="mr-1">mdi-progress-helper</v-icon>connecting</span>
<span v-else-if="connected" class="ml-1"><v-icon class="mr-1" color="green" size="14">mdi-checkbox-blank-circle</v-icon>connected</span>
<span v-else class="ml-1"><v-icon class="mr-1">mdi-progress-close</v-icon>disconnected</span>
<v-divider class="ml-1 mr-1" vertical></v-divider>
<AudioQueue ref="audioQueue" />
<v-spacer></v-spacer>
<span v-if="version !== null">v{{ version }}</span>
<span v-if="configurationRequired()">
@ -161,6 +163,7 @@ import SceneHistory from './SceneHistory.vue';
import CreativeEditor from './CreativeEditor.vue';
import AppConfig from './AppConfig.vue';
import DebugTools from './DebugTools.vue';
import AudioQueue from './AudioQueue.vue';
export default {
components: {
@ -177,6 +180,7 @@ export default {
CreativeEditor,
AppConfig,
DebugTools,
AudioQueue,
},
name: 'TalemateApp',
data() {


@ -0,0 +1 @@
Human: {{ system_message }} {{ set_response(prompt, "\n\nAssistant:") }}


@ -0,0 +1,4 @@
<|im_start|>system
{{ system_message }}<|im_end|>
<|im_start|>user
{{ set_response(prompt, "<|im_end|>\n<|im_start|>assistant\n") }}