# api/synthesize.py
import re
from python.helpers.api import ApiHandler
from flask import Request, Response
from python.helpers import runtime, settings, kokoro_tts

# Default upper bound, in characters, for one chunk of text handed to the
# TTS backend in a single synthesize call.
_MAX_CHUNK_CHARS = 300


class Synthesize(ApiHandler):
    """API handler that converts text into speech audio via ``kokoro_tts``."""

    async def process(self, input: dict, request: Request) -> dict | Response:
        """Clean, chunk and synthesize the text carried in *input*.

        Reads ``input["text"]`` (default ``""``) and ``input["ctxid"]``
        (default ``""``). If the Kokoro model is still downloading, an info
        entry is written to the context log before synthesis is attempted.

        Returns:
            ``{"audio": <result>, "success": True}`` when the cleaned text
            fits in one chunk, ``{"audio_parts": [<result>, ...],
            "success": True}`` when it was split into several chunks, or
            ``{"error": <message>, "success": False}`` if cleaning, chunking
            or synthesis raised. Each ``<result>`` is whatever
            ``kokoro_tts.synthesize_sentences`` returns.
        """
        text = input.get("text", "")
        ctxid = input.get("ctxid", "")
        context = self.get_context(ctxid)

        if await kokoro_tts.is_downloading():
            context.log.log(
                type="info",
                content="Kokoro TTS model is currently being downloaded, please wait...",
            )

        # Top-level API boundary: any failure is reported to the caller as an
        # error payload instead of propagating.
        try:
            cleaned_text = self._clean_text(text)
            chunks = self._chunk_text(cleaned_text)

            if len(chunks) == 1:
                # Single chunk - keep the original single-audio response shape.
                audio = await kokoro_tts.synthesize_sentences(chunks)
                return {"audio": audio, "success": True}

            # Multiple chunks - synthesize one at a time, in order, and
            # return the results as a sequence.
            audio_parts = [
                await kokoro_tts.synthesize_sentences([chunk])
                for chunk in chunks
            ]
            return {"audio_parts": audio_parts, "success": True}
        except Exception as e:
            return {"error": str(e), "success": False}

    def _clean_text(self, text: str) -> str:
        """Strip markdown, code, tables, URLs and e-mail addresses from *text*.

        Whitespace is normalised as the very last step so that the removals
        above it cannot leave double spaces or stray leading/trailing gaps
        in the returned string.
        """
        # Remove fenced code blocks first, then inline code spans.
        text = re.sub(r'```[\s\S]*?```', '', text)
        text = re.sub(r'`[^`]*`', '', text)

        # Replace markdown links with their visible label.
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)

        # Remove markdown emphasis / heading markers.
        text = re.sub(r'[*_#]+', '', text)

        # Remove table rows (basic cleanup). This must run while newlines are
        # still present, because the pattern is bounded by the line.
        text = re.sub(r'\|[^\n]*\|', '', text)

        # Remove bare URLs (after link handling, so link labels survive).
        text = re.sub(r'https?://[^\s]+', '', text)

        # Remove e-mail addresses.
        text = re.sub(r'\S+@\S+', '', text)

        # Collapse every whitespace run, newlines included, to one space.
        text = re.sub(r'\s+', ' ', text)

        return text.strip()

    def _chunk_text(self, text: str, max_len: int = _MAX_CHUNK_CHARS) -> list[str]:
        """Split *text* into chunks of at most *max_len* characters.

        Text is split on sentence boundaries (whitespace following ``.``,
        ``!`` or ``?``) and sentences are packed greedily into chunks. A
        sentence that is itself longer than *max_len* is broken at word
        boundaries, so no returned chunk exceeds the limit.

        Args:
            text: The text to split.
            max_len: Maximum chunk length in characters; must be >= 1.

        Returns:
            A non-empty list of chunks. Text that already fits is returned
            unchanged as a single-element list.

        Raises:
            ValueError: If *max_len* is smaller than 1.
        """
        if max_len < 1:
            raise ValueError("max_len must be at least 1")

        # If text is short enough, return it as a single chunk.
        if len(text) <= max_len:
            return [text]

        chunks: list[str] = []
        current = ""
        for sentence in re.split(r'(?<=[.!?])\s+', text):
            sentence = sentence.strip()
            if not sentence:
                continue
            for piece in self._split_oversized(sentence, max_len):
                # +1 accounts for the joining space.
                if current and len(current) + 1 + len(piece) > max_len:
                    chunks.append(current)
                    current = piece
                else:
                    current = f"{current} {piece}" if current else piece

        # Add the last chunk if it has content.
        if current:
            chunks.append(current)

        return chunks or [text]

    @staticmethod
    def _split_oversized(sentence: str, max_len: int) -> list[str]:
        """Break one sentence into pieces no longer than *max_len* characters.

        Splits at whitespace where possible; a single word longer than
        *max_len* is cut into fixed-size slices. A sentence that already
        fits is returned unchanged as a single-element list.
        """
        if len(sentence) <= max_len:
            return [sentence]

        pieces: list[str] = []
        current = ""
        for word in sentence.split():
            # A word that cannot fit in any chunk is hard-split.
            while len(word) > max_len:
                if current:
                    pieces.append(current)
                    current = ""
                pieces.append(word[:max_len])
                word = word[max_len:]
            if not word:
                continue
            if current and len(current) + 1 + len(word) > max_len:
                pieces.append(current)
                current = word
            else:
                current = f"{current} {word}" if current else word

        if current:
            pieces.append(current)
        return pieces