mirror of
https://github.com/agent0ai/agent-zero.git
synced 2026-05-25 23:06:20 +00:00
Split the legacy core speech stack into two built-in, independently toggleable plugins: `_kokoro_tts` for TTS and `_whisper_stt` for STT.

This refactor keeps dependency installation and bootstrap concerns in Docker/bootstrap/preload, while moving speech-specific tooling, APIs, prompts, UI, and runtime behavior into the plugins. Core now exposes engine-agnostic `tts-service` and `stt-service` brokers, with browser-native TTS preserved as the fallback when Kokoro is disabled.

Included in this change:
- add built-in `_kokoro_tts` plugin with plugin-owned synth API, config, status UI, and provider registration
- add built-in `_whisper_stt` plugin with plugin-owned transcribe API, mic runtime, device UI, prompt injection, and provider registration
- remove legacy core speech APIs/helpers/settings/UI and delete unused `webui/js/speech_browser.js`
- replace the old hardcoded speech settings section with a generic voice surface backed by plugin extensions
- update preload/docs/tests to match the new plugin-owned speech architecture

Behavioral intent:
- both plugins are built-in but not `always_enabled`
- users can now hot-switch TTS and STT independently
- browser TTS remains available when `_kokoro_tts` is off
- Whisper mic UI only appears when `_whisper_stt` is enabled
31 lines
1 KiB
Python
31 lines
1 KiB
Python
import importlib.metadata
|
|
|
|
from helpers.api import ApiHandler, Request, Response
|
|
from plugins._kokoro_tts.helpers import migration, runtime
|
|
|
|
|
|
class Status(ApiHandler):
    """API handler that reports the current state of the `_kokoro_tts` plugin."""

    async def process(self, input: dict, request: Request) -> dict | Response:
        """Assemble the status payload for the Kokoro TTS plugin.

        Calls ``migration.ensure_migrated()`` first, then collects the
        installed ``kokoro`` package version and the values reported by the
        plugin's ``runtime`` helper.

        Args:
            input: Request payload; not read by this handler.
            request: Incoming request object; not read by this handler.

        Returns:
            A dict with the keys ``plugin``, ``enabled``, ``config``,
            ``model`` (``ready`` / ``loading``), ``package``
            (``version`` / ``error``) and ``fallback``.
        """
        migration.ensure_migrated()

        # Best-effort lookup: any failure is reported as text in the payload
        # instead of failing the whole status request.
        package_info = {"version": "", "error": ""}
        try:
            package_info["version"] = importlib.metadata.version("kokoro")
        except Exception as exc:
            package_info["error"] = str(exc)

        # Query the runtime helper in the same order the payload lists them.
        globally_enabled = runtime.is_globally_enabled()
        plugin_config = runtime.get_config()
        model_ready = await runtime.is_downloaded()
        model_loading = await runtime.is_downloading()

        return {
            "plugin": "_kokoro_tts",
            "enabled": globally_enabled,
            "config": plugin_config,
            "model": {
                "ready": model_ready,
                "loading": model_loading,
            },
            "package": package_info,
            "fallback": "Browser-native speechSynthesis remains the fallback when Kokoro is disabled.",
        }
|