feat(tts): add Inworld speech provider (#55972)

Adds the bundled Inworld speech provider with docs, config surface, SSRF-guarded fetches, directive overrides, native voice-note/telephony output coverage, and live `.profile` verification.

Co-authored-by: cshape <cshape@users.noreply.github.com>
This commit is contained in:
Cale Shapera 2026-04-25 14:33:21 -07:00 committed by GitHub
parent 167588cb4f
commit 0bcb4c95c1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 1295 additions and 16 deletions

View file

@ -82,4 +82,5 @@ OPENCLAW_GATEWAY_TOKEN=
# ELEVENLABS_API_KEY=...
# XI_API_KEY=... # alias for ElevenLabs
# INWORLD_API_KEY=...
# DEEPGRAM_API_KEY=...

5
.github/labeler.yml vendored
View file

@ -307,6 +307,11 @@
- changed-files:
- any-glob-to-any-file:
- "extensions/huggingface/**"
"extensions: inworld":
- changed-files:
- any-glob-to-any-file:
- "extensions/inworld/**"
- "docs/providers/inworld.md"
"extensions: kilocode":
- changed-files:
- any-glob-to-any-file:

View file

@ -54,6 +54,7 @@ Docs: https://docs.openclaw.ai
- Providers/Xiaomi: add MiMo TTS as a bundled speech provider with MP3/WAV output and voice-note Opus transcoding. Fixes #52376. (#55614) Thanks @zoujiejun.
- Providers/ElevenLabs: include `eleven_v3` in the bundled TTS model catalog so model selection surfaces can offer ElevenLabs v3. (#68321) Thanks @itsuzef.
- Providers/Local CLI TTS: add a bundled local command speech provider with file/stdout input, voice-note Opus conversion, and telephony PCM output. (#56239) Thanks @solar2ain.
- Providers/Inworld: add Inworld as a bundled speech provider with streaming TTS synthesis, voice listing, voice-note output, and PCM telephony output. (#55972) Thanks @cshape.
- Android/Talk Mode: expose Talk Mode in the Voice tab with runtime-owned voice capture modes and microphone foreground-service escalation. Thanks @alex-latitude.
- Providers/LiteLLM: register `litellm` as an image-generation provider so `image_generate model=litellm/...` calls and `agents.defaults.imageGenerationModel.fallbacks` entries resolve through the LiteLLM proxy. Thanks @zqchris.
- Codex harness: require Codex app-server `0.125.0` or newer and cover native MCP `PreToolUse`, `PostToolUse`, and `PermissionRequest` payloads through the OpenClaw hook relay.

View file

@ -1,4 +1,4 @@
9ac3d271f9bfa9611557f0b52e4d0a600693bdd1de75cc1bafc320fc4d4f0075 config-baseline.json
0b0d796bceddfb9e2929518ba84af626da7f5d75c392a217041f36e850c4e74f config-baseline.json
271fdf1d6652927e0fc160a6f25276bf6dccb8f1b27fab15e0fc2620e8cacab4 config-baseline.core.json
7cd9c908f066c143eab2a201efbc9640f483ab28bba92ddeca1d18cc2b528bc3 config-baseline.channel.json
7825b56a5b3fcdbe2e09ef8fe5d9f12ac3598435afebe20413051e45b0d1968e config-baseline.plugin.json
17eb3f8887193579ff32e35f9bd520ba2bd6049e52ab18855c5d41fcbf195d83 config-baseline.plugin.json

View file

@ -1317,6 +1317,7 @@
"providers/groq",
"providers/huggingface",
"providers/inferrs",
"providers/inworld",
"providers/kilocode",
"providers/litellm",
"providers/lmstudio",

115
docs/providers/inworld.md Normal file
View file

@ -0,0 +1,115 @@
---
summary: "Inworld streaming text-to-speech for OpenClaw replies"
read_when:
- You want Inworld speech synthesis for outbound replies
- You need PCM telephony or OGG_OPUS voice-note output from Inworld
title: "Inworld"
---
Inworld is a streaming text-to-speech (TTS) provider. In OpenClaw it
synthesizes outbound reply audio (MP3 by default, OGG_OPUS for voice notes)
and PCM audio for telephony channels such as Voice Call.
OpenClaw posts to Inworld's streaming TTS endpoint, concatenates the
returned base64 audio chunks into a single buffer, and hands the result to
the standard reply-audio pipeline.
| Detail | Value |
| ------------- | ----------------------------------------------------------- |
| Website | [inworld.ai](https://inworld.ai) |
| Docs | [docs.inworld.ai/tts/tts](https://docs.inworld.ai/tts/tts) |
| Auth | `INWORLD_API_KEY` (HTTP Basic, Base64 dashboard credential) |
| Default voice | `Sarah` |
| Default model | `inworld-tts-1.5-max` |
## Getting started
<Steps>
<Step title="Set your API key">
Copy the credential from your Inworld dashboard (Workspace > API Keys)
and set it as an env var. The value is sent verbatim as the HTTP Basic
credential, so do not Base64-encode it again or convert it to a bearer
token.
```
INWORLD_API_KEY=<base64-credential-from-dashboard>
```
</Step>
<Step title="Select Inworld in messages.tts">
```json5
{
messages: {
tts: {
auto: "always",
provider: "inworld",
providers: {
inworld: {
voiceId: "Sarah",
modelId: "inworld-tts-1.5-max",
},
},
},
},
}
```
</Step>
<Step title="Send a message">
Send a reply through any connected channel. OpenClaw synthesizes the
audio with Inworld and delivers it as MP3 (or OGG_OPUS when the channel
expects a voice note).
</Step>
</Steps>
## Configuration options
| Option | Path | Description |
| ------------- | -------------------------------------------- | ----------------------------------------------------------------- |
| `apiKey` | `messages.tts.providers.inworld.apiKey` | Base64 dashboard credential. Falls back to `INWORLD_API_KEY`. |
| `baseUrl` | `messages.tts.providers.inworld.baseUrl` | Override Inworld API base URL (default `https://api.inworld.ai`). |
| `voiceId` | `messages.tts.providers.inworld.voiceId` | Voice identifier (default `Sarah`). |
| `modelId` | `messages.tts.providers.inworld.modelId` | TTS model id (default `inworld-tts-1.5-max`). |
| `temperature` | `messages.tts.providers.inworld.temperature` | Sampling temperature `0..2` (optional). |
## Notes
<AccordionGroup>
<Accordion title="Authentication">
Inworld uses HTTP Basic auth with a single Base64-encoded credential
string. Copy it verbatim from the Inworld dashboard. The provider sends
it as `Authorization: Basic <apiKey>` without any further encoding, so
do not Base64-encode it yourself and do not pass a bearer-style token.
See [TTS auth notes](/tools/tts#inworld-primary) for the same callout.
</Accordion>
<Accordion title="Models">
Supported model ids: `inworld-tts-1.5-max` (default),
`inworld-tts-1.5-mini`, `inworld-tts-1-max`, `inworld-tts-1`.
</Accordion>
<Accordion title="Audio outputs">
Replies use MP3 by default. When the channel target is `voice-note`
OpenClaw asks Inworld for `OGG_OPUS` so the audio plays as a native
voice bubble. Telephony synthesis uses raw `PCM` at 22050 Hz to feed
the telephony bridge.
</Accordion>
<Accordion title="Custom endpoints">
Override the API host with `messages.tts.providers.inworld.baseUrl`.
Trailing slashes are stripped before requests are sent.
</Accordion>
</AccordionGroup>
## Related
<CardGroup cols={2}>
<Card title="Text-to-speech" href="/tools/tts" icon="waveform-lines">
TTS overview, providers, and `messages.tts` config.
</Card>
<Card title="Configuration" href="/gateway/configuration" icon="gear">
Full config reference including `messages.tts` settings.
</Card>
<Card title="Providers" href="/providers" icon="grid">
All bundled OpenClaw providers.
</Card>
<Card title="Troubleshooting" href="/help/troubleshooting" icon="wrench">
Common issues and debugging steps.
</Card>
</CardGroup>

View file

@ -7,7 +7,7 @@ read_when:
title: "Text-to-speech"
---
OpenClaw can convert outbound replies into audio using ElevenLabs, Google Gemini, Gradium, Local CLI, Microsoft, MiniMax, OpenAI, Vydra, xAI, or Xiaomi MiMo.
OpenClaw can convert outbound replies into audio using ElevenLabs, Google Gemini, Gradium, Inworld, Local CLI, Microsoft, MiniMax, OpenAI, Vydra, xAI, or Xiaomi MiMo.
It works anywhere OpenClaw can send audio.
## Supported services
@ -15,6 +15,7 @@ It works anywhere OpenClaw can send audio.
- **ElevenLabs** (primary or fallback provider)
- **Google Gemini** (primary or fallback provider; uses Gemini API TTS)
- **Gradium** (primary or fallback provider; supports voice-note and telephony output)
- **Inworld** (primary or fallback provider; uses the Inworld streaming TTS API)
- **Local CLI** (primary or fallback provider; runs a configured local TTS command)
- **Microsoft** (primary or fallback provider; current bundled implementation uses `node-edge-tts`)
- **MiniMax** (primary or fallback provider; uses the T2A v2 API)
@ -38,11 +39,12 @@ or ElevenLabs.
## Optional keys
If you want OpenAI, ElevenLabs, Google Gemini, Gradium, MiniMax, Vydra, xAI, or Xiaomi MiMo:
If you want ElevenLabs, Google Gemini, Gradium, Inworld, MiniMax, OpenAI, Vydra, xAI, or Xiaomi MiMo:
- `ELEVENLABS_API_KEY` (or `XI_API_KEY`)
- `GEMINI_API_KEY` (or `GOOGLE_API_KEY`)
- `GRADIUM_API_KEY`
- `INWORLD_API_KEY`
- `MINIMAX_API_KEY`; MiniMax TTS also accepts Token Plan auth via
`MINIMAX_OAUTH_TOKEN`, `MINIMAX_CODE_PLAN_KEY`, or
`MINIMAX_CODING_API_KEY`
@ -64,6 +66,7 @@ so that provider must also be authenticated if you enable summaries.
- [ElevenLabs Text to Speech](https://elevenlabs.io/docs/api-reference/text-to-speech)
- [ElevenLabs Authentication](https://elevenlabs.io/docs/api-reference/authentication)
- [Gradium](/providers/gradium)
- [Inworld TTS API](https://docs.inworld.ai/tts/tts)
- [MiniMax T2A v2 API](https://platform.minimaxi.com/document/T2A%20V2)
- [Xiaomi MiMo speech synthesis](/providers/xiaomi#text-to-speech)
- [node-edge-tts](https://github.com/SchneeHertz/node-edge-tts)
@ -217,6 +220,35 @@ by the bundled Google image-generation provider. Resolution order is
`messages.tts.providers.google.apiKey` -> `models.providers.google.apiKey` ->
`GEMINI_API_KEY` -> `GOOGLE_API_KEY`.
### Inworld primary
```json5
{
messages: {
tts: {
auto: "always",
provider: "inworld",
providers: {
inworld: {
apiKey: "inworld_api_key",
baseUrl: "https://api.inworld.ai",
voiceId: "Sarah",
modelId: "inworld-tts-1.5-max",
temperature: 0.8,
},
},
},
},
}
```
The `apiKey` value must be the Base64-encoded credential string copied
verbatim from the Inworld dashboard (Workspace > API Keys). The provider
sends it as `Authorization: Basic <apiKey>` without any additional
encoding, so do not pass a raw bearer token and do not Base64-encode it
yourself. The key falls back to the `INWORLD_API_KEY` env var. See
[Inworld provider](/providers/inworld) for full setup.
### xAI primary
```json5
@ -415,7 +447,7 @@ Then run:
- `tagged` only sends audio when the reply includes `[[tts:key=value]]` directives or a `[[tts:text]]...[[/tts:text]]` block.
- `enabled`: legacy toggle (doctor migrates this to `auto`).
- `mode`: `"final"` (default) or `"all"` (includes tool/block replies).
- `provider`: speech provider id such as `"elevenlabs"`, `"google"`, `"gradium"`, `"microsoft"`, `"minimax"`, `"openai"`, `"vydra"`, `"xai"`, or `"xiaomi"` (fallback is automatic).
- `provider`: speech provider id such as `"elevenlabs"`, `"google"`, `"gradium"`, `"inworld"`, `"microsoft"`, `"minimax"`, `"openai"`, `"vydra"`, `"xai"`, or `"xiaomi"` (fallback is automatic).
- If `provider` is **unset**, OpenClaw uses the first configured speech provider in registry auto-select order.
- Legacy `provider: "edge"` config is repaired by `openclaw doctor --fix` and
rewritten to `provider: "microsoft"`.
@ -429,7 +461,7 @@ Then run:
- `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded.
- `timeoutMs`: request timeout (ms).
- `prefsPath`: override the local prefs JSON path (provider/limit/summary).
- `apiKey` values fall back to env vars (`ELEVENLABS_API_KEY`/`XI_API_KEY`, `GEMINI_API_KEY`/`GOOGLE_API_KEY`, `GRADIUM_API_KEY`, `MINIMAX_API_KEY`, `OPENAI_API_KEY`, `VYDRA_API_KEY`, `XAI_API_KEY`, `XIAOMI_API_KEY`).
- `apiKey` values fall back to env vars (`ELEVENLABS_API_KEY`/`XI_API_KEY`, `GEMINI_API_KEY`/`GOOGLE_API_KEY`, `GRADIUM_API_KEY`, `INWORLD_API_KEY`, `MINIMAX_API_KEY`, `OPENAI_API_KEY`, `VYDRA_API_KEY`, `XAI_API_KEY`, `XIAOMI_API_KEY`).
- `providers.elevenlabs.baseUrl`: override ElevenLabs API base URL.
- `providers.openai.baseUrl`: override the OpenAI TTS endpoint.
- Resolution order: `messages.tts.providers.openai.baseUrl` -> `OPENAI_TTS_BASE_URL` -> `https://api.openai.com/v1`
@ -453,6 +485,10 @@ Then run:
- `providers.tts-local-cli.timeoutMs`: command timeout in milliseconds (default `120000`).
- `providers.tts-local-cli.cwd`: optional command working directory.
- `providers.tts-local-cli.env`: optional string environment overrides for the command.
- `providers.inworld.baseUrl`: override Inworld API base URL (default `https://api.inworld.ai`).
- `providers.inworld.voiceId`: Inworld voice identifier (default `Sarah`).
- `providers.inworld.modelId`: Inworld TTS model (default `inworld-tts-1.5-max`; also supports `inworld-tts-1.5-mini`, `inworld-tts-1-max`, `inworld-tts-1`).
- `providers.inworld.temperature`: sampling temperature `0..2` (optional).
- `providers.google.model`: Gemini TTS model (default `gemini-3.1-flash-tts-preview`).
- `providers.google.voiceName`: Gemini prebuilt voice name (default `Kore`; `voice` is also accepted).
- `providers.google.audioProfile`: natural-language style prompt prepended before the spoken text.
@ -586,6 +622,7 @@ These override `messages.tts.*` for that host.
with `ffmpeg`.
- **Google Gemini**: Gemini API TTS returns raw 24kHz PCM. OpenClaw wraps it as WAV for audio attachments, transcodes it to 48kHz Opus for voice-note targets, and returns PCM directly for Talk/telephony.
- **Gradium**: WAV for audio attachments, Opus for voice-note targets, and `ulaw_8000` at 8 kHz for telephony.
- **Inworld**: MP3 for normal audio attachments, native `OGG_OPUS` for voice-note targets, and raw `PCM` at 22050 Hz for Talk/telephony.
- **xAI**: MP3 by default; `responseFormat` may be `mp3`, `wav`, `pcm`, `mulaw`, or `alaw`. OpenClaw uses xAI's batch REST TTS endpoint and returns a complete audio attachment; xAI's streaming TTS WebSocket is not used by this provider path. Native Opus voice-note format is not supported by this path.
- **Microsoft**: uses `microsoft.outputFormat` (default `audio-24khz-48kbitrate-mono-mp3`).
- The bundled transport accepts an `outputFormat`, but not all formats are available from the service.

View file

@ -0,0 +1,11 @@
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
import { buildInworldSpeechProvider } from "./speech-provider.js";
/**
 * Plugin entry for the bundled Inworld speech provider.
 *
 * The host calls `register` once at plugin load; registering the provider
 * makes Inworld selectable via `messages.tts.provider: "inworld"`.
 */
const inworldPluginEntry = definePluginEntry({
  id: "inworld",
  name: "Inworld Speech",
  description: "Bundled Inworld speech provider",
  register: (api) => {
    api.registerSpeechProvider(buildInworldSpeechProvider());
  },
});

export default inworldPluginEntry;

View file

@ -0,0 +1,84 @@
import { describe, expect, it } from "vitest";
import { isLiveTestEnabled } from "../../src/agents/live-test-helpers.js";
import {
registerProviderPlugin,
requireRegisteredProvider,
} from "../../test/helpers/plugins/provider-registration.js";
import plugin from "./index.js";
// Gate live execution: requires the live-test flag AND a real Inworld key.
const INWORLD_API_KEY = process.env.INWORLD_API_KEY?.trim() ?? "";
const LIVE = isLiveTestEnabled() && INWORLD_API_KEY.length > 0;
const describeLive = LIVE ? describe : describe.skip;

// Registers the plugin through the shared provider-registration harness and
// returns the registries it populated.
const registerInworldPlugin = () =>
  registerProviderPlugin({
    plugin,
    id: "inworld",
    name: "Inworld",
  });

describeLive("inworld plugin live", () => {
  it("lists voices through the registered speech provider", async () => {
    const { speechProviders } = await registerInworldPlugin();
    const provider = requireRegisteredProvider(speechProviders, "inworld");
    const voices = await provider.listVoices?.({
      apiKey: INWORLD_API_KEY,
    });
    // The default voice ("Sarah") should always be in the live catalog.
    expect(voices?.length).toBeGreaterThan(0);
    expect(voices).toEqual(expect.arrayContaining([expect.objectContaining({ id: "Sarah" })]));
  }, 120_000);

  it("synthesizes MP3, native voice-note Ogg/Opus, and telephony PCM", async () => {
    const { speechProviders } = await registerInworldPlugin();
    const provider = requireRegisteredProvider(speechProviders, "inworld");
    const providerConfig = {
      apiKey: INWORLD_API_KEY,
      voiceId: "Sarah",
      modelId: "inworld-tts-1.5-max",
    };
    // Default attachment target: MP3, not voice-note compatible.
    const audioFile = await provider.synthesize({
      text: "OpenClaw Inworld text to speech integration test OK.",
      cfg: { plugins: { enabled: true } } as never,
      providerConfig,
      target: "audio-file",
      timeoutMs: 90_000,
    });
    expect(audioFile.outputFormat).toBe("mp3");
    expect(audioFile.fileExtension).toBe(".mp3");
    expect(audioFile.voiceCompatible).toBe(false);
    expect(audioFile.audioBuffer.byteLength).toBeGreaterThan(512);
    // Sanity check: payload must not be a WAV ("RIFF") container.
    expect(audioFile.audioBuffer.subarray(0, 4).toString("ascii")).not.toBe("RIFF");
    // Voice-note target: native Ogg/Opus, identified by the "OggS" magic bytes.
    const voiceNote = await provider.synthesize({
      text: "OpenClaw Inworld voice note integration test OK.",
      cfg: { plugins: { enabled: true } } as never,
      providerConfig,
      target: "voice-note",
      timeoutMs: 90_000,
    });
    expect(voiceNote.outputFormat).toBe("ogg_opus");
    expect(voiceNote.fileExtension).toBe(".ogg");
    expect(voiceNote.voiceCompatible).toBe(true);
    expect(voiceNote.audioBuffer.byteLength).toBeGreaterThan(128);
    expect(voiceNote.audioBuffer.subarray(0, 4).toString("ascii")).toBe("OggS");
    // Telephony path: raw PCM at 22050 Hz (no container header).
    const telephony = await provider.synthesizeTelephony?.({
      text: "OpenClaw Inworld telephony check OK.",
      cfg: { plugins: { enabled: true } } as never,
      providerConfig,
      timeoutMs: 90_000,
    });
    if (!telephony) {
      throw new Error("Inworld telephony synthesis did not return audio");
    }
    expect(telephony.outputFormat).toBe("pcm");
    expect(telephony.sampleRate).toBe(22_050);
    expect(telephony.audioBuffer.byteLength).toBeGreaterThan(512);
    expect(telephony.audioBuffer.subarray(0, 4).toString("ascii")).not.toBe("RIFF");
  }, 180_000);
});

View file

@ -0,0 +1,40 @@
{
"id": "inworld",
"enabledByDefault": true,
"name": "Inworld",
"description": "Inworld streaming text-to-speech (MP3, OGG_OPUS, PCM telephony).",
"providerAuthEnvVars": {
"inworld": ["INWORLD_API_KEY"]
},
"contracts": {
"speechProviders": ["inworld"]
},
"configSchema": {
"type": "object",
"additionalProperties": false,
"properties": {
"apiKey": {
"type": "string",
"description": "Inworld API key. Must be the Base64 credential string from the Inworld dashboard (used as Authorization: Basic <apiKey>). Falls back to INWORLD_API_KEY env var."
},
"baseUrl": {
"type": "string",
"description": "Override Inworld API base URL (default https://api.inworld.ai)."
},
"voiceId": {
"type": "string",
"description": "Voice identifier (default Sarah)."
},
"modelId": {
"type": "string",
"description": "TTS model id (default inworld-tts-1.5-max)."
},
"temperature": {
"type": "number",
"minimum": 0,
"maximum": 2,
"description": "Sampling temperature 0..2."
}
}
}
}

View file

@ -0,0 +1,15 @@
{
"name": "@openclaw/inworld-speech",
"version": "2026.4.16",
"private": true,
"description": "OpenClaw Inworld speech plugin",
"type": "module",
"devDependencies": {
"@openclaw/plugin-sdk": "workspace:*"
},
"openclaw": {
"extensions": [
"./index.ts"
]
}
}

View file

@ -0,0 +1,213 @@
import { afterEach, describe, expect, it, vi } from "vitest";
// Hoisted mock fns so the vi.mock factory below can reference them; vi.mock
// calls are hoisted above imports at transform time.
const { inworldTTSMock, listInworldVoicesMock } = vi.hoisted(() => ({
  inworldTTSMock: vi.fn(),
  listInworldVoicesMock: vi.fn(),
}));
// Replace only the network-facing helpers; keep the module's constants and
// normalizers real so provider defaults stay authentic.
vi.mock("./tts.js", async (importOriginal) => {
  const actual = await importOriginal<typeof import("./tts.js")>();
  return {
    ...actual,
    inworldTTS: inworldTTSMock,
    listInworldVoices: listInworldVoicesMock,
  };
});
import { buildInworldSpeechProvider } from "./speech-provider.js";

describe("buildInworldSpeechProvider", () => {
  // Snapshot the ambient env var so each test can set/delete it freely.
  const originalEnv = process.env.INWORLD_API_KEY;
  afterEach(() => {
    // Restore the env var exactly. Assigning `undefined` to a process.env
    // key stores the literal string "undefined" (truthy!), so the
    // originally-unset case must use `delete` to avoid leaking a fake key
    // into later tests.
    if (originalEnv === undefined) {
      delete process.env.INWORLD_API_KEY;
    } else {
      process.env.INWORLD_API_KEY = originalEnv;
    }
    inworldTTSMock.mockReset();
    listInworldVoicesMock.mockReset();
    vi.restoreAllMocks();
  });

  it("reports configured when INWORLD_API_KEY env var is set", () => {
    process.env.INWORLD_API_KEY = "test-key";
    const provider = buildInworldSpeechProvider();
    expect(
      provider.isConfigured({
        providerConfig: {},
        timeoutMs: 30_000,
      }),
    ).toBe(true);
  });

  it("reports configured when providerConfig apiKey is set", () => {
    delete process.env.INWORLD_API_KEY;
    const provider = buildInworldSpeechProvider();
    expect(
      provider.isConfigured({
        providerConfig: { apiKey: "config-key" },
        timeoutMs: 30_000,
      }),
    ).toBe(true);
  });

  it("reports not configured when no key is available", () => {
    delete process.env.INWORLD_API_KEY;
    const provider = buildInworldSpeechProvider();
    expect(
      provider.isConfigured({
        providerConfig: {},
        timeoutMs: 30_000,
      }),
    ).toBe(false);
  });

  it("has correct provider metadata", () => {
    const provider = buildInworldSpeechProvider();
    expect(provider.id).toBe("inworld");
    expect(provider.label).toBe("Inworld");
    expect(provider.autoSelectOrder).toBe(30);
    expect(provider.models).toContain("inworld-tts-1.5-max");
    expect(provider.models).toContain("inworld-tts-1.5-mini");
  });

  it("normalizes provider-owned speech config from raw provider config", () => {
    const provider = buildInworldSpeechProvider();
    const resolved = provider.resolveConfig?.({
      cfg: {} as never,
      timeoutMs: 30_000,
      rawConfig: {
        providers: {
          inworld: {
            apiKey: "basic-key",
            // Trailing slash must be stripped by baseUrl normalization.
            baseUrl: "https://custom.inworld.example.com/",
            voiceId: "Ashley",
            modelId: "inworld-tts-1.5-mini",
            temperature: 0.8,
          },
        },
      },
    });
    expect(resolved).toEqual({
      apiKey: "basic-key",
      baseUrl: "https://custom.inworld.example.com",
      voiceId: "Ashley",
      modelId: "inworld-tts-1.5-mini",
      temperature: 0.8,
    });
  });

  it("parses Inworld TTS directive overrides", () => {
    const provider = buildInworldSpeechProvider();
    // Fully-permissive policy so every directive kind is accepted.
    const policy = {
      enabled: true,
      allowText: true,
      allowProvider: true,
      allowVoice: true,
      allowModelId: true,
      allowVoiceSettings: true,
      allowNormalization: true,
      allowSeed: true,
    };
    expect(provider.parseDirectiveToken?.({ key: "voice", value: "Ashley", policy })).toEqual({
      handled: true,
      overrides: { voiceId: "Ashley" },
    });
    expect(
      provider.parseDirectiveToken?.({
        key: "model",
        value: "inworld-tts-1.5-mini",
        policy,
      }),
    ).toEqual({
      handled: true,
      overrides: { modelId: "inworld-tts-1.5-mini" },
    });
    expect(provider.parseDirectiveToken?.({ key: "temperature", value: "0.7", policy })).toEqual({
      handled: true,
      overrides: { temperature: 0.7 },
    });
  });

  it("warns on invalid directive temperature", () => {
    const provider = buildInworldSpeechProvider();
    // Temperature is clamped to 0..2; "3" is out of range and must warn
    // instead of producing an override.
    expect(
      provider.parseDirectiveToken?.({
        key: "temperature",
        value: "3",
        policy: {
          enabled: true,
          allowText: true,
          allowProvider: true,
          allowVoice: true,
          allowModelId: true,
          allowVoiceSettings: true,
          allowNormalization: true,
          allowSeed: true,
        },
      }),
    ).toEqual({
      handled: true,
      warnings: ['invalid Inworld temperature "3"'],
    });
  });

  it("synthesizes voice-note targets with native OGG_OPUS output", async () => {
    inworldTTSMock.mockResolvedValueOnce(Buffer.from("opus"));
    const provider = buildInworldSpeechProvider();
    const result = await provider.synthesize?.({
      text: "Hello",
      cfg: {} as never,
      providerConfig: { apiKey: "key", voiceId: "Sarah", modelId: "inworld-tts-1.5-max" },
      // Overrides must win over the configured voice/model/temperature.
      providerOverrides: { voice: "Ashley", model: "inworld-tts-1.5-mini", temperature: 0.6 },
      target: "voice-note",
      timeoutMs: 30_000,
    });
    expect(inworldTTSMock).toHaveBeenCalledWith({
      text: "Hello",
      apiKey: "key",
      baseUrl: "https://api.inworld.ai",
      voiceId: "Ashley",
      modelId: "inworld-tts-1.5-mini",
      audioEncoding: "OGG_OPUS",
      temperature: 0.6,
      timeoutMs: 30_000,
    });
    expect(result).toEqual({
      audioBuffer: Buffer.from("opus"),
      outputFormat: "ogg_opus",
      fileExtension: ".ogg",
      voiceCompatible: true,
    });
  });

  it("synthesizes telephony PCM at 22050 Hz", async () => {
    inworldTTSMock.mockResolvedValueOnce(Buffer.from("pcm"));
    const provider = buildInworldSpeechProvider();
    const result = await provider.synthesizeTelephony?.({
      text: "Hello",
      cfg: {} as never,
      providerConfig: { apiKey: "key", voiceId: "Sarah", modelId: "inworld-tts-1.5-max" },
      timeoutMs: 30_000,
    });
    expect(inworldTTSMock).toHaveBeenCalledWith({
      text: "Hello",
      apiKey: "key",
      baseUrl: "https://api.inworld.ai",
      voiceId: "Sarah",
      modelId: "inworld-tts-1.5-max",
      audioEncoding: "PCM",
      sampleRateHertz: 22_050,
      temperature: undefined,
      timeoutMs: 30_000,
    });
    expect(result).toEqual({
      audioBuffer: Buffer.from("pcm"),
      outputFormat: "pcm",
      sampleRate: 22_050,
    });
  });
});

View file

@ -0,0 +1,221 @@
import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
import type {
SpeechDirectiveTokenParseContext,
SpeechProviderConfig,
SpeechProviderOverrides,
SpeechProviderPlugin,
} from "openclaw/plugin-sdk/speech-core";
import { asFiniteNumber, asObject, trimToUndefined } from "openclaw/plugin-sdk/speech-core";
import {
DEFAULT_INWORLD_MODEL_ID,
DEFAULT_INWORLD_VOICE_ID,
type InworldAudioEncoding,
INWORLD_TTS_MODELS,
inworldTTS,
listInworldVoices,
normalizeInworldBaseUrl,
} from "./tts.js";
/**
 * Fully-resolved Inworld provider configuration. `baseUrl`, `voiceId`, and
 * `modelId` always carry a value (defaults applied by the normalizers below);
 * `apiKey` may still be absent, in which case callers fall back to the
 * INWORLD_API_KEY env var at request time.
 */
type InworldProviderConfig = {
  apiKey?: string;
  baseUrl: string;
  voiceId: string;
  modelId: string;
  temperature?: number;
};

/** Per-request overrides parsed from TTS directives; every field optional. */
type InworldProviderOverrides = {
  voiceId?: string;
  modelId?: string;
  temperature?: number;
};
/**
 * Resolves a raw `messages.tts` config tree into a complete Inworld config.
 *
 * Looks up `providers.inworld` first and falls back to a top-level `inworld`
 * key. Blank strings are treated as unset, the base URL is normalized
 * (trailing slashes stripped), and missing voice/model ids receive the
 * bundled defaults.
 */
function normalizeInworldProviderConfig(rawConfig: Record<string, unknown>): InworldProviderConfig {
  const providerSection = asObject(rawConfig.providers);
  const inworldSection = asObject(providerSection?.inworld) ?? asObject(rawConfig.inworld);
  const apiKey = normalizeResolvedSecretInputString({
    value: inworldSection?.apiKey,
    path: "messages.tts.providers.inworld.apiKey",
  });
  const baseUrl = normalizeInworldBaseUrl(trimToUndefined(inworldSection?.baseUrl));
  const voiceId = trimToUndefined(inworldSection?.voiceId) ?? DEFAULT_INWORLD_VOICE_ID;
  const modelId = trimToUndefined(inworldSection?.modelId) ?? DEFAULT_INWORLD_MODEL_ID;
  const temperature = asFiniteNumber(inworldSection?.temperature);
  return { apiKey, baseUrl, voiceId, modelId, temperature };
}
/**
 * Completes a (possibly partial) SpeechProviderConfig, filling any missing
 * field from the normalized defaults and re-normalizing the base URL in case
 * the caller supplied one with a trailing slash.
 */
function readInworldProviderConfig(config: SpeechProviderConfig): InworldProviderConfig {
  const fallback = normalizeInworldProviderConfig({});
  const rawBaseUrl = trimToUndefined(config.baseUrl) ?? fallback.baseUrl;
  const resolved: InworldProviderConfig = {
    apiKey: trimToUndefined(config.apiKey) ?? fallback.apiKey,
    baseUrl: normalizeInworldBaseUrl(rawBaseUrl),
    voiceId: trimToUndefined(config.voiceId) ?? fallback.voiceId,
    modelId: trimToUndefined(config.modelId) ?? fallback.modelId,
    temperature: asFiniteNumber(config.temperature) ?? fallback.temperature,
  };
  return resolved;
}
/**
 * Extracts Inworld-specific overrides from generic speech overrides.
 *
 * Accepts both canonical (`voiceId`/`modelId`) and shorthand (`voice`/`model`)
 * keys; blank strings and non-finite temperatures resolve to `undefined`.
 */
function readInworldOverrides(
  overrides: SpeechProviderOverrides | undefined,
): InworldProviderOverrides {
  if (overrides == null) {
    return {};
  }
  const voiceId = trimToUndefined(overrides.voiceId ?? overrides.voice);
  const modelId = trimToUndefined(overrides.modelId ?? overrides.model);
  const temperature = asFiniteNumber(overrides.temperature);
  return { voiceId, modelId, temperature };
}
/**
 * Parses one `[[tts:key=value]]` directive token into Inworld overrides.
 *
 * Recognized keys: voice aliases, model aliases, and `temperature`. A
 * recognized key whose policy flag is disabled is consumed (`handled: true`)
 * with no override so it never leaks into the spoken text. A temperature
 * outside 0..2 (or non-numeric) yields a warning instead of an override.
 * Unrecognized keys return `handled: false` for other parsers to try.
 */
function parseDirectiveToken(ctx: SpeechDirectiveTokenParseContext): {
  handled: boolean;
  overrides?: SpeechProviderOverrides;
  warnings?: string[];
} {
  const voiceKeys = ["voice", "voiceid", "voice_id", "inworld_voice", "inworldvoice"];
  const modelKeys = ["model", "modelid", "model_id", "inworld_model", "inworldmodel"];
  if (voiceKeys.includes(ctx.key)) {
    return ctx.policy.allowVoice
      ? { handled: true, overrides: { voiceId: ctx.value } }
      : { handled: true };
  }
  if (modelKeys.includes(ctx.key)) {
    return ctx.policy.allowModelId
      ? { handled: true, overrides: { modelId: ctx.value } }
      : { handled: true };
  }
  if (ctx.key === "temperature") {
    if (!ctx.policy.allowVoiceSettings) {
      return { handled: true };
    }
    const parsed = Number(ctx.value);
    const inRange = Number.isFinite(parsed) && parsed >= 0 && parsed <= 2;
    if (!inRange) {
      return { handled: true, warnings: [`invalid Inworld temperature "${ctx.value}"`] };
    }
    return { handled: true, overrides: { temperature: parsed } };
  }
  return { handled: false };
}
/**
 * Builds the bundled Inworld speech provider plugin.
 *
 * Output formats by target:
 * - regular attachments: MP3
 * - voice-note targets: native OGG_OPUS (voiceCompatible)
 * - telephony: raw PCM at 22050 Hz
 *
 * The API key resolves from the provider config first, then the
 * INWORLD_API_KEY env var.
 */
export function buildInworldSpeechProvider(): SpeechProviderPlugin {
  return {
    id: "inworld",
    label: "Inworld",
    // Position among bundled providers when no explicit provider is set.
    autoSelectOrder: 30,
    models: INWORLD_TTS_MODELS,
    // Normalize the raw `messages.tts` config tree into a resolved config.
    resolveConfig: ({ rawConfig }) => normalizeInworldProviderConfig(rawConfig),
    parseDirectiveToken,
    // Talk-mode config: start from the base TTS config, then overlay only
    // the talk-specific fields that are actually present (each spread is a
    // no-op when the field is unset).
    resolveTalkConfig: ({ baseTtsConfig, talkProviderConfig }) => {
      const base = normalizeInworldProviderConfig(baseTtsConfig);
      // Only run secret normalization when the talk config supplies a key,
      // so an absent key does not clobber the base one.
      const resolvedApiKey =
        talkProviderConfig.apiKey === undefined
          ? undefined
          : normalizeResolvedSecretInputString({
              value: talkProviderConfig.apiKey,
              path: "talk.providers.inworld.apiKey",
            });
      return {
        ...base,
        ...(resolvedApiKey === undefined ? {} : { apiKey: resolvedApiKey }),
        ...(trimToUndefined(talkProviderConfig.baseUrl) == null
          ? {}
          : { baseUrl: normalizeInworldBaseUrl(trimToUndefined(talkProviderConfig.baseUrl)) }),
        ...(trimToUndefined(talkProviderConfig.voiceId) == null
          ? {}
          : { voiceId: trimToUndefined(talkProviderConfig.voiceId) }),
        ...(trimToUndefined(talkProviderConfig.modelId) == null
          ? {}
          : { modelId: trimToUndefined(talkProviderConfig.modelId) }),
        ...(asFiniteNumber(talkProviderConfig.temperature) == null
          ? {}
          : { temperature: asFiniteNumber(talkProviderConfig.temperature) }),
      };
    },
    // Per-call talk overrides, again including only the fields provided.
    resolveTalkOverrides: ({ params }) => ({
      ...(trimToUndefined(params.voiceId) == null
        ? {}
        : { voiceId: trimToUndefined(params.voiceId) }),
      ...(trimToUndefined(params.modelId) == null
        ? {}
        : { modelId: trimToUndefined(params.modelId) }),
      ...(asFiniteNumber(params.temperature) == null
        ? {}
        : { temperature: asFiniteNumber(params.temperature) }),
    }),
    // Voice catalog; key resolution: request > provider config > env var.
    listVoices: async (req) => {
      const config = req.providerConfig ? readInworldProviderConfig(req.providerConfig) : undefined;
      const apiKey = req.apiKey || config?.apiKey || process.env.INWORLD_API_KEY;
      if (!apiKey) {
        throw new Error("Inworld API key missing");
      }
      return listInworldVoices({
        apiKey,
        baseUrl: req.baseUrl ?? config?.baseUrl,
      });
    },
    isConfigured: ({ providerConfig }) =>
      Boolean(readInworldProviderConfig(providerConfig).apiKey || process.env.INWORLD_API_KEY),
    // Reply-audio synthesis: OGG_OPUS for voice notes, MP3 otherwise.
    synthesize: async (req) => {
      const config = readInworldProviderConfig(req.providerConfig);
      const overrides = readInworldOverrides(req.providerOverrides);
      const apiKey = config.apiKey || process.env.INWORLD_API_KEY;
      if (!apiKey) {
        throw new Error("Inworld API key missing");
      }
      const useOpus = req.target === "voice-note";
      const audioEncoding: InworldAudioEncoding = useOpus ? "OGG_OPUS" : "MP3";
      const audioBuffer = await inworldTTS({
        text: req.text,
        apiKey,
        baseUrl: config.baseUrl,
        // Directive overrides win over configured voice/model/temperature.
        voiceId: overrides.voiceId ?? config.voiceId,
        modelId: overrides.modelId ?? config.modelId,
        audioEncoding,
        temperature: overrides.temperature ?? config.temperature,
        timeoutMs: req.timeoutMs,
      });
      return {
        audioBuffer,
        // Lowercased encoding doubles as the output-format id
        // ("MP3" -> "mp3", "OGG_OPUS" -> "ogg_opus").
        outputFormat: audioEncoding.toLowerCase(),
        fileExtension: useOpus ? ".ogg" : ".mp3",
        voiceCompatible: useOpus,
      };
    },
    // Telephony synthesis: raw PCM at 22050 Hz for the telephony bridge.
    synthesizeTelephony: async (req) => {
      const config = readInworldProviderConfig(req.providerConfig);
      const apiKey = config.apiKey || process.env.INWORLD_API_KEY;
      if (!apiKey) {
        throw new Error("Inworld API key missing");
      }
      const sampleRate = 22_050;
      const audioBuffer = await inworldTTS({
        text: req.text,
        apiKey,
        baseUrl: config.baseUrl,
        voiceId: config.voiceId,
        modelId: config.modelId,
        audioEncoding: "PCM",
        sampleRateHertz: sampleRate,
        temperature: config.temperature,
        timeoutMs: req.timeoutMs,
      });
      return { audioBuffer, outputFormat: "pcm", sampleRate };
    },
  };
}

View file

@ -0,0 +1,16 @@
{
"extends": "../tsconfig.package-boundary.base.json",
"compilerOptions": {
"rootDir": "."
},
"include": ["./*.ts", "./src/**/*.ts"],
"exclude": [
"./**/*.test.ts",
"./dist/**",
"./node_modules/**",
"./src/test-support/**",
"./src/**/*test-helpers.ts",
"./src/**/*test-harness.ts",
"./src/**/*test-support.ts"
]
}

View file

@ -0,0 +1,312 @@
import { afterEach, describe, expect, it, vi } from "vitest";
// Hoisted so the vi.mock factory below can reference the mock before
// imports run (vi.mock calls are hoisted above imports at transform time).
const { fetchWithSsrFGuardMock } = vi.hoisted(() => ({
  fetchWithSsrFGuardMock: vi.fn(),
}));
// Stub only the SSRF-guarded fetch; keep the module's other exports real.
vi.mock("openclaw/plugin-sdk/ssrf-runtime", async (importOriginal) => {
  const actual = await importOriginal<typeof import("openclaw/plugin-sdk/ssrf-runtime")>();
  return {
    ...actual,
    fetchWithSsrFGuard: fetchWithSsrFGuardMock,
  };
});
import { inworldTTS, listInworldVoices } from "./tts.js";

// Shape of the single argument the code under test passes to
// fetchWithSsrFGuard, as asserted by these tests.
type GuardRequest = {
  url: string;
  init?: RequestInit;
  auditContext?: string;
  policy?: unknown;
  timeoutMs?: number;
};

// Queues one guarded response and returns the release spy so tests can
// assert that the guard lease is released after use.
function queueGuardedResponse(response: Response): { release: ReturnType<typeof vi.fn> } {
  const release = vi.fn(async () => {});
  fetchWithSsrFGuardMock.mockResolvedValueOnce({ response, release });
  return { release };
}

// Returns the most recent guard request, failing loudly if no call was made.
function lastGuardRequest(): GuardRequest {
  const call = fetchWithSsrFGuardMock.mock.calls.at(-1);
  if (!call) {
    throw new Error("fetchWithSsrFGuard was not called");
  }
  return call[0] as GuardRequest;
}

// Extracts the string body from a captured request, asserting its type.
function readRequestBody(request: GuardRequest): string {
  const body = request.init?.body;
  if (typeof body !== "string") {
    throw new Error("expected request body to be a string");
  }
  return body;
}
describe("listInworldVoices", () => {
afterEach(() => {
fetchWithSsrFGuardMock.mockClear();
vi.restoreAllMocks();
});
it("maps Inworld voice metadata into speech voice options", async () => {
queueGuardedResponse(
new Response(
JSON.stringify({
voices: [
{
voiceId: "Dennis",
displayName: "Dennis",
description: "Middle-aged man with a smooth, calm and friendly voice",
langCode: "EN_US",
tags: ["male", "middle-aged", "smooth", "calm", "friendly"],
source: "SYSTEM",
},
{
voiceId: "Ashley",
displayName: "Ashley",
description: "A warm, natural female voice",
langCode: "EN_US",
tags: ["female", "warm", "natural"],
source: "SYSTEM",
},
],
}),
{ status: 200 },
),
);
const voices = await listInworldVoices({ apiKey: "test-key" });
expect(voices).toEqual([
{
id: "Dennis",
name: "Dennis",
description: "Middle-aged man with a smooth, calm and friendly voice",
locale: "EN_US",
gender: "male",
},
{
id: "Ashley",
name: "Ashley",
description: "A warm, natural female voice",
locale: "EN_US",
gender: "female",
},
]);
const request = lastGuardRequest();
expect(request.url).toBe("https://api.inworld.ai/voices/v1/voices");
expect(request.auditContext).toBe("inworld-voices");
expect(request.policy).toEqual({ hostnameAllowlist: ["api.inworld.ai"] });
const headers = new Headers(request.init?.headers);
expect(headers.get("authorization")).toBe("Basic test-key");
});
it("throws on API errors with response body", async () => {
queueGuardedResponse(new Response("service unavailable", { status: 503 }));
await expect(listInworldVoices({ apiKey: "test-key" })).rejects.toThrow(
"Inworld voices API error (503): service unavailable",
);
});
it("filters out voices with empty voiceId", async () => {
queueGuardedResponse(
new Response(
JSON.stringify({
voices: [
{ voiceId: "", displayName: "Empty" },
{ voiceId: "Dennis", displayName: "Dennis" },
],
}),
{ status: 200 },
),
);
const voices = await listInworldVoices({ apiKey: "test-key" });
expect(voices).toHaveLength(1);
expect(voices[0].id).toBe("Dennis");
});
it("returns empty array when no voices present", async () => {
queueGuardedResponse(new Response(JSON.stringify({}), { status: 200 }));
const voices = await listInworldVoices({ apiKey: "test-key" });
expect(voices).toEqual([]);
});
it("passes language filter as query parameter", async () => {
queueGuardedResponse(new Response(JSON.stringify({ voices: [] }), { status: 200 }));
await listInworldVoices({ apiKey: "test-key", language: "EN_US" });
expect(lastGuardRequest().url).toBe("https://api.inworld.ai/voices/v1/voices?languages=EN_US");
});
it("releases the guarded dispatcher after success", async () => {
const { release } = queueGuardedResponse(
new Response(JSON.stringify({ voices: [] }), { status: 200 }),
);
await listInworldVoices({ apiKey: "test-key" });
expect(release).toHaveBeenCalledTimes(1);
});
});
describe("inworldTTS", () => {
afterEach(() => {
fetchWithSsrFGuardMock.mockClear();
vi.restoreAllMocks();
});
it("concatenates base64 audio chunks from streaming response", async () => {
const chunk1 = Buffer.from("audio-chunk-1").toString("base64");
const chunk2 = Buffer.from("audio-chunk-2").toString("base64");
const body = [
JSON.stringify({ result: { audioContent: chunk1 } }),
JSON.stringify({ result: { audioContent: chunk2 } }),
].join("\n");
queueGuardedResponse(new Response(body, { status: 200 }));
const buffer = await inworldTTS({
text: "Hello world",
apiKey: "test-key",
});
expect(buffer).toEqual(
Buffer.concat([Buffer.from("audio-chunk-1"), Buffer.from("audio-chunk-2")]),
);
});
it("throws on HTTP errors with response body", async () => {
queueGuardedResponse(new Response("bad request body", { status: 400 }));
await expect(inworldTTS({ text: "test", apiKey: "test-key" })).rejects.toThrow(
"Inworld TTS API error (400): bad request body",
);
});
it("throws on in-stream errors", async () => {
const body = JSON.stringify({
error: { code: 3, message: "Invalid voice ID" },
});
queueGuardedResponse(new Response(body, { status: 200 }));
await expect(inworldTTS({ text: "test", apiKey: "test-key" })).rejects.toThrow(
"Inworld TTS stream error (3): Invalid voice ID",
);
});
it("throws on empty audio response", async () => {
const body = JSON.stringify({ result: { audioContent: "" } });
queueGuardedResponse(new Response(body, { status: 200 }));
await expect(inworldTTS({ text: "test", apiKey: "test-key" })).rejects.toThrow(
"Inworld TTS returned no audio data",
);
});
it("throws descriptive error on non-JSON line in stream", async () => {
queueGuardedResponse(new Response("<html>Rate limited</html>", { status: 200 }));
await expect(inworldTTS({ text: "test", apiKey: "test-key" })).rejects.toThrow(
"Inworld TTS stream parse error: unexpected non-JSON line:",
);
});
it("sends correct request body with defaults", async () => {
const chunk = Buffer.from("audio").toString("base64");
queueGuardedResponse(
new Response(JSON.stringify({ result: { audioContent: chunk } }), { status: 200 }),
);
await inworldTTS({ text: "Hello", apiKey: "test-key" });
const request = lastGuardRequest();
expect(request.url).toBe("https://api.inworld.ai/tts/v1/voice:stream");
expect(request.auditContext).toBe("inworld-tts");
expect(request.policy).toEqual({ hostnameAllowlist: ["api.inworld.ai"] });
expect(request.init?.method).toBe("POST");
const headers = new Headers(request.init?.headers);
expect(headers.get("authorization")).toBe("Basic test-key");
expect(headers.get("content-type")).toBe("application/json");
expect(JSON.parse(readRequestBody(request))).toEqual({
text: "Hello",
voiceId: "Sarah",
modelId: "inworld-tts-1.5-max",
audioConfig: { audioEncoding: "MP3" },
});
});
it("includes temperature and sampleRateHertz when provided", async () => {
const chunk = Buffer.from("audio").toString("base64");
queueGuardedResponse(
new Response(JSON.stringify({ result: { audioContent: chunk } }), { status: 200 }),
);
await inworldTTS({
text: "Hello",
apiKey: "test-key",
voiceId: "Ashley",
modelId: "inworld-tts-1.5-mini",
audioEncoding: "PCM",
sampleRateHertz: 22_050,
temperature: 0.8,
});
const callBody = JSON.parse(readRequestBody(lastGuardRequest()));
expect(callBody.voiceId).toBe("Ashley");
expect(callBody.modelId).toBe("inworld-tts-1.5-mini");
expect(callBody.audioConfig.audioEncoding).toBe("PCM");
expect(callBody.audioConfig.sampleRateHertz).toBe(22_050);
expect(callBody.temperature).toBe(0.8);
});
it("uses custom base URL", async () => {
const chunk = Buffer.from("audio").toString("base64");
queueGuardedResponse(
new Response(JSON.stringify({ result: { audioContent: chunk } }), { status: 200 }),
);
await inworldTTS({
text: "Hello",
apiKey: "test-key",
baseUrl: "https://custom.inworld.example.com/",
});
expect(lastGuardRequest().url).toBe("https://custom.inworld.example.com/tts/v1/voice:stream");
expect(lastGuardRequest().policy).toEqual({
hostnameAllowlist: ["custom.inworld.example.com"],
});
});
it("skips empty lines in streaming response", async () => {
const chunk = Buffer.from("audio").toString("base64");
const body = `\n${JSON.stringify({ result: { audioContent: chunk } })}\n\n`;
queueGuardedResponse(new Response(body, { status: 200 }));
const buffer = await inworldTTS({ text: "test", apiKey: "test-key" });
expect(buffer).toEqual(Buffer.from("audio"));
});
it("releases the guarded dispatcher after success", async () => {
const chunk = Buffer.from("audio").toString("base64");
const { release } = queueGuardedResponse(
new Response(JSON.stringify({ result: { audioContent: chunk } }), { status: 200 }),
);
await inworldTTS({ text: "test", apiKey: "test-key" });
expect(release).toHaveBeenCalledTimes(1);
});
it("releases the guarded dispatcher after failure", async () => {
const { release } = queueGuardedResponse(new Response("fail", { status: 500 }));
await expect(inworldTTS({ text: "test", apiKey: "test-key" })).rejects.toThrow();
expect(release).toHaveBeenCalledTimes(1);
});
});

190
extensions/inworld/tts.ts Normal file
View file

@ -0,0 +1,190 @@
import type { SpeechVoiceOption } from "openclaw/plugin-sdk/speech-core";
import { fetchWithSsrFGuard, type SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime";
// Production Inworld API origin; overridable via provider config `baseUrl`.
export const DEFAULT_INWORLD_BASE_URL = "https://api.inworld.ai";
// Defaults applied when the caller specifies no voice/model.
export const DEFAULT_INWORLD_VOICE_ID = "Sarah";
export const DEFAULT_INWORLD_MODEL_ID = "inworld-tts-1.5-max";
// Bundled TTS model catalog surfaced to config/model-selection UIs.
export const INWORLD_TTS_MODELS = [
  "inworld-tts-1.5-max",
  "inworld-tts-1.5-mini",
  "inworld-tts-1-max",
  "inworld-tts-1",
] as const;
// Encodings accepted by the Inworld `audioConfig.audioEncoding` field.
export type InworldAudioEncoding =
  | "MP3"
  | "OGG_OPUS"
  | "LINEAR16"
  | "PCM"
  | "WAV"
  | "ALAW"
  | "MULAW"
  | "FLAC";
/**
 * Trims the configured base URL and strips any trailing slashes, falling back
 * to the default Inworld origin when the result is empty or undefined.
 */
export function normalizeInworldBaseUrl(baseUrl?: string): string {
  const cleaned = (baseUrl ?? "").trim().replace(/\/+$/, "");
  return cleaned.length > 0 ? cleaned : DEFAULT_INWORLD_BASE_URL;
}
/**
 * Builds an SSRF policy pinning requests to the base URL's hostname.
 * Returns undefined for unparsable or non-HTTP(S) URLs so the guard
 * falls back to its default policy.
 */
function ssrfPolicyFromInworldBaseUrl(baseUrl: string): SsrFPolicy | undefined {
  let parsed: URL;
  try {
    parsed = new URL(baseUrl);
  } catch {
    return undefined;
  }
  const isHttp = parsed.protocol === "http:" || parsed.protocol === "https:";
  return isHttp ? { hostnameAllowlist: [parsed.hostname] } : undefined;
}
/**
 * Calls the Inworld streaming TTS endpoint and concatenates every audio chunk
 * into a single buffer. The stream returns newline-delimited JSON, each line
 * carrying base64 audio in `result.audioContent`.
 *
 * @param params.text Text to synthesize.
 * @param params.apiKey Base64 credential string copied from the Inworld
 *   dashboard; sent verbatim as the HTTP Basic credential.
 * @param params.baseUrl Optional API origin override; normalized, defaults to
 *   https://api.inworld.ai.
 * @param params.voiceId Voice id (defaults to "Sarah").
 * @param params.modelId Model id (defaults to "inworld-tts-1.5-max").
 * @param params.audioEncoding Output encoding (defaults to "MP3").
 * @param params.sampleRateHertz Output sample rate; omitted from the request
 *   when falsy.
 * @param params.temperature Sampling temperature; omitted only when
 *   null/undefined (0 is sent).
 * @param params.timeoutMs Timeout forwarded to the SSRF-guarded fetch.
 * @returns All streamed audio chunks concatenated into one Buffer.
 * @throws Error on non-2xx responses, in-stream error payloads, non-JSON
 *   stream lines, or when the stream yields no audio content.
 */
export async function inworldTTS(params: {
  text: string;
  apiKey: string;
  baseUrl?: string;
  voiceId?: string;
  modelId?: string;
  audioEncoding?: InworldAudioEncoding;
  sampleRateHertz?: number;
  temperature?: number;
  timeoutMs?: number;
}): Promise<Buffer> {
  const baseUrl = normalizeInworldBaseUrl(params.baseUrl);
  const url = `${baseUrl}/tts/v1/voice:stream`;
  const requestBody = JSON.stringify({
    text: params.text,
    voiceId: params.voiceId ?? DEFAULT_INWORLD_VOICE_ID,
    modelId: params.modelId ?? DEFAULT_INWORLD_MODEL_ID,
    audioConfig: {
      audioEncoding: params.audioEncoding ?? "MP3",
      // Spreading a falsy value is a no-op, so undefined/0 omit the field.
      ...(params.sampleRateHertz && { sampleRateHertz: params.sampleRateHertz }),
    },
    // `!= null` keeps a temperature of 0 while dropping null/undefined.
    ...(params.temperature != null && { temperature: params.temperature }),
  });
  const { response, release } = await fetchWithSsrFGuard({
    url,
    init: {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        // apiKey is the Base64-encoded credential string copied from the
        // Inworld dashboard; it is sent verbatim as the HTTP Basic
        // credential. Do not Base64-encode it here, and do not normalize
        // bearer-style tokens.
        Authorization: `Basic ${params.apiKey}`,
      },
      body: requestBody,
    },
    timeoutMs: params.timeoutMs,
    // Pin the guarded fetch to the configured Inworld hostname.
    policy: ssrfPolicyFromInworldBaseUrl(baseUrl),
    auditContext: "inworld-tts",
  });
  try {
    if (!response.ok) {
      // Include the response body for diagnosis; ignore body-read failures.
      const errorBody = await response.text().catch(() => "");
      throw new Error(`Inworld TTS API error (${response.status}): ${errorBody}`);
    }
    // Buffer the whole newline-delimited JSON stream, then parse line by line.
    const body = await response.text();
    const chunks: Buffer[] = [];
    for (const line of body.split("\n")) {
      const trimmed = line.trim();
      if (!trimmed) {
        continue;
      }
      let parsed: {
        result?: { audioContent?: string };
        error?: { code?: number; message?: string };
      };
      try {
        parsed = JSON.parse(trimmed) as typeof parsed;
      } catch {
        // e.g. an HTML rate-limit page delivered with a 200 status.
        throw new Error(
          `Inworld TTS stream parse error: unexpected non-JSON line: ${trimmed.slice(0, 80)}`,
        );
      }
      if (parsed.error) {
        throw new Error(`Inworld TTS stream error (${parsed.error.code}): ${parsed.error.message}`);
      }
      if (parsed.result?.audioContent) {
        chunks.push(Buffer.from(parsed.result.audioContent, "base64"));
      }
    }
    if (chunks.length === 0) {
      throw new Error("Inworld TTS returned no audio data");
    }
    return Buffer.concat(chunks);
  } finally {
    // Release the guarded dispatcher on success and failure alike.
    await release();
  }
}
/**
 * Fetches the Inworld voice catalog via the SSRF-guarded fetch and maps each
 * entry into a SpeechVoiceOption. Voices without a usable id are dropped;
 * gender is derived from the "male"/"female" tags when present.
 */
export async function listInworldVoices(params: {
  apiKey: string;
  baseUrl?: string;
  language?: string;
  timeoutMs?: number;
}): Promise<SpeechVoiceOption[]> {
  const baseUrl = normalizeInworldBaseUrl(params.baseUrl);
  const query = params.language ? `?languages=${encodeURIComponent(params.language)}` : "";
  const { response, release } = await fetchWithSsrFGuard({
    url: `${baseUrl}/voices/v1/voices${query}`,
    init: {
      method: "GET",
      headers: {
        // Dashboard key is already Base64-encoded; send verbatim as Basic.
        Authorization: `Basic ${params.apiKey}`,
      },
    },
    timeoutMs: params.timeoutMs,
    policy: ssrfPolicyFromInworldBaseUrl(baseUrl),
    auditContext: "inworld-voices",
  });
  try {
    if (!response.ok) {
      const detail = await response.text().catch(() => "");
      throw new Error(`Inworld voices API error (${response.status}): ${detail}`);
    }
    const payload = (await response.json()) as {
      voices?: Array<{
        voiceId?: string;
        displayName?: string;
        description?: string;
        langCode?: string;
        tags?: string[];
        source?: string;
      }>;
    };
    if (!Array.isArray(payload.voices)) {
      return [];
    }
    const options: SpeechVoiceOption[] = [];
    for (const voice of payload.voices) {
      const id = voice.voiceId?.trim() ?? "";
      if (id.length === 0) {
        continue;
      }
      options.push({
        id,
        name: voice.displayName?.trim() || undefined,
        description: voice.description?.trim() || undefined,
        locale: voice.langCode || undefined,
        gender: voice.tags?.find((t) => t === "male" || t === "female") || undefined,
      });
    }
    return options;
  } finally {
    await release();
  }
}

6
pnpm-lock.yaml generated
View file

@ -675,6 +675,12 @@ importers:
specifier: workspace:*
version: link:../../packages/plugin-sdk
extensions/inworld:
devDependencies:
'@openclaw/plugin-sdk':
specifier: workspace:*
version: link:../../packages/plugin-sdk
extensions/irc:
devDependencies:
'@openclaw/plugin-sdk':

View file

@ -3,6 +3,7 @@ import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";
import { importFreshModule } from "../../../test/helpers/import-fresh.ts";
import { shouldExpectNativeJitiForJavaScriptTestRuntime } from "../../test-utils/jiti-runtime.js";
import {
isJavaScriptModulePath,
resolveCompiledBundledModulePath,
@ -92,7 +93,7 @@ describe("channel plugin module loader helpers", () => {
expect(createJiti).not.toHaveBeenCalled();
});
it("uses native Jiti import for Windows dist loads", async () => {
it("uses the runtime-supported Jiti boundary for Windows dist loads", async () => {
const createJiti = vi.fn(() => vi.fn(() => ({ ok: true })));
vi.doMock("jiti", () => ({
createJiti,
@ -119,7 +120,7 @@ describe("channel plugin module loader helpers", () => {
expect(createJiti).toHaveBeenCalledWith(
expect.any(String),
expect.objectContaining({
tryNative: true,
tryNative: shouldExpectNativeJitiForJavaScriptTestRuntime(),
}),
);
} finally {

View file

@ -463,8 +463,8 @@ describe("registerPreActionHooks", () => {
});
await runPreAction({
parseArgv: ["agents", "list"],
processArgv: ["node", "openclaw", "agents", "list", "--json"],
parseArgv: ["message", "send"],
processArgv: ["node", "openclaw", "message", "send", "--json"],
});
expect(ensurePluginRegistryLoadedMock).toHaveBeenCalled();

View file

@ -1,6 +1,7 @@
import fs from "node:fs";
import path from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";
import { shouldExpectNativeJitiForJavaScriptTestRuntime } from "../test-utils/jiti-runtime.js";
import {
listImportedBundledPluginFacadeIds,
loadBundledPluginPublicSurfaceModuleSync,
@ -126,7 +127,7 @@ describe("plugin-sdk facade loader", () => {
expect(listImportedFacadeRuntimeIds()).toEqual(["demo"]);
});
it("uses native Jiti import for Windows dist facade loads", () => {
it("uses the runtime-supported Jiti boundary for Windows dist facade loads", () => {
const dir = createTempDirSync("openclaw-facade-loader-windows-dist-");
const bundledPluginsDir = path.join(dir, "dist");
fs.mkdirSync(path.join(bundledPluginsDir, "demo"), { recursive: true });
@ -158,7 +159,7 @@ describe("plugin-sdk facade loader", () => {
expect(createJitiCalls[0]?.[0]).toEqual(expect.any(String));
expect(createJitiCalls[0]?.[1]).toEqual(
expect.objectContaining({
tryNative: true,
tryNative: shouldExpectNativeJitiForJavaScriptTestRuntime(),
}),
);
} finally {

View file

@ -1,6 +1,7 @@
import fs from "node:fs";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { shouldExpectNativeJitiForJavaScriptTestRuntime } from "../test-utils/jiti-runtime.js";
import { cleanupTrackedTempDirs, makeTrackedTempDir } from "./test-helpers/fs-fixtures.js";
import {
getRegistryJitiMocks,
@ -34,7 +35,7 @@ describe("doctor-contract-registry getJiti", () => {
clearPluginDoctorContractRegistryCache();
});
it("uses native jiti loading on Windows for contract-api modules", () => {
it("uses the runtime-supported Jiti boundary on Windows for contract-api modules", () => {
const pluginRoot = makeTempDir();
fs.writeFileSync(path.join(pluginRoot, "contract-api.js"), "export default {};\n", "utf-8");
mocks.loadPluginManifestRegistry.mockReturnValue({
@ -42,6 +43,7 @@ describe("doctor-contract-registry getJiti", () => {
diagnostics: [],
});
const platformSpy = vi.spyOn(process, "platform", "get").mockReturnValue("win32");
const expectedTryNative = shouldExpectNativeJitiForJavaScriptTestRuntime();
try {
listPluginDoctorLegacyConfigRules({
@ -56,7 +58,7 @@ describe("doctor-contract-registry getJiti", () => {
expect(mocks.createJiti.mock.calls[0]?.[0]).toBe(path.join(pluginRoot, "contract-api.js"));
expect(mocks.createJiti.mock.calls[0]?.[1]).toEqual(
expect.objectContaining({
tryNative: true,
tryNative: expectedTryNative,
}),
);
});

View file

@ -1,6 +1,7 @@
import fs from "node:fs";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { shouldExpectNativeJitiForJavaScriptTestRuntime } from "../test-utils/jiti-runtime.js";
import { cleanupTrackedTempDirs, makeTrackedTempDir } from "./test-helpers/fs-fixtures.js";
import {
getRegistryJitiMocks,
@ -176,7 +177,7 @@ describe("setup-registry getJiti", () => {
clearPluginSetupRegistryCache();
});
it("uses native jiti loading on Windows for setup-api modules", () => {
it("uses the runtime-supported Jiti boundary on Windows for setup-api modules", () => {
const pluginRoot = makeTempDir();
fs.writeFileSync(path.join(pluginRoot, "setup-api.js"), "export default {};\n", "utf-8");
mocks.loadPluginManifestRegistry.mockReturnValue({
@ -185,6 +186,7 @@ describe("setup-registry getJiti", () => {
});
const platformSpy = vi.spyOn(process, "platform", "get").mockReturnValue("win32");
const restoreVersions = forceNodeRuntimeVersionsForTest();
const expectedTryNative = shouldExpectNativeJitiForJavaScriptTestRuntime();
try {
resolvePluginSetupRegistry({
@ -200,7 +202,7 @@ describe("setup-registry getJiti", () => {
expect(mocks.createJiti.mock.calls[0]?.[0]).toBe(path.join(pluginRoot, "setup-api.js"));
expect(mocks.createJiti.mock.calls[0]?.[1]).toEqual(
expect.objectContaining({
tryNative: true,
tryNative: expectedTryNative,
}),
);
});

View file

@ -0,0 +1,5 @@
/**
 * Native Jiti imports are expected only when the test runtime is plain Node
 * (not Bun) on a non-Windows platform.
 */
export function shouldExpectNativeJitiForJavaScriptTestRuntime(): boolean {
  const runningOnBun = typeof (process.versions as { bun?: string }).bun === "string";
  if (runningOnBun) {
    return false;
  }
  return process.platform !== "win32";
}