mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-28 06:31:11 +00:00
feat(providers): add DeepInfra provider plugin (#73038)
* feat(providers): add DeepInfra provider plugin * feat(deepinfra): add media provider surfaces * fix(deepinfra): satisfy provider boundary checks * docs: add gitcrawl maintainer skill * test: include deepinfra in live media sweeps * fix: remove stale tts contract import
This commit is contained in:
parent
1fde7dbc0e
commit
0294aebe6f
54 changed files with 2830 additions and 179 deletions
68
.agents/skills/gitcrawl/SKILL.md
Normal file
68
.agents/skills/gitcrawl/SKILL.md
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
---
|
||||
name: gitcrawl
|
||||
description: Use gitcrawl for OpenClaw issue and PR archive search, duplicate discovery, related-thread clustering, and local GitHub mirror freshness checks.
|
||||
metadata:
|
||||
openclaw:
|
||||
requires:
|
||||
bins:
|
||||
- gitcrawl
|
||||
---
|
||||
|
||||
# Gitcrawl
|
||||
|
||||
Use this skill before live GitHub search when triaging OpenClaw issues or PRs.
|
||||
|
||||
`gitcrawl` is the local candidate-discovery layer. It is fast, includes open and closed threads, and can surface duplicate attempts, related issues, and already-landed fixes. It is not the final source of truth for comments, labels, merges, closes, or current CI.
|
||||
|
||||
## Default Flow
|
||||
|
||||
1. Check local state:
|
||||
|
||||
```bash
|
||||
gitcrawl doctor --json
|
||||
```
|
||||
|
||||
2. Read the target from the local archive:
|
||||
|
||||
```bash
|
||||
gitcrawl threads openclaw/openclaw --numbers <issue-or-pr-number> --include-closed --json
|
||||
```
|
||||
|
||||
3. Find related candidates:
|
||||
|
||||
```bash
|
||||
gitcrawl neighbors openclaw/openclaw --number <issue-or-pr-number> --limit 12 --json
|
||||
gitcrawl search openclaw/openclaw --query "<scope or title keywords>" --mode hybrid --limit 20 --json
|
||||
```
|
||||
|
||||
4. Inspect relevant clusters:
|
||||
|
||||
```bash
|
||||
gitcrawl cluster-detail openclaw/openclaw --id <cluster-id> --member-limit 20 --body-chars 280 --json
|
||||
```
|
||||
|
||||
5. Verify anything actionable with live GitHub and the checkout:
|
||||
|
||||
```bash
|
||||
gh pr view <number> --json number,title,state,mergedAt,body,files,comments,reviews,statusCheckRollup
|
||||
gh issue view <number> --json number,title,state,body,comments,closedAt
|
||||
```
|
||||
|
||||
## Freshness Rules
|
||||
|
||||
- Treat `gitcrawl` as stale if `doctor` shows no target thread, an old `last_sync_at`, missing embeddings for neighbor/search commands, or a clearly wrong open/closed state.
|
||||
- If stale data blocks the decision, refresh the portable store first:
|
||||
|
||||
```bash
|
||||
gitcrawl init --portable-store git@github.com:openclaw/gitcrawl-store.git --json
|
||||
```
|
||||
|
||||
- Run expensive update commands such as `gitcrawl sync --include-comments` only when the user asked to update the local store or stale data is blocking the decision.
|
||||
- The sync default is all GitHub thread states; pass `--state open`, `--state closed`, or `--state all` only when a task requires a narrower or explicit scope.
|
||||
|
||||
## Boundaries
|
||||
|
||||
- Use `gitcrawl` for candidates, clusters, and historical context.
|
||||
- Use `gh`, `gh api`, and the current checkout for live state before commenting, labeling, closing, reopening, merging, or filing a PR review.
|
||||
- Do not close or label based only on `gitcrawl` similarity. Require matching problem intent plus live verification.
|
||||
- If `gitcrawl` is unavailable, say so and fall back to targeted `gh search` rather than blocking normal maintainer work.
|
||||
4
.agents/skills/gitcrawl/agents/openai.yaml
Normal file
4
.agents/skills/gitcrawl/agents/openai.yaml
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
interface:
|
||||
display_name: "Gitcrawl"
|
||||
short_description: "Search local OpenClaw issue and PR history before live GitHub triage"
|
||||
default_prompt: "Use $gitcrawl to inspect OpenClaw issue and PR history, find related threads and duplicate candidates, then verify actionable decisions with live GitHub."
|
||||
|
|
@ -9,7 +9,8 @@ Use this skill for maintainer-facing GitHub workflow, not for ordinary code chan
|
|||
|
||||
## Start issue and PR triage with gitcrawl
|
||||
|
||||
- Anytime you inspect OpenClaw issues or PRs, check local `gitcrawl` data first for related threads, duplicate attempts, and already-landed fixes.
|
||||
- Use `$gitcrawl` first anytime you inspect OpenClaw issues or PRs.
|
||||
- Check local `gitcrawl` data first for related threads, duplicate attempts, and already-landed fixes.
|
||||
- Use `gitcrawl` for candidate discovery and clustering; use `gh`, `gh api`, and the current checkout to verify live state before commenting, labeling, closing, or landing.
|
||||
- If `gitcrawl` is missing, stale, lacks the target thread, or has no embeddings for neighbor/search commands, fall back to the GitHub search workflow below.
|
||||
- Do not run expensive/update commands such as `gitcrawl sync --include-comments`, future enrichment commands, or broad reclustering unless the user asked to update the local store or stale data is blocking the decision.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
name: tag-duplicate-prs-issues
|
||||
description: Search duplicate OpenClaw PRs/issues, group related work in prtags, and sync duplicate state to GitHub.
|
||||
description: Use gitcrawl to search duplicate OpenClaw PRs/issues, group related work in prtags, and sync duplicate state to GitHub.
|
||||
---
|
||||
|
||||
# Tag Duplicate PRs and Issues
|
||||
|
|
@ -12,43 +12,25 @@ It is not for reviewing the implementation quality of a PR.
|
|||
|
||||
## Required Setup
|
||||
|
||||
Do not start duplicate triage until this setup is complete.
|
||||
Do not write duplicate groups or annotations until this setup is complete.
|
||||
Read-only discovery can still proceed with `gitcrawl` and live `gh`.
|
||||
|
||||
### Install the companion skills
|
||||
### Companion Skills
|
||||
|
||||
Install these skills first because they teach the agent how to use the two main CLIs correctly:
|
||||
|
||||
- `ghreplica` skill from the `ghreplica` repo at `skills/ghreplica/SKILL.md`
|
||||
- `prtags` skill from the `prtags` repo at `skills/prtags/SKILL.md`
|
||||
|
||||
This skill assumes those two skills are available and can be used during the same run.
|
||||
Use `$gitcrawl` first for local candidate discovery.
|
||||
Use the `prtags` skill from the `prtags` repo at `skills/prtags/SKILL.md` when it is available.
|
||||
|
||||
### Install the CLIs
|
||||
|
||||
Install `ghreplica` and `prtags` from their latest GitHub releases.
|
||||
Install `prtags` from its latest GitHub release.
|
||||
Do not rely on an old local build unless the maintainer explicitly wants to test unreleased behavior.
|
||||
|
||||
`ghreplica` CLI install path:
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/dutifuldev/ghreplica/main/scripts/install-ghr.sh | bash -s -- --bin-dir "$HOME/.local/bin"
|
||||
```
|
||||
|
||||
`prtags` CLI install path:
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/dutifuldev/prtags/main/scripts/install-prtags.sh | bash -s -- --bin-dir "$HOME/.local/bin"
|
||||
```
|
||||
|
||||
Use the `pr-search-cli` project with `uvx`.
|
||||
The command itself is `pr-search`.
|
||||
Do not require a permanent install unless the maintainer explicitly wants one.
|
||||
|
||||
```bash
|
||||
uvx --from pr-search-cli pr-search status
|
||||
uvx --from pr-search-cli pr-search code similar 67144
|
||||
```
|
||||
|
||||
### Authenticate prtags
|
||||
|
||||
`prtags` should be logged in with the maintainer's own GitHub account through OAuth device flow.
|
||||
|
|
@ -66,20 +48,15 @@ The expected outcome is that `prtags` stores the logged-in maintainer identity l
|
|||
Do not require an up-front preflight before starting the workflow.
|
||||
Proceed with the normal steps until you actually need a tool or account state.
|
||||
|
||||
As soon as you discover that a required CLI is missing or `prtags` is not logged in, stop immediately.
|
||||
Do not continue in a partial mode after that point.
|
||||
As soon as you discover that `prtags` is missing or not logged in at the write step, stop immediately.
|
||||
Do not continue in a partial write mode after that point.
|
||||
|
||||
If `ghr` is missing, ask the user to run the `ghreplica` install command.
|
||||
|
||||
If `prtags` is missing, ask the user to run both CLI install commands:
|
||||
If `prtags` is missing, ask the user to run:
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/dutifuldev/ghreplica/main/scripts/install-ghr.sh | bash -s -- --bin-dir "$HOME/.local/bin"
|
||||
curl -fsSL https://raw.githubusercontent.com/dutifuldev/prtags/main/scripts/install-prtags.sh | bash -s -- --bin-dir "$HOME/.local/bin"
|
||||
```
|
||||
|
||||
If `uvx --from pr-search-cli pr-search ...` fails because `uvx` or the `pr-search` launcher is not available, ask the user to make that command work before continuing.
|
||||
|
||||
If `prtags auth status` shows that the user is not logged in, ask the user to run:
|
||||
|
||||
```bash
|
||||
|
|
@ -90,19 +67,19 @@ Resume only after the missing tool or login state has been fixed.
|
|||
|
||||
## Read-Path Default
|
||||
|
||||
For read-only GitHub operations in this workflow, use `ghr` as the default CLI.
|
||||
Treat it as a drop-in replacement for the `gh` read operations you would normally use for PRs, issues, comments, reviews, and duplicate-search evidence.
|
||||
For candidate discovery in this workflow, use `gitcrawl` first.
|
||||
Treat it as the local history and clustering layer for related issues, duplicate attempts, and closed threads.
|
||||
|
||||
Only fall back to `gh` when `ghr` is failing for a concrete reason, such as:
|
||||
Use live `gh` or `gh api` for the target thread and for any candidate before making an actionable judgment.
|
||||
Use live GitHub when `gitcrawl` is missing or stale for a concrete reason, such as:
|
||||
|
||||
- the mirrored object is not present yet
|
||||
- the mirror data is clearly stale or incomplete for the decision you need to make
|
||||
- the `ghr` command errors, times out, or does not expose the specific read you need
|
||||
- the target or candidate is not present yet
|
||||
- the local data is clearly stale or incomplete for the decision you need to make
|
||||
- `gitcrawl` errors, times out, or lacks the needed neighbor/search data
|
||||
|
||||
When you fall back to `gh`, note that you did so and why.
|
||||
When you fall back to live GitHub search, note that you did so and why.
|
||||
|
||||
If `ghr` is missing a fresh PR or issue but `gh` can read it, you may use `gh` for the read-side judgment.
|
||||
If a later `prtags` target-level write fails because the same object is still missing from `ghreplica`, stop and report that the mirror has not caught up yet instead of forcing the write.
|
||||
If a later `prtags` target-level write fails because its own mirror has not caught up, stop and report that the curation backend is missing the target object instead of forcing a fallback write.
|
||||
|
||||
## Goal
|
||||
|
||||
|
|
@ -118,14 +95,12 @@ For each target PR or issue:
|
|||
|
||||
Use the tools with these boundaries:
|
||||
|
||||
- `ghreplica` is the raw evidence source
|
||||
- use `ghr` first for normal GitHub read operations in this workflow
|
||||
- use it for title/body/comment search, related PRs, overlapping files, overlapping ranges, and current PR or issue status
|
||||
- resort to `gh` only when `ghr` cannot provide the needed read cleanly
|
||||
- `pr-search-cli` is candidate generation and ranking
|
||||
- use it to suggest likely duplicate PRs or issue-cluster context
|
||||
- do not treat it as final truth
|
||||
- do not create or expand a duplicate group only because `pr-search-cli` put multiple PRs in the same issue or duplicate cluster
|
||||
- `gitcrawl` is candidate generation and historical context
|
||||
- use it first for local title/body search, neighbors, clusters, and closed-thread discovery
|
||||
- treat every candidate as a lead until live GitHub confirms it
|
||||
- `gh` is live GitHub truth
|
||||
- use it for target state, body, comments, reviews, files, linked issues, and current open/closed/merged status
|
||||
- use `gh search` only when `gitcrawl` is stale, missing data, or cannot express the needed query
|
||||
- `prtags` is the maintainer curation layer
|
||||
- use it to create or reuse one duplicate group
|
||||
- use it to save the duplicate status, confidence, rationale, and group summary
|
||||
|
|
@ -182,7 +157,7 @@ Examples:
|
|||
## Evidence Checklist
|
||||
|
||||
Before declaring a duplicate, gather evidence from at least two categories.
|
||||
Same-issue or same-cluster output from `pr-search-cli` counts only as candidate generation, not as one of the required proof categories by itself.
|
||||
`gitcrawl` neighbors, search hits, and cluster membership count as candidate generation, not as enough proof by themselves.
|
||||
|
||||
For PRs:
|
||||
|
||||
|
|
@ -205,21 +180,18 @@ If you only have wording similarity, that is not enough.
|
|||
## Step 1: Read The Target
|
||||
|
||||
Start by reading the target itself.
|
||||
Use `ghr` first for this step even if you would normally reach for `gh`.
|
||||
Use live GitHub for current target state.
|
||||
|
||||
For a PR:
|
||||
|
||||
```bash
|
||||
ghr pr view -R openclaw/openclaw <number> --comments
|
||||
ghr pr reviews -R openclaw/openclaw <number>
|
||||
ghr pr comments -R openclaw/openclaw <number>
|
||||
gh pr view <number> --json number,title,state,mergedAt,body,closingIssuesReferences,files,comments,reviews,statusCheckRollup
|
||||
```
|
||||
|
||||
For an issue:
|
||||
|
||||
```bash
|
||||
ghr issue view -R openclaw/openclaw <number> --comments
|
||||
ghr issue comments -R openclaw/openclaw <number>
|
||||
gh issue view <number> --json number,title,state,body,comments,closedAt
|
||||
```
|
||||
|
||||
Record:
|
||||
|
|
@ -232,74 +204,56 @@ Record:
|
|||
- whether it is open, closed, or merged
|
||||
- whether there is already a likely duplicate thread mentioned by humans
|
||||
|
||||
## Step 2: Search Broadly With ghreplica
|
||||
## Step 2: Search Broadly With Gitcrawl
|
||||
|
||||
Use `ghreplica` first because it is the most direct evidence source.
|
||||
Do not switch to `gh` for ordinary reads unless `ghr` is missing data or failing.
|
||||
Use `gitcrawl` first because it is the local OpenClaw history and clustering source.
|
||||
Do not switch to broad live GitHub search unless `gitcrawl` is missing data, stale, or failing.
|
||||
|
||||
### PR duplicate search
|
||||
|
||||
Run all of these when the target is a PR:
|
||||
Start with the target and nearby threads:
|
||||
|
||||
```bash
|
||||
ghr search related-prs -R openclaw/openclaw <pr-number> --mode path_overlap --state all
|
||||
ghr search related-prs -R openclaw/openclaw <pr-number> --mode range_overlap --state all
|
||||
ghr search mentions -R openclaw/openclaw --query "<key phrase from title or body>" --mode fts --scope pull_requests --state all
|
||||
ghr search mentions -R openclaw/openclaw --query "<subsystem or error phrase>" --mode fts --scope issues --state all
|
||||
gitcrawl threads openclaw/openclaw --numbers <issue-or-pr-number> --include-closed --json
|
||||
gitcrawl neighbors openclaw/openclaw --number <issue-or-pr-number> --limit 20 --json
|
||||
```
|
||||
|
||||
Use `prs-by-paths` or `prs-by-ranges` when the likely duplicate surface is already known:
|
||||
Then search key phrases and subsystem terms:
|
||||
|
||||
```bash
|
||||
ghr search prs-by-paths -R openclaw/openclaw --path src/example.ts --state all
|
||||
ghr search prs-by-ranges -R openclaw/openclaw --path src/example.ts --start 20 --end 80 --state all
|
||||
gitcrawl search openclaw/openclaw --query "<key phrase from title or body>" --mode hybrid --limit 20 --json
|
||||
gitcrawl search openclaw/openclaw --query "<subsystem or error phrase>" --mode hybrid --limit 20 --json
|
||||
```
|
||||
|
||||
### Issue duplicate search
|
||||
|
||||
`ghreplica` does not have a special issue-to-issue “related issues” command.
|
||||
For issues, search mirrored text and linked PR context instead.
|
||||
|
||||
Run targeted text searches:
|
||||
Inspect likely clusters:
|
||||
|
||||
```bash
|
||||
ghr search mentions -R openclaw/openclaw --query "<issue title phrase>" --mode fts --scope issues --state all
|
||||
ghr search mentions -R openclaw/openclaw --query "<error message or symptom>" --mode fts --scope issues --state all
|
||||
ghr search mentions -R openclaw/openclaw --query "<subsystem phrase>" --mode fts --scope pull_requests --state all
|
||||
gitcrawl cluster-detail openclaw/openclaw --id <cluster-id> --member-limit 20 --body-chars 280 --json
|
||||
```
|
||||
|
||||
Then inspect the candidate PRs or issues those searches uncover.
|
||||
|
||||
## Step 3: Use pr-search-cli As A Hint Layer
|
||||
|
||||
Use `pr-search-cli` after `ghreplica`.
|
||||
It is good at surfacing candidates quickly, but it is not the final decision-maker.
|
||||
Run it through the `pr-search` command.
|
||||
|
||||
For a PR:
|
||||
For PRs, verify likely code overlap with live file data:
|
||||
|
||||
```bash
|
||||
uvx --from pr-search-cli pr-search -R openclaw/openclaw code similar <pr-number>
|
||||
uvx --from pr-search-cli pr-search -R openclaw/openclaw code clusters for-pr <pr-number>
|
||||
uvx --from pr-search-cli pr-search -R openclaw/openclaw issues for-pr <pr-number>
|
||||
uvx --from pr-search-cli pr-search -R openclaw/openclaw issues duplicate-prs
|
||||
gh pr view <candidate-pr> --json number,title,state,mergedAt,files,body,comments,reviews
|
||||
```
|
||||
|
||||
Interpretation:
|
||||
For issues, verify likely duplicate issue state and comments live:
|
||||
|
||||
- `code similar` suggests PRs with similar change shape
|
||||
- `code clusters for-pr` shows the PR’s nearby code cluster
|
||||
- `issues for-pr` shows which issue clusters the PR appears to belong to
|
||||
- `issues duplicate-prs` is useful for spotting already-known duplicate PR patterns
|
||||
```bash
|
||||
gh issue view <candidate-issue> --json number,title,state,body,comments,closedAt
|
||||
```
|
||||
|
||||
Treat every `pr-search-cli` result as a hint to investigate, not as enough evidence to create or widen a duplicate group.
|
||||
Multiple PRs can share the same issue or issue cluster while still taking meaningfully different fix paths.
|
||||
## Step 3: Use Live GitHub Search For Gaps
|
||||
|
||||
For an issue:
|
||||
Use targeted live GitHub search after `gitcrawl` when:
|
||||
|
||||
- use `ghreplica` first to find candidate PRs or issue wording
|
||||
- if the issue has linked PRs or a likely implementation PR, run `pr-search-cli` on those PRs
|
||||
- treat issue-cluster output as supporting context, not as enough by itself to call the issue a duplicate
|
||||
- the target is too new for the local store
|
||||
- comments or reviews matter and the local store lacks them
|
||||
- the exact phrase did not appear in local results but the issue/PR is current enough that GitHub should know it
|
||||
|
||||
```bash
|
||||
gh search prs --repo openclaw/openclaw --match title,body --limit 50 -- "<key phrase>"
|
||||
gh search issues --repo openclaw/openclaw --match title,body --limit 50 -- "<key phrase>"
|
||||
gh search issues --repo openclaw/openclaw --match comments --limit 50 -- "<error or maintainer phrase>"
|
||||
```
|
||||
|
||||
## Step 4: Decide The Outcome
|
||||
|
||||
|
|
@ -344,7 +298,7 @@ Reuse an existing group when:
|
|||
- it already contains clearly related members
|
||||
- adding the target would keep the group coherent
|
||||
|
||||
Do not widen an existing group just because `pr-search-cli` placed several PRs under the same issue or duplicate cluster.
|
||||
Do not widen an existing group just because `gitcrawl` placed several PRs or issues near each other.
|
||||
Confirm that the actual implementation path and maintainer intent still match before adding the new member.
|
||||
|
||||
Create a new group only when no existing group clearly fits.
|
||||
|
|
@ -423,8 +377,8 @@ prtags annotation group set <group-id> \
|
|||
|
||||
When the evidence is incomplete, set `duplicate_status=candidate` and lower the confidence.
|
||||
|
||||
If a per-PR or per-issue annotation write fails because `prtags` cannot resolve the target through `ghreplica`, do not force a fallback write path.
|
||||
Keep the group state you were able to write, report that the mirror is still missing the target object, and defer the target-level annotation until `ghreplica` catches up.
|
||||
If a per-PR or per-issue annotation write fails because `prtags` cannot resolve the target, do not force a fallback write path.
|
||||
Keep the group state you were able to write, report that the curation backend is still missing the target object, and defer the target-level annotation until `prtags` catches up.
|
||||
|
||||
## Step 8: Let prtags Sync The Group Comment
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
interface:
|
||||
display_name: "Tag Duplicate PRs and Issues"
|
||||
short_description: "Find duplicate PRs and issues, group them in prtags, and let prtags sync the GitHub comment"
|
||||
default_prompt: "Use $tag-duplicate-prs-issues to decide whether an OpenClaw PR or issue is a duplicate, gather evidence with ghreplica and pr-search-cli, group related items in prtags, and save the duplicate judgment."
|
||||
short_description: "Find duplicate PRs and issues with gitcrawl, group them in prtags, and let prtags sync the GitHub comment"
|
||||
default_prompt: "Use $tag-duplicate-prs-issues to decide whether an OpenClaw PR or issue is a duplicate, gather candidates with gitcrawl, verify live state with GitHub, group related items in prtags, and save the duplicate judgment."
|
||||
|
|
|
|||
5
.github/labeler.yml
vendored
5
.github/labeler.yml
vendored
|
|
@ -314,6 +314,11 @@
|
|||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "extensions/deepseek/**"
|
||||
"extensions: deepinfra":
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "extensions/deepinfra/**"
|
||||
- "docs/providers/deepinfra.md"
|
||||
"extensions: tencent":
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
|
|
|
|||
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -102,6 +102,8 @@ USER.md
|
|||
.agents/skills/*
|
||||
!.agents/skills/blacksmith-testbox/
|
||||
!.agents/skills/blacksmith-testbox/**
|
||||
!.agents/skills/gitcrawl/
|
||||
!.agents/skills/gitcrawl/**
|
||||
!.agents/skills/openclaw-ghsa-maintainer/
|
||||
!.agents/skills/openclaw-ghsa-maintainer/**
|
||||
!.agents/skills/openclaw-parallels-smoke/
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ Docs: https://docs.openclaw.ai
|
|||
- Plugin SDK/testing: add a focused `plugin-sdk/plugin-test-api` helper subpath and move bundled plugin registration tests off the repo-only plugin API bridge. Thanks @vincentkoc.
|
||||
- Plugin SDK: add generic host hooks for session state, next-turn context, trusted tool policy, UI descriptors, events, scheduler cleanup, and run-scoped plugin context. (#72287) Thanks @100yenadmin.
|
||||
- Plugin SDK/testing: expose provider catalog, wizard, registry, manifest, public-artifact, outbound, and TTS contract helpers through documented SDK testing seams so bundled plugin tests no longer import repo `src/**` internals. Thanks @vincentkoc.
|
||||
- Providers/DeepInfra: add a bundled DeepInfra provider with `DEEPINFRA_API_KEY` onboarding, dynamic OpenAI-compatible model discovery, image generation/editing, image/audio media understanding, TTS, text-to-video, memory embeddings, static catalog metadata, and provider-owned base URL policy. Carries forward #53805, #48088, #37576, #43896, #11533, and #2554. Thanks @ats3v.
|
||||
- Matrix: attach versioned structured approval metadata to pending approval messages so capable Matrix clients can render richer approval UI while body text and reaction fallback keep working. (#72432) Thanks @kakahu2015.
|
||||
|
||||
### Fixes
|
||||
|
|
|
|||
|
|
@ -95,6 +95,10 @@
|
|||
"source": "Chutes",
|
||||
"target": "Chutes"
|
||||
},
|
||||
{
|
||||
"source": "DeepInfra",
|
||||
"target": "DeepInfra"
|
||||
},
|
||||
{
|
||||
"source": "Qwen",
|
||||
"target": "Qwen"
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ a per-agent SQLite database and needs no extra dependencies to get started.
|
|||
|
||||
## Getting started
|
||||
|
||||
If you have an API key for OpenAI, Gemini, Voyage, or Mistral, the builtin
|
||||
If you have an API key for OpenAI, Gemini, Voyage, Mistral, or DeepInfra, the built-in
|
||||
engine auto-detects it and enables vector search. No config needed.
|
||||
|
||||
To set a provider explicitly:
|
||||
|
|
@ -60,14 +60,15 @@ at a GGUF file:
|
|||
|
||||
## Supported embedding providers
|
||||
|
||||
| Provider | ID | Auto-detected | Notes |
|
||||
| -------- | --------- | ------------- | ----------------------------------- |
|
||||
| OpenAI | `openai` | Yes | Default: `text-embedding-3-small` |
|
||||
| Gemini | `gemini` | Yes | Supports multimodal (image + audio) |
|
||||
| Voyage | `voyage` | Yes | |
|
||||
| Mistral | `mistral` | Yes | |
|
||||
| Ollama | `ollama` | No | Local, set explicitly |
|
||||
| Local | `local` | Yes (first) | Optional `node-llama-cpp` runtime |
|
||||
| Provider | ID | Auto-detected | Notes |
|
||||
| --------- | ----------- | ------------- | ----------------------------------- |
|
||||
| OpenAI | `openai` | Yes | Default: `text-embedding-3-small` |
|
||||
| Gemini | `gemini` | Yes | Supports multimodal (image + audio) |
|
||||
| Voyage | `voyage` | Yes | |
|
||||
| Mistral | `mistral` | Yes | |
|
||||
| DeepInfra | `deepinfra` | Yes | Default: `BAAI/bge-m3` |
|
||||
| Ollama | `ollama` | No | Local, set explicitly |
|
||||
| Local | `local` | Yes (first) | Optional `node-llama-cpp` runtime |
|
||||
|
||||
Auto-detection picks the first provider whose API key can be resolved, in the
|
||||
order shown. Set `memorySearch.provider` to override.
|
||||
|
|
|
|||
|
|
@ -280,6 +280,7 @@ See [/providers/kilocode](/providers/kilocode) for setup details.
|
|||
| BytePlus | `byteplus` / `byteplus-plan` | `BYTEPLUS_API_KEY` | `byteplus-plan/ark-code-latest` |
|
||||
| Cerebras | `cerebras` | `CEREBRAS_API_KEY` | `cerebras/zai-glm-4.7` |
|
||||
| Cloudflare AI Gateway | `cloudflare-ai-gateway` | `CLOUDFLARE_AI_GATEWAY_API_KEY` | — |
|
||||
| DeepInfra | `deepinfra` | `DEEPINFRA_API_KEY` | `deepinfra/deepseek-ai/DeepSeek-V3.2` |
|
||||
| DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` | `deepseek/deepseek-v4-flash` |
|
||||
| GitHub Copilot | `github-copilot` | `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` / `GITHUB_TOKEN` | — |
|
||||
| Groq | `groq` | `GROQ_API_KEY` | — |
|
||||
|
|
|
|||
|
|
@ -1331,6 +1331,7 @@
|
|||
"providers/cloudflare-ai-gateway",
|
||||
"providers/comfy",
|
||||
"providers/deepgram",
|
||||
"providers/deepinfra",
|
||||
"providers/deepseek",
|
||||
"providers/elevenlabs",
|
||||
"providers/fal",
|
||||
|
|
|
|||
|
|
@ -468,6 +468,7 @@ If you want to rely on env keys (e.g. exported in your `~/.profile`), run local
|
|||
- `<provider>:generate`
|
||||
- `<provider>:edit` when the provider declares edit support
|
||||
- Current bundled providers covered:
|
||||
- `deepinfra`
|
||||
- `fal`
|
||||
- `google`
|
||||
- `minimax`
|
||||
|
|
@ -477,6 +478,7 @@ If you want to rely on env keys (e.g. exported in your `~/.profile`), run local
|
|||
- `xai`
|
||||
- Optional narrowing:
|
||||
- `OPENCLAW_LIVE_IMAGE_GENERATION_PROVIDERS="openai,google,openrouter,xai"`
|
||||
- `OPENCLAW_LIVE_IMAGE_GENERATION_PROVIDERS="deepinfra"`
|
||||
- `OPENCLAW_LIVE_IMAGE_GENERATION_MODELS="openai/gpt-image-2,google/gemini-3.1-flash-image-preview,openrouter/google/gemini-3.1-flash-image-preview,xai/grok-imagine-image"`
|
||||
- `OPENCLAW_LIVE_IMAGE_GENERATION_CASES="google:flash-generate,google:pro-edit,openrouter:generate,xai:default-generate,xai:default-edit"`
|
||||
- Optional auth behavior:
|
||||
|
|
@ -551,7 +553,7 @@ image-generation runtime, and the live provider request.
|
|||
- `google` because the current shared Gemini/Veo lane uses local buffer-backed input and that path is not accepted in the shared sweep
|
||||
- `openai` because the current shared lane lacks org-specific video inpaint/remix access guarantees
|
||||
- Optional narrowing:
|
||||
- `OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS="google,openai,runway"`
|
||||
- `OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS="deepinfra,google,openai,runway"`
|
||||
- `OPENCLAW_LIVE_VIDEO_GENERATION_MODELS="google/veo-3.1-fast-generate-preview,openai/sora-2,runway/gen4_aleph"`
|
||||
- `OPENCLAW_LIVE_VIDEO_GENERATION_SKIP_PROVIDERS=""` to include every provider in the default sweep, including FAL
|
||||
- `OPENCLAW_LIVE_VIDEO_GENERATION_TIMEOUT_MS=60000` to reduce each provider operation cap for an aggressive smoke run
|
||||
|
|
|
|||
83
docs/providers/deepinfra.md
Normal file
83
docs/providers/deepinfra.md
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
---
|
||||
summary: "Use DeepInfra's unified API to access the most popular open source and frontier models in OpenClaw"
|
||||
read_when:
|
||||
- You want a single API key for the top open source LLMs
|
||||
- You want to run models via DeepInfra's API in OpenClaw
|
||||
---
|
||||
|
||||
# DeepInfra
|
||||
|
||||
DeepInfra provides a **unified API** that routes requests to the most popular open source and frontier models behind a single
|
||||
endpoint and API key. It is OpenAI-compatible, so most OpenAI SDKs work by switching the base URL.
|
||||
|
||||
## Getting an API key
|
||||
|
||||
1. Go to [https://deepinfra.com/](https://deepinfra.com/)
|
||||
2. Sign in or create an account
|
||||
3. Navigate to Dashboard / Keys and generate a new API key or use the auto-created one
|
||||
|
||||
## CLI setup
|
||||
|
||||
```bash
|
||||
openclaw onboard --deepinfra-api-key <key>
|
||||
```
|
||||
|
||||
Or set the environment variable:
|
||||
|
||||
```bash
|
||||
export DEEPINFRA_API_KEY="<your-deepinfra-api-key>" # pragma: allowlist secret
|
||||
```
|
||||
|
||||
## Config snippet
|
||||
|
||||
```json5
|
||||
{
|
||||
env: { DEEPINFRA_API_KEY: "<your-deepinfra-api-key>" }, // pragma: allowlist secret
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "deepinfra/deepseek-ai/DeepSeek-V3.2" },
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
## Supported OpenClaw surfaces
|
||||
|
||||
The bundled plugin registers all DeepInfra surfaces that match current
|
||||
OpenClaw provider contracts:
|
||||
|
||||
| Surface | Default model | OpenClaw config/tool |
|
||||
| ------------------------ | ---------------------------------- | -------------------------------------------------------- |
|
||||
| Chat / model provider | `deepseek-ai/DeepSeek-V3.2` | `agents.defaults.model` |
|
||||
| Image generation/editing | `black-forest-labs/FLUX-1-schnell` | `image_generate`, `agents.defaults.imageGenerationModel` |
|
||||
| Media understanding | `moonshotai/Kimi-K2.5` for images | inbound image understanding |
|
||||
| Speech-to-text | `openai/whisper-large-v3-turbo` | inbound audio transcription |
|
||||
| Text-to-speech | `hexgrad/Kokoro-82M` | `messages.tts.provider: "deepinfra"` |
|
||||
| Video generation | `Pixverse/Pixverse-T2V` | `video_generate`, `agents.defaults.videoGenerationModel` |
|
||||
| Memory embeddings | `BAAI/bge-m3` | `agents.defaults.memorySearch.provider: "deepinfra"` |
|
||||
|
||||
DeepInfra also exposes reranking, classification, object-detection, and other
|
||||
native model types. OpenClaw does not currently have first-class provider
|
||||
contracts for those categories, so this plugin does not register them yet.
|
||||
|
||||
## Available models
|
||||
|
||||
OpenClaw dynamically discovers available DeepInfra models at startup. Use
|
||||
`/models deepinfra` to see the full list of models available.
|
||||
|
||||
Any model available on [DeepInfra.com](https://deepinfra.com/) can be used with the `deepinfra/` prefix:
|
||||
|
||||
```
|
||||
deepinfra/MiniMaxAI/MiniMax-M2.5
|
||||
deepinfra/deepseek-ai/DeepSeek-V3.2
|
||||
deepinfra/moonshotai/Kimi-K2.5
|
||||
deepinfra/zai-org/GLM-5.1
|
||||
...and many more
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- Model refs are `deepinfra/<provider>/<model>` (e.g., `deepinfra/Qwen/Qwen3-Max`).
|
||||
- Default model: `deepinfra/deepseek-ai/DeepSeek-V3.2`
|
||||
- Base URL: `https://api.deepinfra.com/v1/openai`
|
||||
- Native video generation uses `https://api.deepinfra.com/v1/inference/<model>`.
|
||||
|
|
@ -31,6 +31,7 @@ model as `provider/model`.
|
|||
- [Chutes](/providers/chutes)
|
||||
- [ComfyUI](/providers/comfy)
|
||||
- [Cloudflare AI Gateway](/providers/cloudflare-ai-gateway)
|
||||
- [DeepInfra](/providers/deepinfra)
|
||||
- [fal](/providers/fal)
|
||||
- [Fireworks](/providers/fireworks)
|
||||
- [GLM models](/providers/glm)
|
||||
|
|
|
|||
|
|
@ -84,8 +84,8 @@ See [Models](/providers/models) for pricing config and [Token use & costs](/refe
|
|||
|
||||
Inbound media can be summarized/transcribed before the reply runs. This uses model/provider APIs.
|
||||
|
||||
- Audio: OpenAI / Groq / Deepgram / Google / Mistral.
|
||||
- Image: OpenAI / OpenRouter / Anthropic / Google / MiniMax / Moonshot / Qwen / Z.AI.
|
||||
- Audio: OpenAI / Groq / Deepgram / DeepInfra / Google / Mistral.
|
||||
- Image: OpenAI / OpenRouter / Anthropic / DeepInfra / Google / MiniMax / Moonshot / Qwen / Z.AI.
|
||||
- Video: Google / Qwen / Moonshot.
|
||||
|
||||
See [Media understanding](/nodes/media-understanding).
|
||||
|
|
@ -94,8 +94,8 @@ See [Media understanding](/nodes/media-understanding).
|
|||
|
||||
Shared generation capabilities can also spend provider keys:
|
||||
|
||||
- Image generation: OpenAI / Google / fal / MiniMax
|
||||
- Video generation: Qwen
|
||||
- Image generation: OpenAI / Google / DeepInfra / fal / MiniMax
|
||||
- Video generation: DeepInfra / Qwen
|
||||
|
||||
Image generation can infer an auth-backed provider default when
|
||||
`agents.defaults.imageGenerationModel` is unset. Video generation currently
|
||||
|
|
@ -113,6 +113,7 @@ Semantic memory search uses **embedding APIs** when configured for remote provid
|
|||
- `memorySearch.provider = "gemini"` → Gemini embeddings
|
||||
- `memorySearch.provider = "voyage"` → Voyage embeddings
|
||||
- `memorySearch.provider = "mistral"` → Mistral embeddings
|
||||
- `memorySearch.provider = "deepinfra"` → DeepInfra embeddings
|
||||
- `memorySearch.provider = "lmstudio"` → LM Studio embeddings (local/self-hosted)
|
||||
- `memorySearch.provider = "ollama"` → Ollama embeddings (local/self-hosted; typically no hosted API billing)
|
||||
- Optional fallback to a remote provider if local embeddings fail
|
||||
|
|
|
|||
|
|
@ -46,12 +46,12 @@ See [Active Memory](/concepts/active-memory) for the activation model, plugin-ow
|
|||
|
||||
## Provider selection
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
| ---------- | --------- | ---------------- | ------------------------------------------------------------------------------------------------------------- |
|
||||
| `provider` | `string` | auto-detected | Embedding adapter ID: `bedrock`, `gemini`, `github-copilot`, `local`, `mistral`, `ollama`, `openai`, `voyage` |
|
||||
| `model` | `string` | provider default | Embedding model name |
|
||||
| `fallback` | `string` | `"none"` | Fallback adapter ID when the primary fails |
|
||||
| `enabled` | `boolean` | `true` | Enable or disable memory search |
|
||||
| Key | Type | Default | Description |
|
||||
| ---------- | --------- | ---------------- | -------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `provider` | `string` | auto-detected | Embedding adapter ID: `bedrock`, `deepinfra`, `gemini`, `github-copilot`, `local`, `mistral`, `ollama`, `openai`, `voyage` |
|
||||
| `model` | `string` | provider default | Embedding model name |
|
||||
| `fallback` | `string` | `"none"` | Fallback adapter ID when the primary fails |
|
||||
| `enabled` | `boolean` | `true` | Enable or disable memory search |
|
||||
|
||||
### Auto-detection order
|
||||
|
||||
|
|
@ -76,6 +76,9 @@ When `provider` is not set, OpenClaw selects the first available:
|
|||
<Step title="mistral">
|
||||
Selected if a Mistral key can be resolved.
|
||||
</Step>
|
||||
<Step title="deepinfra">
|
||||
Selected if a DeepInfra key can be resolved.
|
||||
</Step>
|
||||
<Step title="bedrock">
|
||||
Selected if the AWS SDK credential chain resolves (instance role, access keys, profile, SSO, web identity, or shared config).
|
||||
</Step>
|
||||
|
|
@ -87,15 +90,16 @@ When `provider` is not set, OpenClaw selects the first available:
|
|||
|
||||
Remote embeddings require an API key. Bedrock uses the AWS SDK default credential chain instead (instance roles, SSO, access keys).
|
||||
|
||||
| Provider | Env var | Config key |
|
||||
| -------------- | -------------------------------------------------- | --------------------------------- |
|
||||
| Bedrock | AWS credential chain | No API key needed |
|
||||
| Gemini | `GEMINI_API_KEY` | `models.providers.google.apiKey` |
|
||||
| GitHub Copilot | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN` | Auth profile via device login |
|
||||
| Mistral | `MISTRAL_API_KEY` | `models.providers.mistral.apiKey` |
|
||||
| Ollama | `OLLAMA_API_KEY` (placeholder) | -- |
|
||||
| OpenAI | `OPENAI_API_KEY` | `models.providers.openai.apiKey` |
|
||||
| Voyage | `VOYAGE_API_KEY` | `models.providers.voyage.apiKey` |
|
||||
| Provider | Env var | Config key |
|
||||
| -------------- | -------------------------------------------------- | ----------------------------------- |
|
||||
| Bedrock | AWS credential chain | No API key needed |
|
||||
| DeepInfra | `DEEPINFRA_API_KEY` | `models.providers.deepinfra.apiKey` |
|
||||
| Gemini | `GEMINI_API_KEY` | `models.providers.google.apiKey` |
|
||||
| GitHub Copilot | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN` | Auth profile via device login |
|
||||
| Mistral | `MISTRAL_API_KEY` | `models.providers.mistral.apiKey` |
|
||||
| Ollama | `OLLAMA_API_KEY` (placeholder) | -- |
|
||||
| OpenAI | `OPENAI_API_KEY` | `models.providers.openai.apiKey` |
|
||||
| Voyage | `VOYAGE_API_KEY` | `models.providers.voyage.apiKey` |
|
||||
|
||||
<Note>
|
||||
Codex OAuth covers chat/completions only and does not satisfy embedding requests.
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
---
|
||||
summary: "Generate and edit images via image_generate across OpenAI, Google, fal, MiniMax, ComfyUI, OpenRouter, LiteLLM, xAI, Vydra"
|
||||
summary: "Generate and edit images via image_generate across OpenAI, Google, fal, MiniMax, ComfyUI, DeepInfra, OpenRouter, LiteLLM, xAI, Vydra"
|
||||
read_when:
|
||||
- Generating or editing images via the agent
|
||||
- Configuring image-generation providers and models
|
||||
|
|
@ -71,6 +71,7 @@ internal image endpoints remain blocked by default.
|
|||
| OpenAI image generation with API billing | `openai/gpt-image-2` | `OPENAI_API_KEY` |
|
||||
| OpenAI image generation with Codex subscription auth | `openai/gpt-image-2` | OpenAI Codex OAuth |
|
||||
| OpenAI transparent-background PNG/WebP | `openai/gpt-image-1.5` | `OPENAI_API_KEY` or OpenAI Codex OAuth |
|
||||
| DeepInfra image generation | `deepinfra/black-forest-labs/FLUX-1-schnell` | `DEEPINFRA_API_KEY` |
|
||||
| OpenRouter image generation | `openrouter/google/gemini-3.1-flash-image-preview` | `OPENROUTER_API_KEY` |
|
||||
| LiteLLM image generation | `litellm/gpt-image-2` | `LITELLM_API_KEY` |
|
||||
| Google Gemini image generation | `google/gemini-3.1-flash-image-preview` | `GEMINI_API_KEY` or `GOOGLE_API_KEY` |
|
||||
|
|
@ -88,6 +89,7 @@ backend emits it.
|
|||
| Provider | Default model | Edit support | Auth |
|
||||
| ---------- | --------------------------------------- | ---------------------------------- | ----------------------------------------------------- |
|
||||
| ComfyUI | `workflow` | Yes (1 image, workflow-configured) | `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY` for cloud |
|
||||
| DeepInfra | `black-forest-labs/FLUX-1-schnell` | Yes (1 image) | `DEEPINFRA_API_KEY` |
|
||||
| fal | `fal-ai/flux/dev` | Yes | `FAL_KEY` |
|
||||
| Google | `gemini-3.1-flash-image-preview` | Yes | `GEMINI_API_KEY` or `GOOGLE_API_KEY` |
|
||||
| LiteLLM | `gpt-image-2` | Yes (up to 5 input images) | `LITELLM_API_KEY` |
|
||||
|
|
@ -105,13 +107,13 @@ Use `action: "list"` to inspect available providers and models at runtime:
|
|||
|
||||
## Provider capabilities
|
||||
|
||||
| Capability | ComfyUI | fal | Google | MiniMax | OpenAI | Vydra | xAI |
|
||||
| --------------------- | ------------------ | ----------------- | -------------- | --------------------- | -------------- | ----- | -------------- |
|
||||
| Generate (max count) | Workflow-defined | 4 | 4 | 9 | 4 | 1 | 4 |
|
||||
| Edit / reference | 1 image (workflow) | 1 image | Up to 5 images | 1 image (subject ref) | Up to 5 images | — | Up to 5 images |
|
||||
| Size control | — | ✓ | ✓ | — | Up to 4K | — | — |
|
||||
| Aspect ratio | — | ✓ (generate only) | ✓ | ✓ | — | — | ✓ |
|
||||
| Resolution (1K/2K/4K) | — | ✓ | ✓ | — | — | — | 1K, 2K |
|
||||
| Capability | ComfyUI | DeepInfra | fal | Google | MiniMax | OpenAI | Vydra | xAI |
|
||||
| --------------------- | ------------------ | --------- | ----------------- | -------------- | --------------------- | -------------- | ----- | -------------- |
|
||||
| Generate (max count) | Workflow-defined | 4 | 4 | 4 | 9 | 4 | 1 | 4 |
|
||||
| Edit / reference | 1 image (workflow) | 1 image | 1 image | Up to 5 images | 1 image (subject ref) | Up to 5 images | — | Up to 5 images |
|
||||
| Size control | — | ✓ | ✓ | ✓ | — | Up to 4K | — | — |
|
||||
| Aspect ratio | — | — | ✓ (generate only) | ✓ | ✓ | — | — | ✓ |
|
||||
| Resolution (1K/2K/4K) | — | — | ✓ | ✓ | — | — | — | 1K, 2K |
|
||||
|
||||
## Tool parameters
|
||||
|
||||
|
|
@ -226,7 +228,7 @@ from each attempt.
|
|||
|
||||
### Image editing
|
||||
|
||||
OpenAI, OpenRouter, Google, fal, MiniMax, ComfyUI, and xAI support editing
|
||||
OpenAI, OpenRouter, Google, DeepInfra, fal, MiniMax, ComfyUI, and xAI support editing
|
||||
reference images. Pass a reference image path or URL:
|
||||
|
||||
```text
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ provider is configured.
|
|||
| Alibaba | | ✓ | | | | | |
|
||||
| BytePlus | | ✓ | | | | | |
|
||||
| ComfyUI | ✓ | ✓ | ✓ | | | | |
|
||||
| DeepInfra | ✓ | ✓ | | ✓ | ✓ | | ✓ |
|
||||
| Deepgram | | | | | ✓ | ✓ | |
|
||||
| ElevenLabs | | | | ✓ | ✓ | | |
|
||||
| fal | ✓ | ✓ | | | | | |
|
||||
|
|
@ -94,7 +95,7 @@ original channel.
|
|||
|
||||
## Speech-to-text and Voice Call
|
||||
|
||||
Deepgram, ElevenLabs, Mistral, OpenAI, SenseAudio, and xAI can all transcribe
|
||||
Deepgram, DeepInfra, ElevenLabs, Mistral, OpenAI, SenseAudio, and xAI can all transcribe
|
||||
inbound audio through the batch `tools.media.audio` path when configured.
|
||||
Channel plugins that preflight a voice note for mention gating or command
|
||||
parsing mark the transcribed attachment on the inbound context, so the shared
|
||||
|
|
@ -116,6 +117,13 @@ vendor without waiting for a completed recording.
|
|||
Image, video, batch TTS, batch STT, Voice Call streaming STT, backend
|
||||
realtime voice, and memory-embedding surfaces.
|
||||
</Accordion>
|
||||
<Accordion title="DeepInfra">
|
||||
Chat/model routing, image generation/editing, text-to-video, batch TTS,
|
||||
batch STT, image media understanding, and memory-embedding surfaces.
|
||||
DeepInfra-native rerank/classification/object-detection models are not
|
||||
registered until OpenClaw has dedicated provider contracts for those
|
||||
categories.
|
||||
</Accordion>
|
||||
<Accordion title="xAI">
|
||||
Image, video, search, code-execution, batch TTS, batch STT, and Voice
|
||||
Call streaming STT. xAI Realtime voice is an upstream capability but is
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ title: "Text-to-speech"
|
|||
sidebarTitle: "Text to speech (TTS)"
|
||||
---
|
||||
|
||||
OpenClaw can convert outbound replies into audio across **13 speech providers**
|
||||
OpenClaw can convert outbound replies into audio across **14 speech providers**
|
||||
and deliver native voice messages on Feishu, Matrix, Telegram, and WhatsApp,
|
||||
audio attachments everywhere else, and PCM/Ulaw streams for telephony and Talk.
|
||||
|
||||
|
|
@ -55,6 +55,7 @@ OpenClaw picks the first configured provider in registry auto-select order.
|
|||
| Provider | Auth | Notes |
|
||||
| ----------------- | ---------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------- |
|
||||
| **Azure Speech** | `AZURE_SPEECH_KEY` + `AZURE_SPEECH_REGION` (also `AZURE_SPEECH_API_KEY`, `SPEECH_KEY`, `SPEECH_REGION`) | Native Ogg/Opus voice-note output and telephony. |
|
||||
| **DeepInfra** | `DEEPINFRA_API_KEY` | OpenAI-compatible TTS. Defaults to `hexgrad/Kokoro-82M`. |
|
||||
| **ElevenLabs** | `ELEVENLABS_API_KEY` or `XI_API_KEY` | Voice cloning, multilingual, deterministic via `seed`. |
|
||||
| **Google Gemini** | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | Gemini API TTS; persona-aware via `promptTemplate: "audio-profile-v1"`. |
|
||||
| **Gradium** | `GRADIUM_API_KEY` | Voice-note and telephony output. |
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ sidebarTitle: "Video generation"
|
|||
---
|
||||
|
||||
OpenClaw agents can generate videos from text prompts, reference images, or
|
||||
existing videos. Fourteen provider backends are supported, each with
|
||||
existing videos. Fifteen provider backends are supported, each with
|
||||
different model options, input modes, and feature sets. The agent picks the
|
||||
right provider automatically based on your configuration and available API
|
||||
keys.
|
||||
|
|
@ -111,6 +111,7 @@ generation.
|
|||
| BytePlus Seedance 1.5 | `seedance-1-5-pro-251215` | ✓ | Up to 2 images (first + last frame via role) | — | `BYTEPLUS_API_KEY` |
|
||||
| BytePlus Seedance 2.0 | `dreamina-seedance-2-0-260128` | ✓ | Up to 9 reference images | Up to 3 videos | `BYTEPLUS_API_KEY` |
|
||||
| ComfyUI | `workflow` | ✓ | 1 image | — | `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY` |
|
||||
| DeepInfra | `Pixverse/Pixverse-T2V` | ✓ | — | — | `DEEPINFRA_API_KEY` |
|
||||
| fal | `fal-ai/minimax/video-01-live` | ✓ | 1 image; up to 9 with Seedance reference-to-video | Up to 3 videos with Seedance reference-to-video | `FAL_KEY` |
|
||||
| Google | `veo-3.1-fast-generate-preview` | ✓ | 1 image | 1 video | `GEMINI_API_KEY` |
|
||||
| MiniMax | `MiniMax-Hailuo-2.3` | ✓ | 1 image | — | `MINIMAX_API_KEY` or MiniMax OAuth |
|
||||
|
|
@ -132,20 +133,21 @@ runtime modes at runtime.
|
|||
The explicit mode contract used by `video_generate`, contract tests, and
|
||||
the shared live sweep:
|
||||
|
||||
| Provider | `generate` | `imageToVideo` | `videoToVideo` | Shared live lanes today |
|
||||
| -------- | :--------: | :------------: | :------------: | ---------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Alibaba | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
|
||||
| BytePlus | ✓ | ✓ | — | `generate`, `imageToVideo` |
|
||||
| ComfyUI | ✓ | ✓ | — | Not in the shared sweep; workflow-specific coverage lives with Comfy tests |
|
||||
| fal | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` only when using Seedance reference-to-video |
|
||||
| Google | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because the current buffer-backed Gemini/Veo sweep does not accept that input |
|
||||
| MiniMax | ✓ | ✓ | — | `generate`, `imageToVideo` |
|
||||
| OpenAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because this org/input path currently needs provider-side inpaint/remix access |
|
||||
| Qwen | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
|
||||
| Runway | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` runs only when the selected model is `runway/gen4_aleph` |
|
||||
| Together | ✓ | ✓ | — | `generate`, `imageToVideo` |
|
||||
| Vydra | ✓ | ✓ | — | `generate`; shared `imageToVideo` skipped because bundled `veo3` is text-only and bundled `kling` requires a remote image URL |
|
||||
| xAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider currently needs a remote MP4 URL |
|
||||
| Provider | `generate` | `imageToVideo` | `videoToVideo` | Shared live lanes today |
|
||||
| --------- | :--------: | :------------: | :------------: | ---------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Alibaba | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
|
||||
| BytePlus | ✓ | ✓ | — | `generate`, `imageToVideo` |
|
||||
| ComfyUI | ✓ | ✓ | — | Not in the shared sweep; workflow-specific coverage lives with Comfy tests |
|
||||
| DeepInfra | ✓ | — | — | `generate`; native DeepInfra video schemas are text-to-video in the bundled contract |
|
||||
| fal | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` only when using Seedance reference-to-video |
|
||||
| Google | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because the current buffer-backed Gemini/Veo sweep does not accept that input |
|
||||
| MiniMax | ✓ | ✓ | — | `generate`, `imageToVideo` |
|
||||
| OpenAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because this org/input path currently needs provider-side inpaint/remix access |
|
||||
| Qwen | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
|
||||
| Runway | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` runs only when the selected model is `runway/gen4_aleph` |
|
||||
| Together | ✓ | ✓ | — | `generate`, `imageToVideo` |
|
||||
| Vydra | ✓ | ✓ | — | `generate`; shared `imageToVideo` skipped because bundled `veo3` is text-only and bundled `kling` requires a remote image URL |
|
||||
| xAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider currently needs a remote MP4 URL |
|
||||
|
||||
## Tool parameters
|
||||
|
||||
|
|
|
|||
8
extensions/deepinfra/api.ts
Normal file
8
extensions/deepinfra/api.ts
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
export { buildDeepInfraProvider, buildStaticDeepInfraProvider } from "./provider-catalog.js";
|
||||
export { applyDeepInfraConfig } from "./onboard.js";
|
||||
export { DEEPINFRA_DEFAULT_MODEL_REF } from "./provider-models.js";
|
||||
export { buildDeepInfraImageGenerationProvider } from "./image-generation-provider.js";
|
||||
export { deepinfraMediaUnderstandingProvider } from "./media-understanding-provider.js";
|
||||
export { deepinfraMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapter.js";
|
||||
export { buildDeepInfraSpeechProvider } from "./speech-provider.js";
|
||||
export { buildDeepInfraVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
33
extensions/deepinfra/embedding-provider.ts
Normal file
33
extensions/deepinfra/embedding-provider.ts
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import {
|
||||
createRemoteEmbeddingProvider,
|
||||
resolveRemoteEmbeddingClient,
|
||||
type MemoryEmbeddingProviderCreateOptions,
|
||||
type MemoryEmbeddingProviderCreateResult,
|
||||
} from "openclaw/plugin-sdk/memory-core-host-engine-embeddings";
|
||||
import {
|
||||
DEEPINFRA_BASE_URL,
|
||||
DEFAULT_DEEPINFRA_EMBEDDING_MODEL,
|
||||
normalizeDeepInfraModelRef,
|
||||
} from "./media-models.js";
|
||||
|
||||
export { DEFAULT_DEEPINFRA_EMBEDDING_MODEL };
|
||||
|
||||
export async function createDeepInfraEmbeddingProvider(
|
||||
options: MemoryEmbeddingProviderCreateOptions,
|
||||
): Promise<MemoryEmbeddingProviderCreateResult & { client: { model: string } }> {
|
||||
const client = await resolveRemoteEmbeddingClient({
|
||||
provider: "deepinfra",
|
||||
options: {
|
||||
...options,
|
||||
model: normalizeDeepInfraModelRef(options.model, DEFAULT_DEEPINFRA_EMBEDDING_MODEL),
|
||||
},
|
||||
defaultBaseUrl: DEEPINFRA_BASE_URL,
|
||||
normalizeModel: (model) => normalizeDeepInfraModelRef(model, DEFAULT_DEEPINFRA_EMBEDDING_MODEL),
|
||||
});
|
||||
const provider = createRemoteEmbeddingProvider({
|
||||
id: "deepinfra",
|
||||
client,
|
||||
errorPrefix: "DeepInfra embeddings API error",
|
||||
});
|
||||
return { provider, client };
|
||||
}
|
||||
148
extensions/deepinfra/image-generation-provider.test.ts
Normal file
148
extensions/deepinfra/image-generation-provider.test.ts
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { buildDeepInfraImageGenerationProvider } from "./image-generation-provider.js";
|
||||
|
||||
// Hoisted mock functions: vi.mock factories are hoisted by vitest, so the
// mocks they close over must be created via vi.hoisted to exist in time.
const {
  assertOkOrThrowHttpErrorMock,
  postJsonRequestMock,
  postMultipartRequestMock,
  resolveApiKeyForProviderMock,
  resolveProviderHttpRequestConfigMock,
} = vi.hoisted(() => ({
  // HTTP status assertion: resolves without throwing, so every mocked response is "ok".
  assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
  postJsonRequestMock: vi.fn(),
  postMultipartRequestMock: vi.fn(),
  // Always yields a key so provider auth resolution succeeds in tests.
  resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "deepinfra-key" })),
  // Echoes back the requested base URL (or the default), mirroring the shape
  // of the real resolver's return value.
  resolveProviderHttpRequestConfigMock: vi.fn((params: Record<string, unknown>) => ({
    baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? "https://api.deepinfra.com/v1/openai",
    allowPrivateNetwork: false,
    headers: new Headers(params.defaultHeaders as HeadersInit | undefined),
    dispatcherPolicy: undefined,
  })),
}));

// Replace the SDK auth module so no real credentials are consulted.
vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({
  resolveApiKeyForProvider: resolveApiKeyForProviderMock,
}));

// Replace the SDK HTTP helpers; request sanitization passes values through unchanged.
vi.mock("openclaw/plugin-sdk/provider-http", () => ({
  assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
  postJsonRequest: postJsonRequestMock,
  postMultipartRequest: postMultipartRequestMock,
  resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
  sanitizeConfiguredModelProviderRequest: vi.fn((request) => request),
}));
|
||||
|
||||
describe("deepinfra image generation provider", () => {
  afterEach(() => {
    // Reset call history between tests; the two request mocks also drop their
    // queued resolutions (mockReset) so each test installs its own response.
    assertOkOrThrowHttpErrorMock.mockClear();
    postJsonRequestMock.mockReset();
    postMultipartRequestMock.mockReset();
    resolveApiKeyForProviderMock.mockClear();
    resolveProviderHttpRequestConfigMock.mockClear();
  });

  it("declares generation and single-reference edit support", () => {
    const provider = buildDeepInfraImageGenerationProvider();

    // Static capability surface advertised to the image_generate tool.
    expect(provider.id).toBe("deepinfra");
    expect(provider.defaultModel).toBe("black-forest-labs/FLUX-1-schnell");
    expect(provider.capabilities.generate.maxCount).toBe(4);
    expect(provider.capabilities.edit.enabled).toBe(true);
    expect(provider.capabilities.edit.maxInputImages).toBe(1);
  });

  it("sends OpenAI-compatible image generation requests and sniffs JPEG output", async () => {
    const release = vi.fn(async () => {});
    // FF D8 FF is the JPEG magic prefix; the provider should sniff it as image/jpeg.
    const jpegBytes = Buffer.from([0xff, 0xd8, 0xff, 0x00]);
    postJsonRequestMock.mockResolvedValue({
      response: {
        json: async () => ({
          data: [{ b64_json: jpegBytes.toString("base64"), revised_prompt: "red square" }],
        }),
      },
      release,
    });

    const provider = buildDeepInfraImageGenerationProvider();
    const result = await provider.generateImage({
      provider: "deepinfra",
      model: "deepinfra/black-forest-labs/FLUX-1-schnell",
      prompt: "red square",
      count: 2,
      size: "512x512",
      timeoutMs: 12_345,
      cfg: {
        models: {
          providers: {
            deepinfra: {
              // Trailing slash on purpose: the provider should normalize it away.
              baseUrl: "https://api.deepinfra.com/v1/openai/",
            },
          },
        },
      } as never,
    });

    // Base URL was normalized (no trailing slash) before HTTP config resolution.
    expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith(
      expect.objectContaining({
        provider: "deepinfra",
        capability: "image",
        baseUrl: "https://api.deepinfra.com/v1/openai",
      }),
    );
    // The `deepinfra/` prefix is stripped from the model, and the request body
    // matches the OpenAI-compatible images/generations shape.
    expect(postJsonRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://api.deepinfra.com/v1/openai/images/generations",
        timeoutMs: 12_345,
        body: {
          model: "black-forest-labs/FLUX-1-schnell",
          prompt: "red square",
          n: 2,
          size: "512x512",
          response_format: "b64_json",
        },
      }),
    );
    expect(result.images[0]?.mimeType).toBe("image/jpeg");
    expect(result.images[0]?.fileName).toBe("image-1.jpg");
    expect(result.images[0]?.revisedPrompt).toBe("red square");
    // The pooled HTTP resource must be released exactly once.
    expect(release).toHaveBeenCalledOnce();
  });

  it("sends image edits as multipart OpenAI-compatible requests", async () => {
    postMultipartRequestMock.mockResolvedValue({
      response: {
        json: async () => ({
          data: [
            {
              // PNG magic bytes so the provider sniffs image/png from the payload.
              b64_json: Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]).toString(
                "base64",
              ),
            },
          ],
        }),
      },
      release: vi.fn(async () => {}),
    });

    const provider = buildDeepInfraImageGenerationProvider();
    const result = await provider.generateImage({
      provider: "deepinfra",
      model: "black-forest-labs/FLUX-1-schnell",
      prompt: "make it neon",
      // Presence of inputImages switches the provider to the edits endpoint.
      inputImages: [{ buffer: Buffer.from("source"), mimeType: "image/png" }],
      cfg: {} as never,
    });

    expect(postMultipartRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://api.deepinfra.com/v1/openai/images/edits",
      }),
    );
    // Inspect the submitted multipart form fields.
    const form = postMultipartRequestMock.mock.calls[0]?.[0].body as FormData;
    expect(form.get("model")).toBe("black-forest-labs/FLUX-1-schnell");
    expect(form.get("prompt")).toBe("make it neon");
    expect(form.get("response_format")).toBe("b64_json");
    expect(form.get("image")).toBeInstanceOf(File);
    expect(result.images[0]?.mimeType).toBe("image/png");
  });
});
|
||||
250
extensions/deepinfra/image-generation-provider.ts
Normal file
250
extensions/deepinfra/image-generation-provider.ts
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
|
||||
import type {
|
||||
GeneratedImageAsset,
|
||||
ImageGenerationProvider,
|
||||
ImageGenerationSourceImage,
|
||||
} from "openclaw/plugin-sdk/image-generation";
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
postJsonRequest,
|
||||
postMultipartRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
sanitizeConfiguredModelProviderRequest,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
|
||||
import {
|
||||
DEEPINFRA_BASE_URL,
|
||||
DEEPINFRA_IMAGE_MODELS,
|
||||
DEFAULT_DEEPINFRA_IMAGE_MODEL,
|
||||
DEFAULT_DEEPINFRA_IMAGE_SIZE,
|
||||
normalizeDeepInfraBaseUrl,
|
||||
normalizeDeepInfraModelRef,
|
||||
} from "./media-models.js";
|
||||
|
||||
const DEEPINFRA_IMAGE_SIZES = ["512x512", "1024x1024", "1024x1792", "1792x1024"] as const;
|
||||
const MAX_DEEPINFRA_INPUT_IMAGES = 1;
|
||||
|
||||
type DeepInfraProviderConfig = NonNullable<
|
||||
NonNullable<OpenClawConfig["models"]>["providers"]
|
||||
>[string];
|
||||
|
||||
type DeepInfraImageApiResponse = {
|
||||
data?: Array<{
|
||||
b64_json?: string;
|
||||
revised_prompt?: string;
|
||||
url?: string;
|
||||
}>;
|
||||
};
|
||||
|
||||
function resolveDeepInfraProviderConfig(
|
||||
cfg: OpenClawConfig | undefined,
|
||||
): DeepInfraProviderConfig | undefined {
|
||||
return cfg?.models?.providers?.deepinfra;
|
||||
}
|
||||
|
||||
function detectImageMimeType(buffer: Buffer): {
|
||||
mimeType: string;
|
||||
extension: "jpg" | "png" | "webp";
|
||||
} {
|
||||
if (buffer.length >= 3 && buffer[0] === 0xff && buffer[1] === 0xd8 && buffer[2] === 0xff) {
|
||||
return { mimeType: "image/jpeg", extension: "jpg" };
|
||||
}
|
||||
if (
|
||||
buffer.length >= 8 &&
|
||||
buffer[0] === 0x89 &&
|
||||
buffer[1] === 0x50 &&
|
||||
buffer[2] === 0x4e &&
|
||||
buffer[3] === 0x47
|
||||
) {
|
||||
return { mimeType: "image/png", extension: "png" };
|
||||
}
|
||||
if (
|
||||
buffer.length >= 12 &&
|
||||
buffer.toString("ascii", 0, 4) === "RIFF" &&
|
||||
buffer.toString("ascii", 8, 12) === "WEBP"
|
||||
) {
|
||||
return { mimeType: "image/webp", extension: "webp" };
|
||||
}
|
||||
return { mimeType: "image/jpeg", extension: "jpg" };
|
||||
}
|
||||
|
||||
function imageToUploadName(image: ImageGenerationSourceImage, index: number): string {
|
||||
const fileName = normalizeOptionalString(image.fileName);
|
||||
if (fileName) {
|
||||
return fileName;
|
||||
}
|
||||
const mimeType = normalizeOptionalString(image.mimeType) ?? "image/png";
|
||||
const ext =
|
||||
mimeType === "image/jpeg" || mimeType === "image/jpg"
|
||||
? "jpg"
|
||||
: mimeType === "image/webp"
|
||||
? "webp"
|
||||
: "png";
|
||||
return `image-${index + 1}.${ext}`;
|
||||
}
|
||||
|
||||
function imageToAsset(
|
||||
entry: NonNullable<DeepInfraImageApiResponse["data"]>[number],
|
||||
index: number,
|
||||
): GeneratedImageAsset | null {
|
||||
const b64 = normalizeOptionalString(entry.b64_json);
|
||||
if (!b64) {
|
||||
return null;
|
||||
}
|
||||
const buffer = Buffer.from(b64, "base64");
|
||||
const detected = detectImageMimeType(buffer);
|
||||
const image: GeneratedImageAsset = {
|
||||
buffer,
|
||||
mimeType: detected.mimeType,
|
||||
fileName: `image-${index + 1}.${detected.extension}`,
|
||||
};
|
||||
const revisedPrompt = normalizeOptionalString(entry.revised_prompt);
|
||||
if (revisedPrompt) {
|
||||
image.revisedPrompt = revisedPrompt;
|
||||
}
|
||||
return image;
|
||||
}
|
||||
|
||||
function parseImageResponse(payload: DeepInfraImageApiResponse): GeneratedImageAsset[] {
|
||||
return (payload.data ?? [])
|
||||
.map(imageToAsset)
|
||||
.filter((entry): entry is GeneratedImageAsset => entry !== null);
|
||||
}
|
||||
|
||||
/**
 * Build the DeepInfra image-generation provider surface.
 *
 * Supports two modes through one `generateImage` entry point:
 *  - generation: JSON POST to `images/generations` (up to 4 images);
 *  - editing: multipart POST to `images/edits` when `req.inputImages`
 *    is non-empty (single reference image, single output).
 *
 * Both paths request base64 payloads (`response_format: "b64_json"`) and
 * decode them via `parseImageResponse`.
 */
export function buildDeepInfraImageGenerationProvider(): ImageGenerationProvider {
  return {
    id: "deepinfra",
    label: "DeepInfra",
    defaultModel: DEFAULT_DEEPINFRA_IMAGE_MODEL,
    // Copy so callers cannot mutate the module-level readonly tuple.
    models: [...DEEPINFRA_IMAGE_MODELS],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "deepinfra",
        agentDir,
      }),
    capabilities: {
      generate: {
        maxCount: 4,
        supportsSize: true,
        supportsAspectRatio: false,
        supportsResolution: false,
      },
      edit: {
        enabled: true,
        maxCount: 1,
        // NOTE(review): the error message below assumes this constant is 1 —
        // confirm if MAX_DEEPINFRA_INPUT_IMAGES ever changes.
        maxInputImages: MAX_DEEPINFRA_INPUT_IMAGES,
        supportsSize: true,
        supportsAspectRatio: false,
        supportsResolution: false,
      },
      geometry: {
        sizes: [...DEEPINFRA_IMAGE_SIZES],
      },
    },
    async generateImage(req) {
      const inputImages = req.inputImages ?? [];
      // Any reference image switches the request into edit mode.
      const isEdit = inputImages.length > 0;
      if (inputImages.length > MAX_DEEPINFRA_INPUT_IMAGES) {
        throw new Error("DeepInfra image editing supports one reference image.");
      }
      // Resolve the API key from config/env/auth store; fail fast without one.
      const auth = await resolveApiKeyForProvider({
        provider: "deepinfra",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("DeepInfra API key missing");
      }

      // Merge the user-configured base URL / request overrides with the
      // provider defaults, producing the final URL, headers and dispatcher
      // policy for the outbound HTTP call. Private networks are disallowed
      // unless the shared resolver explicitly re-enables them.
      const providerConfig = resolveDeepInfraProviderConfig(req.cfg);
      const resolvedBaseUrl = normalizeDeepInfraBaseUrl(
        providerConfig?.baseUrl,
        DEEPINFRA_BASE_URL,
      );
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: resolvedBaseUrl,
          defaultBaseUrl: DEEPINFRA_BASE_URL,
          allowPrivateNetwork: false,
          request: sanitizeConfiguredModelProviderRequest(providerConfig?.request),
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
          },
          provider: "deepinfra",
          capability: "image",
          transport: "http",
        });

      // Strip any "deepinfra/" prefix; edits always produce exactly one image.
      const model = normalizeDeepInfraModelRef(req.model, DEFAULT_DEEPINFRA_IMAGE_MODEL);
      const count = isEdit ? 1 : (req.count ?? 1);
      const size = normalizeOptionalString(req.size) ?? DEFAULT_DEEPINFRA_IMAGE_SIZE;
      const endpoint = isEdit ? "images/edits" : "images/generations";
      const request = isEdit
        ? (() => {
            // Edit path: multipart form with the reference image attached.
            const form = new FormData();
            form.set("model", model);
            form.set("prompt", req.prompt);
            form.set("n", String(count));
            form.set("size", size);
            form.set("response_format", "b64_json");
            const image = inputImages[0];
            if (!image) {
              throw new Error("DeepInfra image edit missing reference image.");
            }
            const mimeType = normalizeOptionalString(image.mimeType) ?? "image/png";
            form.append(
              "image",
              new Blob([new Uint8Array(image.buffer)], { type: mimeType }),
              imageToUploadName(image, 0),
            );
            // Let fetch set the multipart boundary; a preconfigured
            // Content-Type header would break the form encoding.
            const multipartHeaders = new Headers(headers);
            multipartHeaders.delete("Content-Type");
            return postMultipartRequest({
              url: `${baseUrl}/${endpoint}`,
              headers: multipartHeaders,
              body: form,
              timeoutMs: req.timeoutMs,
              fetchFn: fetch,
              allowPrivateNetwork,
              dispatcherPolicy,
            });
          })()
        : postJsonRequest({
            // Generation path: plain JSON body.
            url: `${baseUrl}/${endpoint}`,
            headers: new Headers({
              ...Object.fromEntries(headers.entries()),
              "Content-Type": "application/json",
            }),
            body: {
              model,
              prompt: req.prompt,
              n: count,
              size,
              response_format: "b64_json",
            },
            timeoutMs: req.timeoutMs,
            fetchFn: fetch,
            allowPrivateNetwork,
            dispatcherPolicy,
          });

      const { response, release } = await request;
      try {
        await assertOkOrThrowHttpError(
          response,
          isEdit ? "DeepInfra image edit failed" : "DeepInfra image generation failed",
        );
        const images = parseImageResponse((await response.json()) as DeepInfraImageApiResponse);
        if (images.length === 0) {
          throw new Error("DeepInfra image response did not include generated image data");
        }
        return { images, model };
      } finally {
        // Always release the underlying connection/dispatcher resources,
        // even when parsing or the HTTP status check throws.
        await release();
      }
    },
  };
}
|
||||
113
extensions/deepinfra/index.test.ts
Normal file
113
extensions/deepinfra/index.test.ts
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
createCapturedPluginRegistration,
|
||||
registerSingleProviderPlugin,
|
||||
} from "openclaw/plugin-sdk/testing";
|
||||
import deepinfraPlugin from "./index.js";
|
||||
|
||||
// Catalog augmentation: entries users add under
// `models.providers.deepinfra.models` in their config must be surfaced by
// augmentModelCatalog, tagged with the provider id; no config → empty list.
describe("deepinfra augmentModelCatalog", () => {
  it("returns empty when no configured catalog entries", async () => {
    const provider = await registerSingleProviderPlugin(deepinfraPlugin);

    const entries = await provider.augmentModelCatalog?.({} as never);

    expect(entries).toEqual([]);
  });

  it("returns configured catalog entries from config", async () => {
    const provider = await registerSingleProviderPlugin(deepinfraPlugin);

    const entries = await provider.augmentModelCatalog?.({
      config: {
        models: {
          providers: {
            deepinfra: {
              models: [
                {
                  id: "zai-org/GLM-5.1",
                  name: "GLM-5.1",
                  input: ["text"],
                  reasoning: true,
                  contextWindow: 202752,
                },
              ],
            },
          },
        },
      },
    } as never);

    // The configured entry comes back verbatim plus the provider tag.
    expect(entries).toEqual([
      {
        provider: "deepinfra",
        id: "zai-org/GLM-5.1",
        name: "GLM-5.1",
        input: ["text"],
        reasoning: true,
        contextWindow: 202752,
      },
    ]);
  });
});
|
||||
|
||||
// The plugin's register() hook must wire up every media surface declared in
// openclaw.plugin.json (image, media-understanding, embeddings, speech,
// video) under the single "deepinfra" provider id.
describe("deepinfra capability registration", () => {
  it("registers all DeepInfra-backed OpenClaw provider surfaces", () => {
    const captured = createCapturedPluginRegistration();
    deepinfraPlugin.register(captured.api);

    expect(captured.providers.map((provider) => provider.id)).toEqual(["deepinfra"]);
    expect(captured.imageGenerationProviders.map((provider) => provider.id)).toEqual(["deepinfra"]);
    expect(captured.mediaUnderstandingProviders.map((provider) => provider.id)).toEqual([
      "deepinfra",
    ]);
    expect(captured.memoryEmbeddingProviders.map((provider) => provider.id)).toEqual(["deepinfra"]);
    expect(captured.speechProviders.map((provider) => provider.id)).toEqual(["deepinfra"]);
    expect(captured.videoGenerationProviders.map((provider) => provider.id)).toEqual(["deepinfra"]);
  });
});
|
||||
|
||||
// Cache-TTL eligibility: only DeepInfra-proxied Anthropic models opt into
// prompt-cache TTL handling; the check must be case-insensitive.
describe("deepinfra isCacheTtlEligible", () => {
  it("returns true for anthropic/* proxied models", async () => {
    const provider = await registerSingleProviderPlugin(deepinfraPlugin);
    expect(
      provider.isCacheTtlEligible?.({
        provider: "deepinfra",
        modelId: "anthropic/claude-4-sonnet",
      }),
    ).toBe(true);
  });

  // Locked to case-insensitive to stay consistent with the shared proxy cache
  // wrapper, which lowercases the modelId before the "anthropic/" prefix check.
  it("returns true regardless of modelId case", async () => {
    const provider = await registerSingleProviderPlugin(deepinfraPlugin);
    expect(
      provider.isCacheTtlEligible?.({
        provider: "deepinfra",
        modelId: "Anthropic/Claude-4-Sonnet",
      }),
    ).toBe(true);
    expect(
      provider.isCacheTtlEligible?.({
        provider: "deepinfra",
        modelId: "ANTHROPIC/claude-4-sonnet",
      }),
    ).toBe(true);
  });

  it("returns false for non-anthropic models", async () => {
    const provider = await registerSingleProviderPlugin(deepinfraPlugin);
    expect(
      provider.isCacheTtlEligible?.({
        provider: "deepinfra",
        modelId: "meta-llama/Llama-4-Scout-17B-16E-Instruct",
      }),
    ).toBe(false);
    expect(
      provider.isCacheTtlEligible?.({
        provider: "deepinfra",
        modelId: "zai-org/GLM-5.1",
      }),
    ).toBe(false);
  });
});
|
||||
84
extensions/deepinfra/index.ts
Normal file
84
extensions/deepinfra/index.ts
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
import { readConfiguredProviderCatalogEntries } from "openclaw/plugin-sdk/provider-catalog-shared";
|
||||
import { defineSingleProviderPluginEntry } from "openclaw/plugin-sdk/provider-entry";
|
||||
import { PASSTHROUGH_GEMINI_REPLAY_HOOKS } from "openclaw/plugin-sdk/provider-model-shared";
|
||||
import {
|
||||
createOpenRouterSystemCacheWrapper,
|
||||
createOpenRouterWrapper,
|
||||
isProxyReasoningUnsupported,
|
||||
} from "openclaw/plugin-sdk/provider-stream";
|
||||
import { buildDeepInfraImageGenerationProvider } from "./image-generation-provider.js";
|
||||
import { deepinfraMediaUnderstandingProvider } from "./media-understanding-provider.js";
|
||||
import { deepinfraMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapter.js";
|
||||
import { applyDeepInfraConfig } from "./onboard.js";
|
||||
import { buildDeepInfraProvider, buildStaticDeepInfraProvider } from "./provider-catalog.js";
|
||||
import { DEEPINFRA_DEFAULT_MODEL_REF } from "./provider-models.js";
|
||||
import { buildDeepInfraSpeechProvider } from "./speech-provider.js";
|
||||
import { buildDeepInfraVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
const PROVIDER_ID = "deepinfra";

/**
 * DeepInfra provider plugin entry.
 *
 * Declares the api-key auth flow (env var, CLI flag, onboarding wizard),
 * the model-catalog builders (static list + runtime discovery), stream
 * wrapping for proxied models, and registers every DeepInfra-backed media
 * surface (image, media understanding, embeddings, speech, video).
 */
export default defineSingleProviderPluginEntry({
  id: PROVIDER_ID,
  name: "DeepInfra Provider",
  description: "Bundled DeepInfra provider plugin",
  provider: {
    label: "DeepInfra",
    docsPath: "/providers/deepinfra",
    auth: [
      {
        methodId: "api-key",
        label: "DeepInfra API key",
        hint: "Unified API for open source models",
        optionKey: "deepinfraApiKey",
        flagName: "--deepinfra-api-key",
        envVar: "DEEPINFRA_API_KEY",
        promptMessage: "Enter DeepInfra API key",
        noteTitle: "DeepInfra",
        noteMessage: [
          "DeepInfra provides an OpenAI-compatible API for open source and frontier models.",
          "Get your API key at: https://deepinfra.com/dash/api_keys",
        ].join("\n"),
        defaultModel: DEEPINFRA_DEFAULT_MODEL_REF,
        // Sets the default model + "DeepInfra" alias in the user config.
        applyConfig: (cfg) => applyDeepInfraConfig(cfg),
        wizard: {
          choiceId: "deepinfra-api-key",
          choiceLabel: "DeepInfra API key",
          choiceHint: "Unified API for open source models",
          groupId: PROVIDER_ID,
          groupLabel: "DeepInfra",
          groupHint: "Unified API for open source models",
        },
      },
    ],
    catalog: {
      // Runtime discovery vs. bundled static fallback catalog.
      buildProvider: buildDeepInfraProvider,
      buildStaticProvider: buildStaticDeepInfraProvider,
    },
    // Surface user-configured catalog entries from models.providers.deepinfra.
    augmentModelCatalog: ({ config }) =>
      readConfiguredProviderCatalogEntries({
        config,
        providerId: PROVIDER_ID,
      }),
    normalizeConfig: ({ providerConfig }) => providerConfig,
    // Only the default OpenAI-compatible endpoint is passed through untouched.
    // NOTE(review): this literal duplicates DEEPINFRA_BASE_URL in
    // provider-models.ts — keep them in sync.
    normalizeTransport: ({ api, baseUrl }) =>
      baseUrl === "https://api.deepinfra.com/v1/openai" ? { api, baseUrl } : undefined,
    ...PASSTHROUGH_GEMINI_REPLAY_HOOKS,
    wrapStreamFn: (ctx) => {
      // Drop the thinking level for models the proxy cannot do reasoning for,
      // then apply the shared OpenRouter-style stream + system-cache wrappers.
      const thinkingLevel = isProxyReasoningUnsupported(ctx.modelId)
        ? undefined
        : ctx.thinkingLevel;
      return createOpenRouterSystemCacheWrapper(
        createOpenRouterWrapper(ctx.streamFn, thinkingLevel),
      );
    },
    isModernModelRef: () => true,
    // Prompt-cache TTL applies only to proxied Anthropic models; lowercase
    // first so the prefix check is case-insensitive (see index.test.ts).
    isCacheTtlEligible: (ctx) => ctx.modelId.toLowerCase().startsWith("anthropic/"),
  },
  register(api) {
    // Register every DeepInfra-backed capability surface.
    api.registerImageGenerationProvider(buildDeepInfraImageGenerationProvider());
    api.registerMediaUnderstandingProvider(deepinfraMediaUnderstandingProvider);
    api.registerMemoryEmbeddingProvider(deepinfraMemoryEmbeddingProviderAdapter);
    api.registerSpeechProvider(buildDeepInfraSpeechProvider());
    api.registerVideoGenerationProvider(buildDeepInfraVideoGenerationProvider());
  },
});
|
||||
50
extensions/deepinfra/media-models.ts
Normal file
50
extensions/deepinfra/media-models.ts
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
|
||||
import { DEEPINFRA_BASE_URL } from "./provider-models.js";
|
||||
|
||||
// Re-export the OpenAI-compatible base URL for the media modules.
export { DEEPINFRA_BASE_URL };

// DeepInfra's native (non-OpenAI-compatible) inference endpoint.
export const DEEPINFRA_NATIVE_BASE_URL = "https://api.deepinfra.com/v1/inference";

// --- Image generation ---
export const DEFAULT_DEEPINFRA_IMAGE_MODEL = "black-forest-labs/FLUX-1-schnell";
export const DEFAULT_DEEPINFRA_IMAGE_SIZE = "1024x1024";
export const DEEPINFRA_IMAGE_MODELS = [
  DEFAULT_DEEPINFRA_IMAGE_MODEL,
  "run-diffusion/Juggernaut-Lightning-Flux",
  "black-forest-labs/FLUX-1-dev",
  "Qwen/Qwen-Image-Max",
  "stabilityai/sdxl-turbo",
] as const;

// --- Memory embeddings ---
export const DEFAULT_DEEPINFRA_EMBEDDING_MODEL = "BAAI/bge-m3";

// --- Media understanding (speech-to-text + vision) ---
export const DEFAULT_DEEPINFRA_AUDIO_TRANSCRIPTION_MODEL = "openai/whisper-large-v3-turbo";
export const DEFAULT_DEEPINFRA_IMAGE_UNDERSTANDING_MODEL = "moonshotai/Kimi-K2.5";

// --- Text-to-speech ---
export const DEFAULT_DEEPINFRA_TTS_MODEL = "hexgrad/Kokoro-82M";
export const DEFAULT_DEEPINFRA_TTS_VOICE = "af_alloy";
export const DEEPINFRA_TTS_MODELS = [
  DEFAULT_DEEPINFRA_TTS_MODEL,
  "ResembleAI/chatterbox-turbo",
  "sesame/csm-1b",
  "Qwen/Qwen3-TTS",
] as const;

// --- Video generation ---
export const DEFAULT_DEEPINFRA_VIDEO_MODEL = "Pixverse/Pixverse-T2V";
export const DEEPINFRA_VIDEO_MODELS = [
  DEFAULT_DEEPINFRA_VIDEO_MODEL,
  "Pixverse/Pixverse-T2V-HD",
  "Wan-AI/Wan2.1-T2V-1.3B",
  "google/veo-3.0-fast",
] as const;

// Supported aspect ratios and clip durations (seconds) for video requests.
export const DEEPINFRA_VIDEO_ASPECT_RATIOS = ["16:9", "4:3", "1:1", "3:4", "9:16"] as const;
export const DEEPINFRA_VIDEO_DURATIONS = [5, 8] as const;
|
||||
|
||||
export function normalizeDeepInfraModelRef(model: string | undefined, fallback: string): string {
|
||||
const value = normalizeOptionalString(model) ?? fallback;
|
||||
return value.startsWith("deepinfra/") ? value.slice("deepinfra/".length) : value;
|
||||
}
|
||||
|
||||
export function normalizeDeepInfraBaseUrl(value: unknown, fallback = DEEPINFRA_BASE_URL): string {
|
||||
return (normalizeOptionalString(value) ?? fallback).replace(/\/+$/u, "");
|
||||
}
|
||||
52
extensions/deepinfra/media-understanding-provider.test.ts
Normal file
52
extensions/deepinfra/media-understanding-provider.test.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import { describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
deepinfraMediaUnderstandingProvider,
|
||||
transcribeDeepInfraAudio,
|
||||
} from "./media-understanding-provider.js";
|
||||
|
||||
// Hoisted so the mock exists before the vi.mock factory below runs.
const { transcribeOpenAiCompatibleAudioMock } = vi.hoisted(() => ({
  transcribeOpenAiCompatibleAudioMock: vi.fn(async () => ({ text: "hello", model: "whisper" })),
}));

// Replace only the shared transcription helper; keep the rest of the module.
vi.mock("openclaw/plugin-sdk/media-understanding", async () => {
  const actual = await vi.importActual<typeof import("openclaw/plugin-sdk/media-understanding")>(
    "openclaw/plugin-sdk/media-understanding",
  );
  return {
    ...actual,
    transcribeOpenAiCompatibleAudio: transcribeOpenAiCompatibleAudioMock,
  };
});

describe("deepinfra media understanding provider", () => {
  it("declares image and audio defaults", () => {
    expect(deepinfraMediaUnderstandingProvider).toMatchObject({
      id: "deepinfra",
      capabilities: ["image", "audio"],
      defaultModels: {
        image: "moonshotai/Kimi-K2.5",
        audio: "openai/whisper-large-v3-turbo",
      },
    });
    expect(deepinfraMediaUnderstandingProvider.describeImage).toBeTypeOf("function");
    expect(deepinfraMediaUnderstandingProvider.describeImages).toBeTypeOf("function");
  });

  it("routes audio transcription through the OpenAI-compatible DeepInfra endpoint", async () => {
    const result = await transcribeDeepInfraAudio({
      buffer: Buffer.from("audio"),
      fileName: "clip.mp3",
      apiKey: "deepinfra-key",
      timeoutMs: 30_000,
    });

    expect(result.text).toBe("hello");
    // The wrapper must pin provider id, base URL and default model.
    expect(transcribeOpenAiCompatibleAudioMock).toHaveBeenCalledWith(
      expect.objectContaining({
        provider: "deepinfra",
        defaultBaseUrl: "https://api.deepinfra.com/v1/openai",
        defaultModel: "openai/whisper-large-v3-turbo",
      }),
    );
  });
});
|
||||
37
extensions/deepinfra/media-understanding-provider.ts
Normal file
37
extensions/deepinfra/media-understanding-provider.ts
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import {
|
||||
describeImageWithModel,
|
||||
describeImagesWithModel,
|
||||
transcribeOpenAiCompatibleAudio,
|
||||
type AudioTranscriptionRequest,
|
||||
type MediaUnderstandingProvider,
|
||||
} from "openclaw/plugin-sdk/media-understanding";
|
||||
import {
|
||||
DEEPINFRA_BASE_URL,
|
||||
DEFAULT_DEEPINFRA_AUDIO_TRANSCRIPTION_MODEL,
|
||||
DEFAULT_DEEPINFRA_IMAGE_UNDERSTANDING_MODEL,
|
||||
} from "./media-models.js";
|
||||
|
||||
export async function transcribeDeepInfraAudio(params: AudioTranscriptionRequest) {
|
||||
return await transcribeOpenAiCompatibleAudio({
|
||||
...params,
|
||||
provider: "deepinfra",
|
||||
defaultBaseUrl: DEEPINFRA_BASE_URL,
|
||||
defaultModel: DEFAULT_DEEPINFRA_AUDIO_TRANSCRIPTION_MODEL,
|
||||
});
|
||||
}
|
||||
|
||||
// Media-understanding surface for DeepInfra: vision (image description via
// the generic chat-model helpers) and audio transcription.
export const deepinfraMediaUnderstandingProvider: MediaUnderstandingProvider = {
  id: "deepinfra",
  capabilities: ["image", "audio"],
  defaultModels: {
    image: DEFAULT_DEEPINFRA_IMAGE_UNDERSTANDING_MODEL,
    audio: DEFAULT_DEEPINFRA_AUDIO_TRANSCRIPTION_MODEL,
  },
  // NOTE(review): these priorities are duplicated in
  // openclaw.plugin.json's mediaUnderstandingProviderMetadata — keep in sync.
  autoPriority: {
    image: 45,
    audio: 45,
  },
  transcribeAudio: transcribeDeepInfraAudio,
  describeImage: describeImageWithModel,
  describeImages: describeImagesWithModel,
};
|
||||
18
extensions/deepinfra/memory-embedding-adapter.test.ts
Normal file
18
extensions/deepinfra/memory-embedding-adapter.test.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import { deepinfraMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapter.js";
|
||||
|
||||
// Shape check: the adapter must advertise a remote, auth-backed embedding
// provider with the expected default model and auto-selection settings.
describe("deepinfra memory embedding adapter", () => {
  it("declares a remote auth-backed embedding provider", () => {
    expect(deepinfraMemoryEmbeddingProviderAdapter).toMatchObject({
      id: "deepinfra",
      defaultModel: "BAAI/bge-m3",
      transport: "remote",
      authProviderId: "deepinfra",
      autoSelectPriority: 55,
      allowExplicitWhenConfiguredAuto: true,
    });
    expect(deepinfraMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection).toBeTypeOf(
      "function",
    );
  });
});
|
||||
35
extensions/deepinfra/memory-embedding-adapter.ts
Normal file
35
extensions/deepinfra/memory-embedding-adapter.ts
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
import {
|
||||
isMissingEmbeddingApiKeyError,
|
||||
type MemoryEmbeddingProviderAdapter,
|
||||
} from "openclaw/plugin-sdk/memory-core-host-engine-embeddings";
|
||||
import {
|
||||
createDeepInfraEmbeddingProvider,
|
||||
DEFAULT_DEEPINFRA_EMBEDDING_MODEL,
|
||||
} from "./embedding-provider.js";
|
||||
|
||||
// Memory-embedding adapter: wires DeepInfra's embedding client into the
// host engine. Remote transport, authenticated via the "deepinfra" provider
// auth; auto-selection continues past this adapter when the only failure is
// a missing API key.
export const deepinfraMemoryEmbeddingProviderAdapter: MemoryEmbeddingProviderAdapter = {
  id: "deepinfra",
  defaultModel: DEFAULT_DEEPINFRA_EMBEDDING_MODEL,
  transport: "remote",
  authProviderId: "deepinfra",
  autoSelectPriority: 55,
  allowExplicitWhenConfiguredAuto: true,
  shouldContinueAutoSelection: isMissingEmbeddingApiKeyError,
  create: async (options) => {
    // "none" disables fallback behavior in the underlying embedding factory.
    const { provider, client } = await createDeepInfraEmbeddingProvider({
      ...options,
      provider: "deepinfra",
      fallback: "none",
    });
    return {
      provider,
      runtime: {
        id: "deepinfra",
        // Cache key includes the resolved model so switching models
        // invalidates previously cached embeddings.
        cacheKeyData: {
          provider: "deepinfra",
          model: client.model,
        },
      },
    };
  },
};
|
||||
165
extensions/deepinfra/onboard.test.ts
Normal file
165
extensions/deepinfra/onboard.test.ts
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
import { mkdtempSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import * as providerAuth from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
type OpenClawConfig,
|
||||
resolveAgentModelPrimaryValue,
|
||||
} from "openclaw/plugin-sdk/provider-onboard";
|
||||
import { captureEnv } from "openclaw/plugin-sdk/testing";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
applyDeepInfraProviderConfig,
|
||||
applyDeepInfraConfig,
|
||||
DEEPINFRA_BASE_URL,
|
||||
DEEPINFRA_DEFAULT_MODEL_REF,
|
||||
} from "./onboard.js";
|
||||
import { DEEPINFRA_DEFAULT_MODEL_ID } from "./provider-models.js";
|
||||
|
||||
// Pulled off the namespace import so vi.spyOn can still patch the module.
const { resolveEnvApiKey } = providerAuth;

// Shared empty config fixture; functions under test must not mutate it.
const emptyCfg: OpenClawConfig = {};

describe("DeepInfra provider config", () => {
  // Sanity-pin the exported constants so endpoint/model drift is caught.
  describe("constants", () => {
    it("DEEPINFRA_BASE_URL points to deepinfra openai endpoint", () => {
      expect(DEEPINFRA_BASE_URL).toBe("https://api.deepinfra.com/v1/openai");
    });

    it("DEEPINFRA_DEFAULT_MODEL_REF includes provider prefix", () => {
      expect(DEEPINFRA_DEFAULT_MODEL_REF).toBe("deepinfra/deepseek-ai/DeepSeek-V3.2");
    });

    it("DEEPINFRA_DEFAULT_MODEL_ID is deepseek-ai/DeepSeek-V3.2", () => {
      expect(DEEPINFRA_DEFAULT_MODEL_ID).toBe("deepseek-ai/DeepSeek-V3.2");
    });
  });

  // Alias-only config application: sets the "DeepInfra" alias on the chosen
  // model ref without touching the provider model list or primary model.
  describe("applyDeepInfraProviderConfig", () => {
    it("does not set provider models (discovery populates them at runtime)", () => {
      const result = applyDeepInfraProviderConfig(emptyCfg, DEEPINFRA_DEFAULT_MODEL_REF);
      expect(result.models?.providers?.deepinfra).toBeUndefined();
    });

    it("sets DeepInfra alias on the provided model ref", () => {
      const result = applyDeepInfraProviderConfig(emptyCfg, DEEPINFRA_DEFAULT_MODEL_REF);
      const agentModel = result.agents?.defaults?.models?.[DEEPINFRA_DEFAULT_MODEL_REF];
      expect(agentModel).toBeDefined();
      expect(agentModel?.alias).toBe("DeepInfra");
    });

    it("attaches the alias to a non-default model ref when provided", () => {
      const fallbackRef = "deepinfra/other/awesome-model";
      const result = applyDeepInfraProviderConfig(emptyCfg, fallbackRef);
      expect(result.agents?.defaults?.models?.[fallbackRef]?.alias).toBe("DeepInfra");
      expect(result.agents?.defaults?.models?.[DEEPINFRA_DEFAULT_MODEL_REF]).toBeUndefined();
    });

    it("preserves existing alias if already set", () => {
      const cfg: OpenClawConfig = {
        agents: {
          defaults: {
            models: {
              [DEEPINFRA_DEFAULT_MODEL_REF]: { alias: "My Custom Alias" },
            },
          },
        },
      };
      const result = applyDeepInfraProviderConfig(cfg, DEEPINFRA_DEFAULT_MODEL_REF);
      const agentModel = result.agents?.defaults?.models?.[DEEPINFRA_DEFAULT_MODEL_REF];
      expect(agentModel?.alias).toBe("My Custom Alias");
    });

    it("does not change the default model selection", () => {
      const cfg: OpenClawConfig = {
        agents: {
          defaults: {
            model: { primary: "openai/gpt-5" },
          },
        },
      };
      const result = applyDeepInfraProviderConfig(cfg, DEEPINFRA_DEFAULT_MODEL_REF);
      expect(resolveAgentModelPrimaryValue(result.agents?.defaults?.model)).toBe("openai/gpt-5");
    });
  });

  // Full onboarding application: alias + primary model selection.
  describe("applyDeepInfraConfig", () => {
    it("sets the provided model ref as the primary default", () => {
      const result = applyDeepInfraConfig(emptyCfg, DEEPINFRA_DEFAULT_MODEL_REF);
      expect(resolveAgentModelPrimaryValue(result.agents?.defaults?.model)).toBe(
        DEEPINFRA_DEFAULT_MODEL_REF,
      );
    });

    it("sets the DeepInfra alias on the provided ref", () => {
      const result = applyDeepInfraConfig(emptyCfg, DEEPINFRA_DEFAULT_MODEL_REF);
      const agentModel = result.agents?.defaults?.models?.[DEEPINFRA_DEFAULT_MODEL_REF];
      expect(agentModel?.alias).toBe("DeepInfra");
    });

    it("honors a fallback ref when discovery picked a non-default model", () => {
      const fallbackRef = "deepinfra/other/awesome-model";
      const result = applyDeepInfraConfig(emptyCfg, fallbackRef);
      expect(resolveAgentModelPrimaryValue(result.agents?.defaults?.model)).toBe(fallbackRef);
      expect(result.agents?.defaults?.models?.[fallbackRef]?.alias).toBe("DeepInfra");
    });
  });

  // API-key resolution: env var lookup plus the provider-auth entry point.
  // Env state is snapshotted and restored around every test.
  describe("env var resolution", () => {
    afterEach(() => {
      vi.restoreAllMocks();
    });

    it("resolves DEEPINFRA_API_KEY from env", () => {
      const envSnapshot = captureEnv(["DEEPINFRA_API_KEY"]);
      process.env.DEEPINFRA_API_KEY = "test-deepinfra-key";

      try {
        const result = resolveEnvApiKey("deepinfra");
        expect(result).not.toBeNull();
        expect(result?.apiKey).toBe("test-deepinfra-key");
        expect(result?.source).toContain("DEEPINFRA_API_KEY");
      } finally {
        envSnapshot.restore();
      }
    });

    it("returns null when DEEPINFRA_API_KEY is not set", () => {
      const envSnapshot = captureEnv(["DEEPINFRA_API_KEY"]);
      delete process.env.DEEPINFRA_API_KEY;

      try {
        const result = resolveEnvApiKey("deepinfra");
        expect(result).toBeNull();
      } finally {
        envSnapshot.restore();
      }
    });

    it("resolves the deepinfra api key via resolveApiKeyForProvider", async () => {
      const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
      const envSnapshot = captureEnv(["DEEPINFRA_API_KEY"]);
      process.env.DEEPINFRA_API_KEY = "deepinfra-provider-test-key";

      // Stubbed so the test does not depend on on-disk auth stores.
      const spy = vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
        apiKey: "deepinfra-provider-test-key",
        source: "env: DEEPINFRA_API_KEY",
        mode: "api-key",
      });

      try {
        const auth = await providerAuth.resolveApiKeyForProvider({
          provider: "deepinfra",
          agentDir,
        });

        expect(spy).toHaveBeenCalledWith(expect.objectContaining({ provider: "deepinfra" }));
        expect(auth.apiKey).toBe("deepinfra-provider-test-key");
        expect(auth.mode).toBe("api-key");
        expect(auth.source).toContain("DEEPINFRA_API_KEY");
      } finally {
        envSnapshot.restore();
      }
    });
  });
});
|
||||
36
extensions/deepinfra/onboard.ts
Normal file
36
extensions/deepinfra/onboard.ts
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
import {
|
||||
applyAgentDefaultModelPrimary,
|
||||
type OpenClawConfig,
|
||||
} from "openclaw/plugin-sdk/provider-onboard";
|
||||
import { DEEPINFRA_BASE_URL, DEEPINFRA_DEFAULT_MODEL_REF } from "./provider-models.js";
|
||||
|
||||
export { DEEPINFRA_BASE_URL, DEEPINFRA_DEFAULT_MODEL_REF };
|
||||
|
||||
export function applyDeepInfraProviderConfig(
|
||||
cfg: OpenClawConfig,
|
||||
modelRef: string = DEEPINFRA_DEFAULT_MODEL_REF,
|
||||
): OpenClawConfig {
|
||||
const models = { ...cfg.agents?.defaults?.models };
|
||||
models[modelRef] = {
|
||||
...models[modelRef],
|
||||
alias: models[modelRef]?.alias ?? "DeepInfra",
|
||||
};
|
||||
|
||||
return {
|
||||
...cfg,
|
||||
agents: {
|
||||
...cfg.agents,
|
||||
defaults: {
|
||||
...cfg.agents?.defaults,
|
||||
models,
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export function applyDeepInfraConfig(
|
||||
cfg: OpenClawConfig,
|
||||
modelRef: string = DEEPINFRA_DEFAULT_MODEL_REF,
|
||||
): OpenClawConfig {
|
||||
return applyAgentDefaultModelPrimary(applyDeepInfraProviderConfig(cfg, modelRef), modelRef);
|
||||
}
|
||||
191
extensions/deepinfra/openclaw.plugin.json
Normal file
191
extensions/deepinfra/openclaw.plugin.json
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
{
|
||||
"id": "deepinfra",
|
||||
"enabledByDefault": true,
|
||||
"providers": ["deepinfra"],
|
||||
"providerEndpoints": [
|
||||
{
|
||||
"endpointClass": "deepinfra-native",
|
||||
"hosts": ["api.deepinfra.com"]
|
||||
}
|
||||
],
|
||||
"providerRequest": {
|
||||
"providers": {
|
||||
"deepinfra": {
|
||||
"family": "deepinfra"
|
||||
}
|
||||
}
|
||||
},
|
||||
"modelCatalog": {
|
||||
"providers": {
|
||||
"deepinfra": {
|
||||
"baseUrl": "https://api.deepinfra.com/v1/openai",
|
||||
"api": "openai-completions",
|
||||
"models": [
|
||||
{
|
||||
"id": "deepseek-ai/DeepSeek-V3.2",
|
||||
"name": "DeepSeek V3.2",
|
||||
"reasoning": false,
|
||||
"input": ["text"],
|
||||
"contextWindow": 163840,
|
||||
"maxTokens": 163840,
|
||||
"cost": {
|
||||
"input": 0.26,
|
||||
"output": 0.38,
|
||||
"cacheRead": 0.13,
|
||||
"cacheWrite": 0
|
||||
},
|
||||
"compat": {
|
||||
"supportsUsageInStreaming": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "zai-org/GLM-5.1",
|
||||
"name": "GLM-5.1",
|
||||
"reasoning": true,
|
||||
"input": ["text"],
|
||||
"contextWindow": 202752,
|
||||
"maxTokens": 202752,
|
||||
"cost": {
|
||||
"input": 1.05,
|
||||
"output": 3.5,
|
||||
"cacheRead": 0.205000005,
|
||||
"cacheWrite": 0
|
||||
},
|
||||
"compat": {
|
||||
"supportsUsageInStreaming": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "stepfun-ai/Step-3.5-Flash",
|
||||
"name": "Step 3.5 Flash",
|
||||
"reasoning": false,
|
||||
"input": ["text"],
|
||||
"contextWindow": 262144,
|
||||
"maxTokens": 262144,
|
||||
"cost": {
|
||||
"input": 0.1,
|
||||
"output": 0.3,
|
||||
"cacheRead": 0.02,
|
||||
"cacheWrite": 0
|
||||
},
|
||||
"compat": {
|
||||
"supportsUsageInStreaming": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "MiniMaxAI/MiniMax-M2.5",
|
||||
"name": "MiniMax M2.5",
|
||||
"reasoning": true,
|
||||
"input": ["text"],
|
||||
"contextWindow": 196608,
|
||||
"maxTokens": 196608,
|
||||
"cost": {
|
||||
"input": 0.15,
|
||||
"output": 1.15,
|
||||
"cacheRead": 0.03,
|
||||
"cacheWrite": 0
|
||||
},
|
||||
"compat": {
|
||||
"supportsUsageInStreaming": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "moonshotai/Kimi-K2.5",
|
||||
"name": "Kimi K2.5",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"contextWindow": 262144,
|
||||
"maxTokens": 262144,
|
||||
"cost": {
|
||||
"input": 0.45,
|
||||
"output": 2.25,
|
||||
"cacheRead": 0.070000002,
|
||||
"cacheWrite": 0
|
||||
},
|
||||
"compat": {
|
||||
"supportsUsageInStreaming": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B",
|
||||
"name": "NVIDIA Nemotron 3 Super 120B A12B",
|
||||
"reasoning": true,
|
||||
"input": ["text"],
|
||||
"contextWindow": 262144,
|
||||
"maxTokens": 262144,
|
||||
"cost": {
|
||||
"input": 0.1,
|
||||
"output": 0.5,
|
||||
"cacheRead": 0,
|
||||
"cacheWrite": 0
|
||||
},
|
||||
"compat": {
|
||||
"supportsUsageInStreaming": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
||||
"name": "Llama 3.3 70B Instruct Turbo",
|
||||
"reasoning": false,
|
||||
"input": ["text"],
|
||||
"contextWindow": 131072,
|
||||
"maxTokens": 131072,
|
||||
"cost": {
|
||||
"input": 0.1,
|
||||
"output": 0.32,
|
||||
"cacheRead": 0,
|
||||
"cacheWrite": 0
|
||||
},
|
||||
"compat": {
|
||||
"supportsUsageInStreaming": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"providerAuthEnvVars": {
|
||||
"deepinfra": ["DEEPINFRA_API_KEY"]
|
||||
},
|
||||
"providerAuthChoices": [
|
||||
{
|
||||
"provider": "deepinfra",
|
||||
"method": "api-key",
|
||||
"choiceId": "deepinfra-api-key",
|
||||
"choiceLabel": "DeepInfra API key",
|
||||
"choiceHint": "Unified API for open source models",
|
||||
"groupId": "deepinfra",
|
||||
"groupLabel": "DeepInfra",
|
||||
"groupHint": "Unified API for open source models",
|
||||
"optionKey": "deepinfraApiKey",
|
||||
"cliFlag": "--deepinfra-api-key",
|
||||
"cliOption": "--deepinfra-api-key <key>",
|
||||
"cliDescription": "DeepInfra API key"
|
||||
}
|
||||
],
|
||||
"contracts": {
|
||||
"mediaUnderstandingProviders": ["deepinfra"],
|
||||
"memoryEmbeddingProviders": ["deepinfra"],
|
||||
"imageGenerationProviders": ["deepinfra"],
|
||||
"speechProviders": ["deepinfra"],
|
||||
"videoGenerationProviders": ["deepinfra"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"deepinfra": {
|
||||
"capabilities": ["image", "audio"],
|
||||
"defaultModels": {
|
||||
"image": "moonshotai/Kimi-K2.5",
|
||||
"audio": "openai/whisper-large-v3-turbo"
|
||||
},
|
||||
"autoPriority": {
|
||||
"image": 45,
|
||||
"audio": 45
|
||||
}
|
||||
}
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {}
|
||||
}
|
||||
}
|
||||
15
extensions/deepinfra/package.json
Normal file
15
extensions/deepinfra/package.json
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"name": "@openclaw/deepinfra-provider",
|
||||
"version": "2026.4.25",
|
||||
"private": true,
|
||||
"description": "OpenClaw DeepInfra provider plugin",
|
||||
"type": "module",
|
||||
"devDependencies": {
|
||||
"@openclaw/plugin-sdk": "workspace:*"
|
||||
},
|
||||
"openclaw": {
|
||||
"extensions": [
|
||||
"./index.ts"
|
||||
]
|
||||
}
|
||||
}
|
||||
24
extensions/deepinfra/provider-catalog.ts
Normal file
24
extensions/deepinfra/provider-catalog.ts
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
import { type ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-shared";
|
||||
import {
|
||||
DEEPINFRA_BASE_URL,
|
||||
DEEPINFRA_MODEL_CATALOG,
|
||||
buildDeepInfraModelDefinition,
|
||||
discoverDeepInfraModels,
|
||||
} from "./provider-models.js";
|
||||
|
||||
export function buildStaticDeepInfraProvider(): ModelProviderConfig {
|
||||
return {
|
||||
baseUrl: DEEPINFRA_BASE_URL,
|
||||
api: "openai-completions",
|
||||
models: DEEPINFRA_MODEL_CATALOG.map(buildDeepInfraModelDefinition),
|
||||
};
|
||||
}
|
||||
|
||||
export async function buildDeepInfraProvider(): Promise<ModelProviderConfig> {
|
||||
const models = await discoverDeepInfraModels();
|
||||
return {
|
||||
baseUrl: DEEPINFRA_BASE_URL,
|
||||
api: "openai-completions",
|
||||
models,
|
||||
};
|
||||
}
|
||||
169
extensions/deepinfra/provider-models.test.ts
Normal file
169
extensions/deepinfra/provider-models.test.ts
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
DEEPINFRA_DEFAULT_MODEL_REF,
|
||||
DEEPINFRA_MODELS_URL,
|
||||
discoverDeepInfraModels,
|
||||
resetDeepInfraModelCacheForTest,
|
||||
} from "./provider-models.js";
|
||||
|
||||
// Start every test with a cold discovery cache so earlier tests cannot leak
// cached model lists into later ones.
beforeEach(() => {
  resetDeepInfraModelCacheForTest();
});
|
||||
|
||||
function makeModelEntry(overrides: Record<string, unknown> = {}) {
|
||||
return {
|
||||
id: "openai/gpt-oss-120b",
|
||||
object: "model",
|
||||
owned_by: "deepinfra",
|
||||
metadata: {
|
||||
context_length: 131072,
|
||||
max_tokens: 65536,
|
||||
pricing: {
|
||||
input_tokens: 3,
|
||||
output_tokens: 15,
|
||||
cache_read_tokens: 0.3,
|
||||
},
|
||||
tags: ["vision", "reasoning_effort", "prompt_cache", "reasoning"],
|
||||
},
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
async function withFetchPathTest(
|
||||
mockFetch: ReturnType<typeof vi.fn>,
|
||||
runAssertions: () => Promise<void>,
|
||||
) {
|
||||
const origNodeEnv = process.env.NODE_ENV;
|
||||
const origVitest = process.env.VITEST;
|
||||
delete process.env.NODE_ENV;
|
||||
delete process.env.VITEST;
|
||||
vi.stubGlobal("fetch", mockFetch);
|
||||
|
||||
try {
|
||||
await runAssertions();
|
||||
} finally {
|
||||
if (origNodeEnv === undefined) {
|
||||
delete process.env.NODE_ENV;
|
||||
} else {
|
||||
process.env.NODE_ENV = origNodeEnv;
|
||||
}
|
||||
if (origVitest === undefined) {
|
||||
delete process.env.VITEST;
|
||||
} else {
|
||||
process.env.VITEST = origVitest;
|
||||
}
|
||||
vi.unstubAllGlobals();
|
||||
}
|
||||
}
|
||||
|
||||
describe("discoverDeepInfraModels", () => {
  // Vitest sets VITEST, so without withFetchPathTest the discovery function
  // short-circuits to the static catalog — no fetch stub needed here.
  it("returns static catalog in test environment", async () => {
    const models = await discoverDeepInfraModels();
    expect(DEEPINFRA_DEFAULT_MODEL_REF).toBe("deepinfra/deepseek-ai/DeepSeek-V3.2");
    expect(models.some((m) => m.id === "deepseek-ai/DeepSeek-V3.2")).toBe(true);
    expect(models.every((m) => m.compat?.supportsUsageInStreaming)).toBe(true);
  });

  // Happy path: one well-formed row is fetched and mapped onto the
  // ModelDefinitionConfig shape (tags -> reasoning/input, pricing -> cost).
  it("fetches DeepInfra's curated LLM catalog and parses model metadata", async () => {
    const mockFetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () => Promise.resolve({ data: [makeModelEntry()] }),
    });

    await withFetchPathTest(mockFetch, async () => {
      const models = await discoverDeepInfraModels();
      expect(mockFetch).toHaveBeenCalledWith(
        DEEPINFRA_MODELS_URL,
        expect.objectContaining({
          headers: { Accept: "application/json" },
        }),
      );
      expect(models).toEqual([
        expect.objectContaining({
          id: "openai/gpt-oss-120b",
          name: "openai/gpt-oss-120b",
          reasoning: true,
          input: ["text", "image"],
          contextWindow: 131072,
          maxTokens: 65536,
          cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 0 },
          compat: expect.objectContaining({ supportsUsageInStreaming: true }),
        }),
      ]);
    });
  });

  // Rows with null metadata (e.g. embedding models) are dropped, and a
  // repeated id is only emitted once.
  it("skips non-LLM rows without metadata and deduplicates ids", async () => {
    const mockFetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () =>
        Promise.resolve({
          data: [
            { id: "BAAI/bge-m3", object: "model", metadata: null },
            makeModelEntry(),
            makeModelEntry(),
          ],
        }),
    });

    await withFetchPathTest(mockFetch, async () => {
      const models = await discoverDeepInfraModels();
      expect(models.map((m) => m.id)).toEqual(["openai/gpt-oss-120b"]);
    });
  });

  // Missing context/max/pricing fields fall back to the module defaults
  // (128000 context window, 8192 max tokens, zero cost).
  it("uses fallback defaults for sparse metadata", async () => {
    const mockFetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () =>
        Promise.resolve({
          data: [
            makeModelEntry({
              id: "some/model",
              metadata: { tags: [], pricing: {} },
            }),
          ],
        }),
    });

    await withFetchPathTest(mockFetch, async () => {
      const [model] = await discoverDeepInfraModels();
      expect(model).toMatchObject({
        id: "some/model",
        reasoning: false,
        input: ["text"],
        contextWindow: 128000,
        maxTokens: 8192,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      });
    });
  });

  // Any fetch rejection must degrade to the bundled static catalog.
  it("falls back to the static catalog on network errors", async () => {
    const mockFetch = vi.fn().mockRejectedValue(new Error("network error"));

    await withFetchPathTest(mockFetch, async () => {
      const models = await discoverDeepInfraModels();
      expect(models.some((m) => m.id === "deepseek-ai/DeepSeek-V3.2")).toBe(true);
    });
  });

  // The second discovery call must be served from the TTL cache: the first
  // response wins and fetch is only invoked once.
  it("caches successful discovery responses only", async () => {
    const mockFetch = vi
      .fn()
      .mockResolvedValueOnce({
        ok: true,
        json: () => Promise.resolve({ data: [makeModelEntry({ id: "first/model" })] }),
      })
      .mockResolvedValueOnce({
        ok: true,
        json: () => Promise.resolve({ data: [makeModelEntry({ id: "second/model" })] }),
      });

    await withFetchPathTest(mockFetch, async () => {
      expect((await discoverDeepInfraModels()).map((m) => m.id)).toEqual(["first/model"]);
      expect((await discoverDeepInfraModels()).map((m) => m.id)).toEqual(["first/model"]);
      expect(mockFetch).toHaveBeenCalledTimes(1);
    });
  });
});
|
||||
213
extensions/deepinfra/provider-models.ts
Normal file
213
extensions/deepinfra/provider-models.ts
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
import { fetchWithTimeout } from "openclaw/plugin-sdk/provider-http";
|
||||
import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared";
|
||||
import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env";
|
||||
|
||||
const log = createSubsystemLogger("deepinfra-models");

// DeepInfra's OpenAI-compatible API root; note the `/v1` segment sits
// mid-path, not at the end.
export const DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai";
// Models listing endpoint used for live discovery.
export const DEEPINFRA_MODELS_URL = `${DEEPINFRA_BASE_URL}/models?sort_by=openclaw&filter=with_meta`;

export const DEEPINFRA_DEFAULT_MODEL_ID = "deepseek-ai/DeepSeek-V3.2";
export const DEEPINFRA_DEFAULT_MODEL_REF = `deepinfra/${DEEPINFRA_DEFAULT_MODEL_ID}`;

// Fallbacks used when a discovered model row omits these metadata fields.
export const DEEPINFRA_DEFAULT_CONTEXT_WINDOW = 128000;
export const DEEPINFRA_DEFAULT_MAX_TOKENS = 8192;

// Bundled static catalog, used in test environments and whenever live
// discovery fails or returns nothing. Cost units mirror the API's pricing
// fields; the unit scale is not stated in this file.
export const DEEPINFRA_MODEL_CATALOG: ModelDefinitionConfig[] = [
  {
    id: "deepseek-ai/DeepSeek-V3.2",
    name: "DeepSeek V3.2",
    reasoning: false,
    input: ["text"],
    contextWindow: 163840,
    maxTokens: 163840,
    cost: { input: 0.26, output: 0.38, cacheRead: 0.13, cacheWrite: 0 },
  },
  {
    id: "zai-org/GLM-5.1",
    name: "GLM-5.1",
    reasoning: true,
    input: ["text"],
    contextWindow: 202752,
    maxTokens: 202752,
    cost: { input: 1.05, output: 3.5, cacheRead: 0.205000005, cacheWrite: 0 },
  },
  {
    id: "stepfun-ai/Step-3.5-Flash",
    name: "Step 3.5 Flash",
    reasoning: false,
    input: ["text"],
    contextWindow: 262144,
    maxTokens: 262144,
    cost: { input: 0.1, output: 0.3, cacheRead: 0.02, cacheWrite: 0 },
  },
  {
    id: "MiniMaxAI/MiniMax-M2.5",
    name: "MiniMax M2.5",
    reasoning: true,
    input: ["text"],
    contextWindow: 196608,
    maxTokens: 196608,
    cost: { input: 0.15, output: 1.15, cacheRead: 0.03, cacheWrite: 0 },
  },
  {
    // Only catalog entry that also accepts image input.
    id: "moonshotai/Kimi-K2.5",
    name: "Kimi K2.5",
    reasoning: true,
    input: ["text", "image"],
    contextWindow: 262144,
    maxTokens: 262144,
    cost: { input: 0.45, output: 2.25, cacheRead: 0.070000002, cacheWrite: 0 },
  },
  {
    id: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B",
    name: "NVIDIA Nemotron 3 Super 120B A12B",
    reasoning: true,
    input: ["text"],
    contextWindow: 262144,
    maxTokens: 262144,
    cost: { input: 0.1, output: 0.5, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
    name: "Llama 3.3 70B Instruct Turbo",
    reasoning: false,
    input: ["text"],
    contextWindow: 131072,
    maxTokens: 131072,
    cost: { input: 0.1, output: 0.32, cacheRead: 0, cacheWrite: 0 },
  },
];
|
||||
|
||||
// Discovery request timeout and how long a successful result is reused.
const DISCOVERY_TIMEOUT_MS = 5000;
const DISCOVERY_CACHE_TTL_MS = 5 * 60 * 1000;

// Module-level cache of the last *successful* discovery result; failures and
// empty responses are never cached.
let cachedModels: ModelDefinitionConfig[] | null = null;
let cachedAt = 0;

// Test hook: clears the discovery cache so each test starts cold.
export function resetDeepInfraModelCacheForTest(): void {
  cachedModels = null;
  cachedAt = 0;
}
|
||||
|
||||
// Per-model pricing fields reported by the DeepInfra models API.
interface DeepInfraModelPricing {
  input_tokens?: number;
  output_tokens?: number;
  cache_read_tokens?: number;
}

// Optional metadata block attached to each model row; all fields may be
// missing and are defaulted downstream.
interface DeepInfraModelMetadata {
  context_length?: number;
  max_tokens?: number;
  pricing?: DeepInfraModelPricing;
  tags?: string[];
}

// One row of the models listing. `metadata` is null for rows this plugin
// skips (e.g. embedding models — see the "skips non-LLM rows" test).
interface DeepInfraModelEntry {
  id: string;
  metadata: DeepInfraModelMetadata | null;
}

// Top-level envelope of the models listing response.
interface DeepInfraModelsResponse {
  data?: DeepInfraModelEntry[];
}
|
||||
|
||||
function parseModality(metadata: DeepInfraModelMetadata): Array<"text" | "image"> {
|
||||
return metadata.tags?.includes("vision") ? ["text", "image"] : ["text"];
|
||||
}
|
||||
|
||||
function parseReasoning(metadata: DeepInfraModelMetadata): boolean {
|
||||
return Boolean(
|
||||
metadata.tags?.includes("reasoning") || metadata.tags?.includes("reasoning_effort"),
|
||||
);
|
||||
}
|
||||
|
||||
export function buildDeepInfraModelDefinition(model: ModelDefinitionConfig): ModelDefinitionConfig {
|
||||
return {
|
||||
...model,
|
||||
compat: {
|
||||
...model.compat,
|
||||
supportsUsageInStreaming: model.compat?.supportsUsageInStreaming ?? true,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function toModelDefinition(entry: DeepInfraModelEntry): ModelDefinitionConfig {
|
||||
const metadata = entry.metadata;
|
||||
if (!metadata) {
|
||||
throw new Error("missing metadata");
|
||||
}
|
||||
return buildDeepInfraModelDefinition({
|
||||
id: entry.id,
|
||||
name: entry.id,
|
||||
reasoning: parseReasoning(metadata),
|
||||
input: parseModality(metadata),
|
||||
contextWindow: metadata.context_length ?? DEEPINFRA_DEFAULT_CONTEXT_WINDOW,
|
||||
maxTokens: metadata.max_tokens ?? DEEPINFRA_DEFAULT_MAX_TOKENS,
|
||||
cost: {
|
||||
input: metadata.pricing?.input_tokens ?? 0,
|
||||
output: metadata.pricing?.output_tokens ?? 0,
|
||||
cacheRead: metadata.pricing?.cache_read_tokens ?? 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
function staticCatalog(): ModelDefinitionConfig[] {
|
||||
return DEEPINFRA_MODEL_CATALOG.map(buildDeepInfraModelDefinition);
|
||||
}
|
||||
|
||||
export async function discoverDeepInfraModels(): Promise<ModelDefinitionConfig[]> {
|
||||
if (process.env.NODE_ENV === "test" || process.env.VITEST) {
|
||||
return staticCatalog();
|
||||
}
|
||||
|
||||
if (cachedModels && Date.now() - cachedAt < DISCOVERY_CACHE_TTL_MS) {
|
||||
return [...cachedModels];
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetchWithTimeout(
|
||||
DEEPINFRA_MODELS_URL,
|
||||
{
|
||||
headers: { Accept: "application/json" },
|
||||
},
|
||||
DISCOVERY_TIMEOUT_MS,
|
||||
);
|
||||
if (!response.ok) {
|
||||
log.warn(`Failed to discover models: HTTP ${response.status}, using static catalog`);
|
||||
return staticCatalog();
|
||||
}
|
||||
|
||||
const body = (await response.json()) as DeepInfraModelsResponse;
|
||||
if (!Array.isArray(body.data) || body.data.length === 0) {
|
||||
log.warn("No models found from DeepInfra API, using static catalog");
|
||||
return staticCatalog();
|
||||
}
|
||||
|
||||
const seen = new Set<string>();
|
||||
const models: ModelDefinitionConfig[] = [];
|
||||
for (const entry of body.data) {
|
||||
const id = typeof entry?.id === "string" ? entry.id.trim() : "";
|
||||
if (!id || seen.has(id) || !entry.metadata) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
models.push(toModelDefinition({ ...entry, id }));
|
||||
seen.add(id);
|
||||
} catch (error) {
|
||||
log.warn(`Skipping malformed model entry "${id}": ${String(error)}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (models.length === 0) {
|
||||
return staticCatalog();
|
||||
}
|
||||
cachedModels = models;
|
||||
cachedAt = Date.now();
|
||||
return [...models];
|
||||
} catch (error) {
|
||||
log.warn(`Discovery failed: ${String(error)}, using static catalog`);
|
||||
return staticCatalog();
|
||||
}
|
||||
}
|
||||
41
extensions/deepinfra/provider-policy-api.test.ts
Normal file
41
extensions/deepinfra/provider-policy-api.test.ts
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
import type {
|
||||
ModelDefinitionConfig,
|
||||
ModelProviderConfig,
|
||||
} from "openclaw/plugin-sdk/provider-model-types";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { normalizeConfig } from "./provider-policy-api.js";
|
||||
|
||||
function createModel(id: string): ModelDefinitionConfig {
|
||||
return {
|
||||
id,
|
||||
name: id,
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 128_000,
|
||||
maxTokens: 8_192,
|
||||
};
|
||||
}
|
||||
|
||||
describe("deepinfra provider policy public artifact", () => {
  // The generic openai-completions normalizer strips/re-appends a trailing
  // /v1; this policy must leave DeepInfra's mid-path /v1 untouched.
  it("preserves the DeepInfra mid-path /v1 baseUrl without appending another /v1", () => {
    const providerConfig: ModelProviderConfig = {
      baseUrl: "https://api.deepinfra.com/v1/openai",
      api: "openai-completions",
      models: [createModel("zai-org/GLM-5")],
    };

    const normalized = normalizeConfig({ provider: "deepinfra", providerConfig });

    expect(normalized.baseUrl).toBe("https://api.deepinfra.com/v1/openai");
    expect(normalized.baseUrl).not.toMatch(/\/v1\/openai\/v1$/);
  });

  // normalizeConfig is a pure passthrough — same object reference, no clone.
  it("returns the providerConfig unchanged (referentially equal)", () => {
    const providerConfig = {
      baseUrl: "https://api.deepinfra.com/v1/openai",
      models: [],
    };
    expect(normalizeConfig({ provider: "deepinfra", providerConfig })).toBe(providerConfig);
  });
});
|
||||
21
extensions/deepinfra/provider-policy-api.ts
Normal file
21
extensions/deepinfra/provider-policy-api.ts
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-types";
|
||||
|
||||
/**
|
||||
* Passthrough normalization for DeepInfra provider config.
|
||||
*
|
||||
* DeepInfra's OpenAI-compatible base URL is `https://api.deepinfra.com/v1/openai`
|
||||
* with the `/v1` segment mid-path, not at the end. The generic
|
||||
* openai-completions config normalizer strips a trailing `/v1` and re-appends
|
||||
* one, which is idempotent for providers like OpenRouter (`.../api/v1`) but
|
||||
* doubles to `.../v1/openai/v1` here and breaks inference (404).
|
||||
*
|
||||
* Shipping this bundled policy surface short-circuits the fallback normalizer
|
||||
* chain (see `src/plugins/provider-runtime.ts:normalizeProviderConfigWithPlugin`)
|
||||
* and preserves the DeepInfra-declared baseUrl as-is.
|
||||
*/
|
||||
export function normalizeConfig(params: {
|
||||
provider: string;
|
||||
providerConfig: ModelProviderConfig;
|
||||
}): ModelProviderConfig {
|
||||
return params.providerConfig;
|
||||
}
|
||||
3
extensions/deepinfra/provider.contract.test.ts
Normal file
3
extensions/deepinfra/provider.contract.test.ts
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
import { describeProviderContracts } from "openclaw/plugin-sdk/provider-test-contracts";
|
||||
|
||||
// Runs the shared provider contract suite against the "deepinfra" plugin id.
describeProviderContracts("deepinfra");
|
||||
128
extensions/deepinfra/speech-provider.test.ts
Normal file
128
extensions/deepinfra/speech-provider.test.ts
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { buildDeepInfraSpeechProvider } from "./speech-provider.js";
|
||||
|
||||
// vi.hoisted creates the mock fns before the vi.mock factory below runs
// (module mocks are hoisted above imports by vitest).
const { assertOkOrThrowHttpErrorMock, postJsonRequestMock, resolveProviderHttpRequestConfigMock } =
  vi.hoisted(() => ({
    assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
    postJsonRequestMock: vi.fn(),
    // Echoes back a minimal HTTP request config so synthesize() can proceed
    // without the real SDK resolution logic.
    resolveProviderHttpRequestConfigMock: vi.fn((params: Record<string, unknown>) => ({
      baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? "https://api.deepinfra.com/v1/openai",
      allowPrivateNetwork: false,
      headers: new Headers(params.defaultHeaders as HeadersInit | undefined),
      dispatcherPolicy: undefined,
    })),
  }));

// Replace the SDK HTTP surface so no test performs real network I/O.
vi.mock("openclaw/plugin-sdk/provider-http", () => ({
  assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
  postJsonRequest: postJsonRequestMock,
  resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
}));
|
||||
|
||||
describe("deepinfra speech provider", () => {
  // Reset mock state and stubbed env vars between tests.
  afterEach(() => {
    assertOkOrThrowHttpErrorMock.mockClear();
    postJsonRequestMock.mockReset();
    resolveProviderHttpRequestConfigMock.mockClear();
    vi.unstubAllEnvs();
  });

  // resolveConfig should trim/lowercase the response format, strip the
  // trailing slash from the base URL, and drop the "deepinfra/" model prefix.
  it("normalizes provider-owned speech config", () => {
    const provider = buildDeepInfraSpeechProvider();
    const resolved = provider.resolveConfig?.({
      cfg: {} as never,
      timeoutMs: 30_000,
      rawConfig: {
        providers: {
          deepinfra: {
            apiKey: "sk-test",
            baseUrl: "https://api.deepinfra.com/v1/openai/",
            modelId: "deepinfra/hexgrad/Kokoro-82M",
            voiceId: "af_alloy",
            speed: 1.1,
            responseFormat: " MP3 ",
          },
        },
      },
    });

    expect(resolved).toEqual({
      apiKey: "sk-test",
      baseUrl: "https://api.deepinfra.com/v1/openai",
      model: "hexgrad/Kokoro-82M",
      voice: "af_alloy",
      speed: 1.1,
      responseFormat: "mp3",
      extraBody: undefined,
    });
  });

  // End-to-end synthesize() flow against the mocked HTTP layer: checks the
  // request shape, the returned audio buffer, and connection release.
  it("synthesizes OpenAI-compatible speech through DeepInfra", async () => {
    const release = vi.fn(async () => {});
    postJsonRequestMock.mockResolvedValue({
      response: new Response(new Uint8Array([1, 2, 3]), { status: 200 }),
      release,
    });

    const provider = buildDeepInfraSpeechProvider();
    const result = await provider.synthesize({
      text: "hello",
      cfg: {
        models: {
          providers: {
            deepinfra: {
              apiKey: "sk-deepinfra",
              baseUrl: "https://api.deepinfra.com/v1/openai/",
            },
          },
        },
      } as never,
      providerConfig: {
        model: "hexgrad/Kokoro-82M",
        voice: "af_alloy",
        speed: 1.2,
      },
      target: "voice-note",
      timeoutMs: 12_345,
    });

    expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith(
      expect.objectContaining({
        provider: "deepinfra",
        capability: "audio",
        baseUrl: "https://api.deepinfra.com/v1/openai",
      }),
    );
    expect(postJsonRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://api.deepinfra.com/v1/openai/audio/speech",
        timeoutMs: 12_345,
        body: {
          model: "hexgrad/Kokoro-82M",
          input: "hello",
          voice: "af_alloy",
          response_format: "mp3",
          speed: 1.2,
        },
      }),
    );
    expect(result.audioBuffer).toEqual(Buffer.from([1, 2, 3]));
    expect(result.outputFormat).toBe("mp3");
    expect(result.fileExtension).toBe(".mp3");
    expect(result.voiceCompatible).toBe(true);
    expect(release).toHaveBeenCalledOnce();
  });

  // The env var is the lowest-precedence API-key source but must still make
  // the provider report itself as configured.
  it("uses DEEPINFRA_API_KEY when provider config omits apiKey", () => {
    vi.stubEnv("DEEPINFRA_API_KEY", "sk-env");
    const provider = buildDeepInfraSpeechProvider();

    expect(
      provider.isConfigured({
        cfg: {} as never,
        providerConfig: {},
        timeoutMs: 30_000,
      }),
    ).toBe(true);
  });
});
|
||||
295
extensions/deepinfra/speech-provider.ts
Normal file
295
extensions/deepinfra/speech-provider.ts
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
postJsonRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
|
||||
import {
|
||||
asFiniteNumber,
|
||||
asObject,
|
||||
trimToUndefined,
|
||||
type SpeechDirectiveTokenParseContext,
|
||||
type SpeechProviderConfig,
|
||||
type SpeechProviderOverrides,
|
||||
type SpeechProviderPlugin,
|
||||
} from "openclaw/plugin-sdk/speech";
|
||||
import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime";
|
||||
import {
|
||||
DEEPINFRA_BASE_URL,
|
||||
DEEPINFRA_TTS_MODELS,
|
||||
DEFAULT_DEEPINFRA_TTS_MODEL,
|
||||
DEFAULT_DEEPINFRA_TTS_VOICE,
|
||||
normalizeDeepInfraBaseUrl,
|
||||
normalizeDeepInfraModelRef,
|
||||
} from "./media-models.js";
|
||||
|
||||
// Response formats this plugin accepts for DeepInfra's OpenAI-compatible
// /audio/speech endpoint.
const DEEPINFRA_TTS_RESPONSE_FORMATS = ["mp3", "opus", "flac", "wav", "pcm"] as const;

type DeepInfraTtsResponseFormat = (typeof DEEPINFRA_TTS_RESPONSE_FORMATS)[number];

// Fully-normalized provider config consumed by synthesize(); model and voice
// always carry resolved defaults, everything else is optional.
type DeepInfraTtsProviderConfig = {
  apiKey?: string;
  baseUrl?: string;
  model: string;
  voice: string;
  speed?: number;
  responseFormat?: DeepInfraTtsResponseFormat;
  extraBody?: Record<string, unknown>;
};

// Per-request overrides parsed from directives or runtime parameters.
type DeepInfraTtsProviderOverrides = {
  model?: string;
  voice?: string;
  speed?: number;
};
|
||||
|
||||
function normalizeDeepInfraTtsResponseFormat(
|
||||
value: unknown,
|
||||
): DeepInfraTtsResponseFormat | undefined {
|
||||
const next = normalizeOptionalLowercaseString(value);
|
||||
if (!next) {
|
||||
return undefined;
|
||||
}
|
||||
if (DEEPINFRA_TTS_RESPONSE_FORMATS.some((format) => format === next)) {
|
||||
return next as DeepInfraTtsResponseFormat;
|
||||
}
|
||||
throw new Error(`Invalid DeepInfra speech responseFormat: ${next}`);
|
||||
}
|
||||
|
||||
function resolveDeepInfraProviderConfigRecord(
|
||||
rawConfig: Record<string, unknown>,
|
||||
): Record<string, unknown> | undefined {
|
||||
const providers = asObject(rawConfig.providers);
|
||||
return asObject(providers?.deepinfra) ?? asObject(rawConfig.deepinfra);
|
||||
}
|
||||
|
||||
function normalizeDeepInfraTtsProviderConfig(
|
||||
rawConfig: Record<string, unknown>,
|
||||
): DeepInfraTtsProviderConfig {
|
||||
const raw = resolveDeepInfraProviderConfigRecord(rawConfig);
|
||||
return {
|
||||
apiKey: normalizeResolvedSecretInputString({
|
||||
value: raw?.apiKey,
|
||||
path: "messages.tts.providers.deepinfra.apiKey",
|
||||
}),
|
||||
baseUrl:
|
||||
trimToUndefined(raw?.baseUrl) == null ? undefined : normalizeDeepInfraBaseUrl(raw?.baseUrl),
|
||||
model: normalizeDeepInfraModelRef(
|
||||
trimToUndefined(raw?.model ?? raw?.modelId),
|
||||
DEFAULT_DEEPINFRA_TTS_MODEL,
|
||||
),
|
||||
voice: trimToUndefined(raw?.voice ?? raw?.voiceId) ?? DEFAULT_DEEPINFRA_TTS_VOICE,
|
||||
speed: asFiniteNumber(raw?.speed),
|
||||
responseFormat: normalizeDeepInfraTtsResponseFormat(raw?.responseFormat),
|
||||
extraBody: asObject(raw?.extraBody),
|
||||
};
|
||||
}
|
||||
|
||||
function readDeepInfraTtsProviderConfig(config: SpeechProviderConfig): DeepInfraTtsProviderConfig {
|
||||
const normalized = normalizeDeepInfraTtsProviderConfig({});
|
||||
return {
|
||||
apiKey: trimToUndefined(config.apiKey) ?? normalized.apiKey,
|
||||
baseUrl:
|
||||
trimToUndefined(config.baseUrl) == null
|
||||
? normalized.baseUrl
|
||||
: normalizeDeepInfraBaseUrl(config.baseUrl),
|
||||
model: normalizeDeepInfraModelRef(
|
||||
trimToUndefined(config.model ?? config.modelId),
|
||||
normalized.model,
|
||||
),
|
||||
voice: trimToUndefined(config.voice ?? config.voiceId) ?? normalized.voice,
|
||||
speed: asFiniteNumber(config.speed) ?? normalized.speed,
|
||||
responseFormat:
|
||||
normalizeDeepInfraTtsResponseFormat(config.responseFormat) ?? normalized.responseFormat,
|
||||
extraBody: asObject(config.extraBody) ?? normalized.extraBody,
|
||||
};
|
||||
}
|
||||
|
||||
function readDeepInfraTtsOverrides(
|
||||
overrides: SpeechProviderOverrides | undefined,
|
||||
): DeepInfraTtsProviderOverrides {
|
||||
if (!overrides) {
|
||||
return {};
|
||||
}
|
||||
return {
|
||||
model: trimToUndefined(overrides.model ?? overrides.modelId),
|
||||
voice: trimToUndefined(overrides.voice ?? overrides.voiceId),
|
||||
speed: asFiniteNumber(overrides.speed),
|
||||
};
|
||||
}
|
||||
|
||||
function resolveDeepInfraTtsApiKey(params: {
|
||||
cfg?: { models?: { providers?: { deepinfra?: { apiKey?: unknown } } } };
|
||||
providerConfig: DeepInfraTtsProviderConfig;
|
||||
}): string | undefined {
|
||||
return (
|
||||
params.providerConfig.apiKey ??
|
||||
normalizeResolvedSecretInputString({
|
||||
value: params.cfg?.models?.providers?.deepinfra?.apiKey,
|
||||
path: "models.providers.deepinfra.apiKey",
|
||||
}) ??
|
||||
trimToUndefined(process.env.DEEPINFRA_API_KEY)
|
||||
);
|
||||
}
|
||||
|
||||
function resolveDeepInfraTtsBaseUrl(params: {
|
||||
cfg?: { models?: { providers?: { deepinfra?: { baseUrl?: unknown } } } };
|
||||
providerConfig: DeepInfraTtsProviderConfig;
|
||||
}): string {
|
||||
return normalizeDeepInfraBaseUrl(
|
||||
params.providerConfig.baseUrl ??
|
||||
trimToUndefined(params.cfg?.models?.providers?.deepinfra?.baseUrl) ??
|
||||
DEEPINFRA_BASE_URL,
|
||||
);
|
||||
}
|
||||
|
||||
function responseFormatToFileExtension(
|
||||
format: DeepInfraTtsResponseFormat,
|
||||
): ".mp3" | ".opus" | ".flac" | ".wav" | ".pcm" {
|
||||
return `.${format}`;
|
||||
}
|
||||
|
||||
function parseDirectiveToken(ctx: SpeechDirectiveTokenParseContext): {
|
||||
handled: boolean;
|
||||
overrides?: SpeechProviderOverrides;
|
||||
} {
|
||||
switch (ctx.key) {
|
||||
case "voice":
|
||||
case "voice_id":
|
||||
case "voiceid":
|
||||
case "deepinfra_voice":
|
||||
case "deepinfravoice":
|
||||
if (!ctx.policy.allowVoice) {
|
||||
return { handled: true };
|
||||
}
|
||||
return { handled: true, overrides: { voice: ctx.value } };
|
||||
case "model":
|
||||
case "model_id":
|
||||
case "modelid":
|
||||
case "deepinfra_model":
|
||||
case "deepinframodel":
|
||||
if (!ctx.policy.allowModelId) {
|
||||
return { handled: true };
|
||||
}
|
||||
return { handled: true, overrides: { model: ctx.value } };
|
||||
default:
|
||||
return { handled: false };
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the DeepInfra speech (TTS) provider plugin.
 *
 * Synthesis posts to DeepInfra's OpenAI-compatible `/audio/speech` endpoint
 * with bearer auth. Config resolution accepts both `model`/`voice` and the
 * `modelId`/`voiceId` aliases; API key and base URL fall back to
 * `models.providers.deepinfra` config and the DEEPINFRA_API_KEY env var.
 */
export function buildDeepInfraSpeechProvider(): SpeechProviderPlugin {
  return {
    id: "deepinfra",
    label: "DeepInfra",
    // Matches the 45 auto-priority this plugin declares for media surfaces.
    autoSelectOrder: 45,
    models: [...DEEPINFRA_TTS_MODELS],
    voices: [DEFAULT_DEEPINFRA_TTS_VOICE],
    resolveConfig: ({ rawConfig }) => normalizeDeepInfraTtsProviderConfig(rawConfig),
    parseDirectiveToken,
    // Layers talk-mode settings over the base TTS config; each field is only
    // overridden when the talk config supplies a usable value.
    resolveTalkConfig: ({ baseTtsConfig, talkProviderConfig }) => {
      const base = normalizeDeepInfraTtsProviderConfig(baseTtsConfig);
      // May throw on an invalid format — validated before merging.
      const responseFormat = normalizeDeepInfraTtsResponseFormat(talkProviderConfig.responseFormat);
      return {
        ...base,
        ...(talkProviderConfig.apiKey === undefined
          ? {}
          : {
              apiKey: normalizeResolvedSecretInputString({
                value: talkProviderConfig.apiKey,
                path: "talk.providers.deepinfra.apiKey",
              }),
            }),
        ...(trimToUndefined(talkProviderConfig.baseUrl) == null
          ? {}
          : { baseUrl: normalizeDeepInfraBaseUrl(talkProviderConfig.baseUrl) }),
        ...(trimToUndefined(talkProviderConfig.modelId) == null
          ? {}
          : {
              model: normalizeDeepInfraModelRef(
                trimToUndefined(talkProviderConfig.modelId),
                DEFAULT_DEEPINFRA_TTS_MODEL,
              ),
            }),
        ...(trimToUndefined(talkProviderConfig.voiceId) == null
          ? {}
          : { voice: trimToUndefined(talkProviderConfig.voiceId) }),
        ...(asFiniteNumber(talkProviderConfig.speed) == null
          ? {}
          : { speed: asFiniteNumber(talkProviderConfig.speed) }),
        ...(responseFormat == null ? {} : { responseFormat }),
      };
    },
    // Per-call overrides from talk params; id-suffixed names take precedence.
    resolveTalkOverrides: ({ params }) => ({
      ...(trimToUndefined(params.voiceId ?? params.voice) == null
        ? {}
        : { voice: trimToUndefined(params.voiceId ?? params.voice) }),
      ...(trimToUndefined(params.modelId ?? params.model) == null
        ? {}
        : { model: trimToUndefined(params.modelId ?? params.model) }),
      ...(asFiniteNumber(params.speed) == null ? {} : { speed: asFiniteNumber(params.speed) }),
    }),
    // Only the single default voice is advertised here.
    listVoices: async () => [
      { id: DEFAULT_DEEPINFRA_TTS_VOICE, name: DEFAULT_DEEPINFRA_TTS_VOICE },
    ],
    // Configured iff an API key is resolvable from any source.
    isConfigured: ({ cfg, providerConfig }) => {
      const config = readDeepInfraTtsProviderConfig(providerConfig);
      return Boolean(resolveDeepInfraTtsApiKey({ cfg, providerConfig: config }));
    },
    synthesize: async (req) => {
      const config = readDeepInfraTtsProviderConfig(req.providerConfig);
      const overrides = readDeepInfraTtsOverrides(req.providerOverrides);
      const apiKey = resolveDeepInfraTtsApiKey({ cfg: req.cfg, providerConfig: config });
      if (!apiKey) {
        throw new Error("DeepInfra API key missing");
      }

      const baseUrl = resolveDeepInfraTtsBaseUrl({ cfg: req.cfg, providerConfig: config });
      const responseFormat = config.responseFormat ?? "mp3";
      const speed = overrides.speed ?? config.speed;
      // Resolve transport policy + auth headers through the SDK so network
      // restrictions are enforced centrally.
      const { allowPrivateNetwork, headers, dispatcherPolicy } = resolveProviderHttpRequestConfig({
        baseUrl,
        defaultBaseUrl: DEEPINFRA_BASE_URL,
        allowPrivateNetwork: false,
        defaultHeaders: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
        provider: "deepinfra",
        capability: "audio",
        transport: "http",
      });

      const { response, release } = await postJsonRequest({
        url: `${baseUrl}/audio/speech`,
        headers,
        body: {
          model: normalizeDeepInfraModelRef(
            overrides.model ?? config.model,
            DEFAULT_DEEPINFRA_TTS_MODEL,
          ),
          input: req.text,
          voice: overrides.voice ?? config.voice,
          response_format: responseFormat,
          // Omit optional fields entirely rather than sending null.
          ...(speed == null ? {} : { speed }),
          ...(config.extraBody == null ? {} : { extra_body: config.extraBody }),
        },
        timeoutMs: req.timeoutMs,
        fetchFn: fetch,
        allowPrivateNetwork,
        dispatcherPolicy,
      });

      try {
        await assertOkOrThrowHttpError(response, "DeepInfra TTS API error");
        return {
          audioBuffer: Buffer.from(await response.arrayBuffer()),
          outputFormat: responseFormat,
          fileExtension: responseFormatToFileExtension(responseFormat),
          // Only mp3/opus are flagged compatible with voice-note playback
          // here — presumably a messaging-client constraint; confirm upstream.
          voiceCompatible: responseFormat === "mp3" || responseFormat === "opus",
        };
      } finally {
        // Always release the pooled connection, even on HTTP errors.
        await release();
      }
    },
  };
}
|
||||
16
extensions/deepinfra/tsconfig.json
Normal file
16
extensions/deepinfra/tsconfig.json
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"extends": "../tsconfig.package-boundary.base.json",
|
||||
"compilerOptions": {
|
||||
"rootDir": "."
|
||||
},
|
||||
"include": ["./*.ts", "./src/**/*.ts"],
|
||||
"exclude": [
|
||||
"./**/*.test.ts",
|
||||
"./dist/**",
|
||||
"./node_modules/**",
|
||||
"./src/test-support/**",
|
||||
"./src/**/*test-helpers.ts",
|
||||
"./src/**/*test-harness.ts",
|
||||
"./src/**/*test-support.ts"
|
||||
]
|
||||
}
|
||||
86
extensions/deepinfra/video-generation-provider.test.ts
Normal file
86
extensions/deepinfra/video-generation-provider.test.ts
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
import { beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import { expectExplicitVideoGenerationCapabilities } from "../../test/helpers/media-generation/provider-capability-assertions.js";
|
||||
import {
|
||||
getProviderHttpMocks,
|
||||
installProviderHttpMockCleanup,
|
||||
} from "../../test/helpers/media-generation/provider-http-mocks.js";
|
||||
|
||||
// Shared HTTP mocks provided by the media-generation test helpers.
const { postJsonRequestMock, resolveProviderHttpRequestConfigMock } = getProviderHttpMocks();

let buildDeepInfraVideoGenerationProvider: typeof import("./video-generation-provider.js").buildDeepInfraVideoGenerationProvider;

// Import the provider lazily so the HTTP mocks above are installed first.
beforeAll(async () => {
  ({ buildDeepInfraVideoGenerationProvider } = await import("./video-generation-provider.js"));
});

installProviderHttpMockCleanup();

describe("deepinfra video generation provider", () => {
  it("declares explicit mode capabilities", () => {
    expectExplicitVideoGenerationCapabilities(buildDeepInfraVideoGenerationProvider());
  });

  it("creates native text-to-video requests and returns the hosted output URL", async () => {
    const release = vi.fn(async () => {});
    // Stub a successful native inference response carrying a relative output URL.
    postJsonRequestMock.mockResolvedValue({
      response: {
        json: async () => ({
          video_url: "/generated/video.mp4",
          request_id: "req_123",
          seed: 42,
          inference_status: { status: "succeeded" },
        }),
      },
      release,
    });

    const provider = buildDeepInfraVideoGenerationProvider();
    const result = await provider.generateVideo({
      provider: "deepinfra",
      model: "deepinfra/Pixverse/Pixverse-T2V",
      prompt: "A bicycle weaving through a rainy neon street",
      cfg: {},
      aspectRatio: "16:9",
      durationSeconds: 8,
      providerOptions: {
        seed: 42,
        negative_prompt: "blur",
        style: "anime",
      },
    });

    // The request must target the native inference base URL with video capability.
    expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith(
      expect.objectContaining({
        provider: "deepinfra",
        capability: "video",
        baseUrl: "https://api.deepinfra.com/v1/inference",
      }),
    );
    // The "deepinfra/" prefix is stripped from the model before building the URL.
    expect(postJsonRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://api.deepinfra.com/v1/inference/Pixverse/Pixverse-T2V",
        body: {
          prompt: "A bicycle weaving through a rainy neon street",
          aspect_ratio: "16:9",
          duration: 8,
          seed: 42,
          negative_prompt: "blur",
          style: "anime",
        },
      }),
    );
    // Relative video_url values are resolved against the DeepInfra API origin.
    expect(result.videos).toEqual([
      {
        url: "https://api.deepinfra.com/generated/video.mp4",
        mimeType: "video/mp4",
        fileName: "video-1.mp4",
      },
    ]);
    expect(result.metadata).toEqual({
      requestId: "req_123",
      seed: 42,
      status: "succeeded",
    });
    // The pooled HTTP connection must be released exactly once.
    expect(release).toHaveBeenCalledOnce();
  });
});
|
||||
251
extensions/deepinfra/video-generation-provider.ts
Normal file
251
extensions/deepinfra/video-generation-provider.ts
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
postJsonRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
|
||||
import type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationRequest,
|
||||
} from "openclaw/plugin-sdk/video-generation";
|
||||
import {
|
||||
DEEPINFRA_NATIVE_BASE_URL,
|
||||
DEEPINFRA_VIDEO_ASPECT_RATIOS,
|
||||
DEEPINFRA_VIDEO_DURATIONS,
|
||||
DEEPINFRA_VIDEO_MODELS,
|
||||
DEFAULT_DEEPINFRA_VIDEO_MODEL,
|
||||
normalizeDeepInfraBaseUrl,
|
||||
normalizeDeepInfraModelRef,
|
||||
} from "./media-models.js";
|
||||
|
||||
// Subset of DeepInfra's inference-status object that this module reads.
type DeepInfraVideoStatus = {
  status?: string;
  runtime_ms?: number;
};

// Fields of the native inference response consumed below. Everything is
// optional because the payload shape is not validated before use.
type DeepInfraVideoResponse = {
  video_url?: string;
  seed?: number;
  request_id?: string;
  inference_status?: DeepInfraVideoStatus;
};
|
||||
|
||||
function encodeDeepInfraModelPath(model: string): string {
|
||||
return model.split("/").map(encodeURIComponent).join("/");
|
||||
}
|
||||
|
||||
/**
 * Resolve the base URL for DeepInfra's native inference API.
 * Precedence:
 *   1. providers.deepinfra.nativeBaseUrl, when set;
 *   2. providers.deepinfra.baseUrl, but only if it already points at
 *      a "/v1/inference" path (i.e. it is a native endpoint, not the
 *      OpenAI-compatible one);
 *   3. the built-in DEEPINFRA_NATIVE_BASE_URL default.
 */
function resolveDeepInfraNativeBaseUrl(req: VideoGenerationRequest): string {
  const providerConfig = req.cfg?.models?.providers?.deepinfra as
    | (Record<string, unknown> & { baseUrl?: unknown })
    | undefined;
  const nativeBaseUrl = normalizeOptionalString(providerConfig?.nativeBaseUrl);
  if (nativeBaseUrl) {
    return normalizeDeepInfraBaseUrl(nativeBaseUrl, DEEPINFRA_NATIVE_BASE_URL);
  }
  const configuredBaseUrl = normalizeOptionalString(providerConfig?.baseUrl);
  if (configuredBaseUrl?.includes("/v1/inference")) {
    return normalizeDeepInfraBaseUrl(configuredBaseUrl, DEEPINFRA_NATIVE_BASE_URL);
  }
  return DEEPINFRA_NATIVE_BASE_URL;
}
|
||||
|
||||
function normalizeDeepInfraVideoUrl(url: string): string {
|
||||
if (url.startsWith("http://") || url.startsWith("https://") || url.startsWith("data:")) {
|
||||
return url;
|
||||
}
|
||||
return new URL(url, "https://api.deepinfra.com").href;
|
||||
}
|
||||
|
||||
function parseVideoDataUrl(url: string): GeneratedVideoAsset | undefined {
|
||||
const match = /^data:([^;,]+);base64,(.+)$/u.exec(url);
|
||||
if (!match) {
|
||||
return undefined;
|
||||
}
|
||||
const mimeType = match[1] ?? "video/mp4";
|
||||
const ext = mimeType.includes("webm") ? "webm" : "mp4";
|
||||
return {
|
||||
buffer: Buffer.from(match[2] ?? "", "base64"),
|
||||
mimeType,
|
||||
fileName: `video-1.${ext}`,
|
||||
};
|
||||
}
|
||||
|
||||
function coerceProviderNumber(value: unknown): number | undefined {
|
||||
return typeof value === "number" && Number.isFinite(value) ? value : undefined;
|
||||
}
|
||||
|
||||
/**
 * Normalize an untyped provider option into a usable string or undefined.
 * Thin alias over normalizeOptionalString, kept for naming symmetry with
 * coerceProviderNumber.
 */
function coerceProviderString(value: unknown): string | undefined {
  return normalizeOptionalString(value);
}
|
||||
|
||||
function resolveDurationSeconds(value: number | undefined): number | undefined {
|
||||
if (typeof value !== "number" || !Number.isFinite(value)) {
|
||||
return undefined;
|
||||
}
|
||||
return value <= 6.5 ? 5 : 8;
|
||||
}
|
||||
|
||||
/**
 * Build the JSON body for a native text-to-video request.
 * Options are forwarded only when they normalize to usable values, so unset
 * knobs are omitted entirely. Both snake_case and camelCase spellings are
 * accepted for negative prompt and guidance scale.
 */
function buildDeepInfraVideoBody(
  req: VideoGenerationRequest,
  model: string,
): Record<string, unknown> {
  const options = req.providerOptions ?? {};
  const body: Record<string, unknown> = {
    prompt: req.prompt,
  };
  const aspectRatio = normalizeOptionalString(req.aspectRatio);
  if (aspectRatio) {
    body.aspect_ratio = aspectRatio;
  }
  // Snapped to the supported 5s/8s buckets; undefined means "omit".
  const duration = resolveDurationSeconds(req.durationSeconds);
  if (duration) {
    body.duration = duration;
  }
  const seed = coerceProviderNumber(options.seed);
  if (seed != null) {
    body.seed = seed;
  }
  const negativePrompt =
    coerceProviderString(options.negative_prompt) ?? coerceProviderString(options.negativePrompt);
  if (negativePrompt) {
    body.negative_prompt = negativePrompt;
  }
  const style = coerceProviderString(options.style);
  if (style) {
    body.style = style;
  }
  const guidanceScale =
    coerceProviderNumber(options.guidance_scale) ?? coerceProviderNumber(options.guidanceScale);
  // guidance_scale is only forwarded for Wan-AI models. NOTE(review): this is
  // a model-prefix heuristic — confirm against DeepInfra's per-model schemas.
  if (guidanceScale != null && model.startsWith("Wan-AI/")) {
    body.guidance_scale = guidanceScale;
  }
  return body;
}
|
||||
|
||||
/**
 * Convert a response payload's video_url into a generated asset.
 * data: URLs become in-memory buffers; everything else is returned as a
 * hosted URL asset. Throws when the payload carries no video_url.
 */
function extractDeepInfraVideoAsset(payload: DeepInfraVideoResponse): GeneratedVideoAsset {
  const videoUrl = normalizeOptionalString(payload.video_url);
  if (!videoUrl) {
    throw new Error("DeepInfra video response missing video_url");
  }
  const normalizedUrl = normalizeDeepInfraVideoUrl(videoUrl);
  const dataAsset = parseVideoDataUrl(normalizedUrl);
  if (dataAsset) {
    return dataAsset;
  }
  return {
    url: normalizedUrl,
    // NOTE(review): hosted outputs are assumed to be mp4 — confirm for any
    // webm-capable models.
    mimeType: "video/mp4",
    fileName: "video-1.mp4",
  };
}
|
||||
|
||||
function failureMessage(payload: DeepInfraVideoResponse): string | undefined {
|
||||
const status = normalizeOptionalString(payload.inference_status?.status)?.toLowerCase();
|
||||
if (status === "failed" || status === "error") {
|
||||
return "DeepInfra video generation failed";
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Factory for the DeepInfra video generation provider.
 * Text-to-video only: image/video reference inputs are rejected up front.
 */
export function buildDeepInfraVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "deepinfra",
    label: "DeepInfra",
    defaultModel: DEFAULT_DEEPINFRA_VIDEO_MODEL,
    models: [...DEEPINFRA_VIDEO_MODELS],
    // Synchronous configuration probe backed by the shared key-presence check.
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "deepinfra",
        agentDir,
      }),
    capabilities: {
      generate: {
        maxVideos: 1,
        maxDurationSeconds: 8,
        supportedDurationSeconds: [...DEEPINFRA_VIDEO_DURATIONS],
        supportsAspectRatio: true,
        aspectRatios: [...DEEPINFRA_VIDEO_ASPECT_RATIOS],
        // Both snake_case and camelCase spellings are advertised because
        // buildDeepInfraVideoBody accepts either.
        providerOptions: {
          seed: "number",
          negative_prompt: "string",
          negativePrompt: "string",
          style: "string",
          guidance_scale: "number",
          guidanceScale: "number",
        },
      },
      // Reference-media modes are explicitly disabled for this integration.
      imageToVideo: {
        enabled: false,
      },
      videoToVideo: {
        enabled: false,
      },
    },
    async generateVideo(req) {
      // Fail fast on unsupported input modes with actionable messages.
      if ((req.inputImages?.length ?? 0) > 0) {
        throw new Error("DeepInfra video generation currently supports text-to-video only.");
      }
      if ((req.inputVideos?.length ?? 0) > 0) {
        throw new Error("DeepInfra video generation does not support video reference inputs.");
      }
      const auth = await resolveApiKeyForProvider({
        provider: "deepinfra",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("DeepInfra API key missing");
      }

      // Strip any "deepinfra/" prefix and fall back to the default model.
      const model = normalizeDeepInfraModelRef(req.model, DEFAULT_DEEPINFRA_VIDEO_MODEL);
      const resolvedBaseUrl = resolveDeepInfraNativeBaseUrl(req);
      // Shared helper supplies auth headers and network policy; private
      // network access is disabled by default.
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: resolvedBaseUrl,
          defaultBaseUrl: DEEPINFRA_NATIVE_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
            "Content-Type": "application/json",
          },
          provider: "deepinfra",
          capability: "video",
          transport: "http",
        });

      // Native inference routes requests by model path segment.
      const { response, release } = await postJsonRequest({
        url: `${baseUrl}/${encodeDeepInfraModelPath(model)}`,
        headers,
        body: buildDeepInfraVideoBody(req, model),
        timeoutMs: req.timeoutMs,
        fetchFn: fetch,
        allowPrivateNetwork,
        dispatcherPolicy,
      });
      try {
        await assertOkOrThrowHttpError(response, "DeepInfra video generation failed");
        const payload = (await response.json()) as DeepInfraVideoResponse;
        // A 200 response can still carry a failed inference status.
        const failed = failureMessage(payload);
        if (failed) {
          throw new Error(failed);
        }
        const video = extractDeepInfraVideoAsset(payload);
        return {
          videos: [video],
          model,
          metadata: {
            requestId: normalizeOptionalString(payload.request_id),
            seed: payload.seed,
            status: payload.inference_status?.status,
          },
        };
      } finally {
        // Always release the pooled connection, even on error paths.
        await release();
      }
    },
  };
}
|
||||
|
|
@ -40,6 +40,7 @@ import {
|
|||
import { describe, expect, it } from "vitest";
|
||||
import alibabaPlugin from "./alibaba/index.js";
|
||||
import byteplusPlugin from "./byteplus/index.js";
|
||||
import deepinfraPlugin from "./deepinfra/index.js";
|
||||
import falPlugin from "./fal/index.js";
|
||||
import googlePlugin from "./google/index.js";
|
||||
import minimaxPlugin from "./minimax/index.js";
|
||||
|
|
@ -80,11 +81,10 @@ type LiveProviderCase = {
|
|||
providerId: string;
|
||||
};
|
||||
|
||||
type BufferedGeneratedVideo = Required<Pick<GeneratedVideoAsset, "buffer" | "mimeType">> &
|
||||
Pick<GeneratedVideoAsset, "fileName">;
|
||||
type LiveGeneratedVideo = GeneratedVideoAsset;
|
||||
|
||||
type LiveVideoAttemptStatus =
|
||||
| { status: "success"; video: BufferedGeneratedVideo }
|
||||
| { status: "success"; video: LiveGeneratedVideo }
|
||||
| { status: "skip" }
|
||||
| { status: "failure" };
|
||||
|
||||
|
|
@ -101,6 +101,12 @@ const CASES: LiveProviderCase[] = [
|
|||
pluginName: "BytePlus Provider",
|
||||
providerId: "byteplus",
|
||||
},
|
||||
{
|
||||
plugin: deepinfraPlugin,
|
||||
pluginId: "deepinfra",
|
||||
pluginName: "DeepInfra Provider",
|
||||
providerId: "deepinfra",
|
||||
},
|
||||
{ plugin: falPlugin, pluginId: "fal", pluginName: "fal Provider", providerId: "fal" },
|
||||
{ plugin: googlePlugin, pluginId: "google", pluginName: "Google Provider", providerId: "google" },
|
||||
{
|
||||
|
|
@ -184,17 +190,18 @@ function maybeLoadShellEnvForVideoProviders(providerIds: string[]): void {
|
|||
maybeLoadShellEnvForGenerationProviders(providerIds);
|
||||
}
|
||||
|
||||
function expectBufferedVideo(
|
||||
video: { buffer?: Buffer; mimeType: string; fileName?: string } | undefined,
|
||||
): BufferedGeneratedVideo {
|
||||
function expectGeneratedVideo(video: GeneratedVideoAsset | undefined): LiveGeneratedVideo {
|
||||
expect(video).toBeDefined();
|
||||
expect(video?.mimeType.startsWith("video/")).toBe(true);
|
||||
if (!video?.buffer) {
|
||||
throw new Error("expected generated video buffer");
|
||||
if (video?.buffer) {
|
||||
expect(video.buffer.byteLength).toBeGreaterThan(1024);
|
||||
return video;
|
||||
}
|
||||
const { buffer, mimeType, fileName } = video;
|
||||
expect(buffer.byteLength).toBeGreaterThan(1024);
|
||||
return { buffer, mimeType, fileName };
|
||||
if (!video?.url) {
|
||||
throw new Error("expected generated video buffer or url");
|
||||
}
|
||||
expect(video.url).toMatch(/^https?:\/\//u);
|
||||
return video;
|
||||
}
|
||||
|
||||
function buildLiveCapabilityOverrides(params: {
|
||||
|
|
@ -262,7 +269,7 @@ async function runLiveVideoAttempt(params: {
|
|||
try {
|
||||
const result = await params.provider.generateVideo(params.request);
|
||||
expect(result.videos.length).toBeGreaterThan(0);
|
||||
const video = expectBufferedVideo(result.videos[0]);
|
||||
const video = expectGeneratedVideo(result.videos[0]);
|
||||
params.attempted.push(
|
||||
`${params.providerId}:${params.mode}:${params.providerModel} (${params.authLabel})`,
|
||||
);
|
||||
|
|
@ -392,7 +399,7 @@ async function runLiveVideoProviderCase(testCase: LiveProviderCase): Promise<voi
|
|||
});
|
||||
const liveSize = testCase.providerId === "openai" ? "1280x720" : undefined;
|
||||
const logPrefix = `[live:video-generation] provider=${testCase.providerId} model=${providerModel}`;
|
||||
let generatedVideo: BufferedGeneratedVideo | null = null;
|
||||
let generatedVideo: LiveGeneratedVideo | null = null;
|
||||
|
||||
const generateAttempt = await runLiveVideoAttempt({
|
||||
authLabel,
|
||||
|
|
@ -503,7 +510,7 @@ async function runLiveVideoProviderCase(testCase: LiveProviderCase): Promise<voi
|
|||
return;
|
||||
}
|
||||
if (!generatedVideo?.buffer) {
|
||||
skipped.push(`${testCase.providerId}:videoToVideo missing generated seed video`);
|
||||
skipped.push(`${testCase.providerId}:videoToVideo missing buffer-backed generated seed video`);
|
||||
expectLiveVideoCasePassed(summaryParams);
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
6
pnpm-lock.yaml
generated
6
pnpm-lock.yaml
generated
|
|
@ -429,6 +429,12 @@ importers:
|
|||
specifier: workspace:*
|
||||
version: link:../../packages/plugin-sdk
|
||||
|
||||
extensions/deepinfra:
|
||||
devDependencies:
|
||||
'@openclaw/plugin-sdk':
|
||||
specifier: workspace:*
|
||||
version: link:../../packages/plugin-sdk
|
||||
|
||||
extensions/deepseek:
|
||||
devDependencies:
|
||||
'@openclaw/plugin-sdk':
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ export const MEDIA_SUITES: Record<MediaSuiteId, MediaSuiteConfig> = {
|
|||
id: "image",
|
||||
testFile: "test/image-generation.runtime.live.test.ts",
|
||||
providerEnvVar: "OPENCLAW_LIVE_IMAGE_GENERATION_PROVIDERS",
|
||||
providers: ["fal", "google", "minimax", "openai", "vydra", "xai"],
|
||||
providers: ["deepinfra", "fal", "google", "minimax", "openai", "vydra", "xai"],
|
||||
},
|
||||
music: {
|
||||
id: "music",
|
||||
|
|
@ -48,6 +48,7 @@ export const MEDIA_SUITES: Record<MediaSuiteId, MediaSuiteConfig> = {
|
|||
providers: [
|
||||
"alibaba",
|
||||
"byteplus",
|
||||
"deepinfra",
|
||||
"fal",
|
||||
"google",
|
||||
"minimax",
|
||||
|
|
@ -61,6 +62,7 @@ export const MEDIA_SUITES: Record<MediaSuiteId, MediaSuiteConfig> = {
|
|||
defaultProviders: [
|
||||
"alibaba",
|
||||
"byteplus",
|
||||
"deepinfra",
|
||||
"google",
|
||||
"minimax",
|
||||
"openai",
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js";
|
|||
export { parseProviderModelMap, redactLiveApiKey };
|
||||
|
||||
export const DEFAULT_LIVE_IMAGE_MODELS: Record<string, string> = {
|
||||
deepinfra: "deepinfra/black-forest-labs/FLUX-1-schnell",
|
||||
fal: "fal/fal-ai/flux/dev",
|
||||
google: "google/gemini-3.1-flash-image-preview",
|
||||
minimax: "minimax/image-01",
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ const EXPECTED_BUNDLED_VIDEO_PROVIDER_PLUGIN_IDS = [
|
|||
"alibaba",
|
||||
"byteplus",
|
||||
"comfy",
|
||||
"deepinfra",
|
||||
"fal",
|
||||
"google",
|
||||
"minimax",
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ export { parseProviderModelMap, redactLiveApiKey };
|
|||
export const DEFAULT_LIVE_VIDEO_MODELS: Record<string, string> = {
|
||||
alibaba: "alibaba/wan2.6-t2v",
|
||||
byteplus: "byteplus/seedance-1-0-lite-t2v-250428",
|
||||
deepinfra: "deepinfra/Pixverse/Pixverse-T2V",
|
||||
fal: "fal/fal-ai/minimax/video-01-live",
|
||||
google: "google/veo-3.1-fast-generate-preview",
|
||||
minimax: "minimax/MiniMax-Hailuo-2.3",
|
||||
|
|
|
|||
|
|
@ -51,6 +51,11 @@ function loadBundledProviderPlugin(
|
|||
}
|
||||
|
||||
const PROVIDER_CASES: LiveProviderCase[] = [
|
||||
{
|
||||
pluginId: "deepinfra",
|
||||
pluginName: "DeepInfra Provider",
|
||||
providerId: "deepinfra",
|
||||
},
|
||||
{
|
||||
pluginId: "fal",
|
||||
pluginName: "fal Provider",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue