diff --git a/packages/cli/src/ui/components/ModelDialog.test.tsx b/packages/cli/src/ui/components/ModelDialog.test.tsx
index 3ce25bfa9..b5900c80c 100644
--- a/packages/cli/src/ui/components/ModelDialog.test.tsx
+++ b/packages/cli/src/ui/components/ModelDialog.test.tsx
@@ -108,7 +108,7 @@ describe('', () => {
it('renders the title and help text', () => {
const { getByText } = renderComponent();
expect(getByText('Select Model')).toBeDefined();
- expect(getByText('(Press Esc to close)')).toBeDefined();
+ expect(getByText('Enter to select · Esc to close')).toBeDefined();
});
it('passes all model options to DescriptiveRadioButtonSelect', () => {
@@ -251,11 +251,12 @@ describe('', () => {
expect(props.onClose).toHaveBeenCalledTimes(1);
});
- it('does not pass onHighlight to DescriptiveRadioButtonSelect', () => {
+ it('passes onHighlight to DescriptiveRadioButtonSelect', () => {
renderComponent();
const childOnHighlight = mockedSelect.mock.calls[0][0].onHighlight;
- expect(childOnHighlight).toBeUndefined();
+ expect(childOnHighlight).toBeDefined();
+ expect(typeof childOnHighlight).toBe('function');
});
it('calls onClose prop when "escape" key is pressed', () => {
diff --git a/packages/cli/src/ui/components/ModelDialog.tsx b/packages/cli/src/ui/components/ModelDialog.tsx
index 8c102890f..056dfa571 100644
--- a/packages/cli/src/ui/components/ModelDialog.tsx
+++ b/packages/cli/src/ui/components/ModelDialog.tsx
@@ -13,8 +13,7 @@ import {
logModelSlashCommand,
type AvailableModel as CoreAvailableModel,
type ContentGeneratorConfig,
- type ContentGeneratorConfigSource,
- type ContentGeneratorConfigSources,
+ type InputModalities,
} from '@qwen-code/qwen-code-core';
import { useKeypress } from '../hooks/useKeypress.js';
import { theme } from '../semantic-colors.js';
@@ -26,57 +25,21 @@ import { MAINLINE_CODER } from '../models/availableModels.js';
import { getPersistScopeForModelSelection } from '../../config/modelProvidersScope.js';
import { t } from '../../i18n/index.js';
+function formatModalities(modalities?: InputModalities): string {
+ if (!modalities) return 'text-only';
+ const parts: string[] = [];
+ if (modalities.image) parts.push('image');
+ if (modalities.pdf) parts.push('pdf');
+ if (modalities.audio) parts.push('audio');
+ if (modalities.video) parts.push('video');
+ if (parts.length === 0) return 'text-only';
+ return `text · ${parts.join(' · ')}`;
+}
+
interface ModelDialogProps {
onClose: () => void;
}
-function formatSourceBadge(
- source: ContentGeneratorConfigSource | undefined,
-): string | undefined {
- if (!source) return undefined;
-
- switch (source.kind) {
- case 'cli':
- return source.detail ? `CLI ${source.detail}` : 'CLI';
- case 'env':
- return source.envKey ? `ENV ${source.envKey}` : 'ENV';
- case 'settings':
- return source.settingsPath
- ? `Settings ${source.settingsPath}`
- : 'Settings';
- case 'modelProviders': {
- const suffix =
- source.authType && source.modelId
- ? `${source.authType}:${source.modelId}`
- : source.authType
- ? `${source.authType}`
- : source.modelId
- ? `${source.modelId}`
- : '';
- return suffix ? `ModelProviders ${suffix}` : 'ModelProviders';
- }
- case 'default':
- return source.detail ? `Default ${source.detail}` : 'Default';
- case 'computed':
- return source.detail ? `Computed ${source.detail}` : 'Computed';
- case 'programmatic':
- return source.detail ? `Programmatic ${source.detail}` : 'Programmatic';
- case 'unknown':
- default:
- return undefined;
- }
-}
-
-function readSourcesFromConfig(config: unknown): ContentGeneratorConfigSources {
- if (!config) {
- return {};
- }
- const maybe = config as {
- getContentGeneratorConfigSources?: () => ContentGeneratorConfigSources;
- };
- return maybe.getContentGeneratorConfigSources?.() ?? {};
-}
-
function maskApiKey(apiKey: string | undefined): string {
if (!apiKey) return '(not set)';
const trimmed = apiKey.trim();
@@ -143,35 +106,26 @@ function handleModelSwitchSuccess({
);
}
-function ConfigRow({
+function formatContextWindow(size?: number): string {
+ if (!size) return '(unknown)';
+ return `${size.toLocaleString('en-US')} tokens`;
+}
+
+function DetailRow({
label,
value,
- badge,
}: {
label: string;
value: React.ReactNode;
- badge?: string;
}): React.JSX.Element {
return (
-
-
-
- {label}:
-
-
- {value}
-
+
+
+ {label}:
+
+
+ {value}
- {badge ? (
-
-
-
-
-
- {badge}
-
-
- ) : null}
);
}
@@ -183,13 +137,9 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
// Local error state for displaying errors within the dialog
const [errorMessage, setErrorMessage] = useState(null);
+ const [highlightedValue, setHighlightedValue] = useState(null);
const authType = config?.getAuthType();
- const effectiveConfig =
- (config?.getContentGeneratorConfig?.() as
- | ContentGeneratorConfig
- | undefined) ?? undefined;
- const sources = readSourcesFromConfig(config);
const availableModelEntries = useMemo(() => {
const allModels = config ? config.getAllConfiguredModels() : [];
@@ -319,6 +269,20 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
return index === -1 ? 0 : index;
}, [MODEL_OPTIONS, preferredKey]);
+ const handleHighlight = useCallback((value: string) => {
+ setHighlightedValue(value);
+ }, []);
+
+ const highlightedEntry = useMemo(() => {
+ const key = highlightedValue ?? preferredKey;
+ return availableModelEntries.find(
+ ({ authType: t2, model, isRuntime, snapshotId }) => {
+ const v = isRuntime && snapshotId ? snapshotId : `${t2}::${model.id}`;
+ return v === key;
+ },
+ );
+ }, [highlightedValue, preferredKey, availableModelEntries]);
+
const handleSelect = useCallback(
async (selected: string) => {
setErrorMessage(null);
@@ -413,35 +377,6 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
>
{t('Select Model')}
-
-
- {t('Current (effective) configuration')}
-
-
-
-
-
- {authType !== AuthType.QWEN_OAUTH && (
- <>
-
-
- >
- )}
-
-
-
{!hasModels ? (
@@ -465,12 +400,50 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
)}
+ {highlightedEntry && (
+
+
+
+
+
+ {highlightedEntry.authType !== AuthType.QWEN_OAUTH && (
+ <>
+
+
+ >
+ )}
+
+
+ )}
+
{errorMessage && (
@@ -480,7 +453,9 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
)}
- {t('(Press Esc to close)')}
+
+ {t('Enter to select · Esc to close')}
+
);
diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts
index f3af06bda..078729af6 100644
--- a/packages/core/src/core/contentGenerator.ts
+++ b/packages/core/src/core/contentGenerator.ts
@@ -60,6 +60,17 @@ export enum AuthType {
USE_ANTHROPIC = 'anthropic',
}
+/**
+ * Supported input modalities for a model.
+ * Omitted or false fields mean the model does not support that input type.
+ */
+export type InputModalities = {
+ image?: boolean;
+ pdf?: boolean;
+ audio?: boolean;
+ video?: boolean;
+};
+
export type ContentGeneratorConfig = {
model: string;
apiKey?: string;
@@ -98,6 +109,9 @@ export type ContentGeneratorConfig = {
customHeaders?: Record;
// Extra body parameters to be merged into the request body
extra_body?: Record;
+ // Supported input modalities. Unsupported media types are replaced with text
+ // placeholders. Leave undefined to use automatic detection from model name.
+ modalities?: InputModalities;
};
// Keep the public ContentGeneratorConfigSources API, but reuse the generic
diff --git a/packages/core/src/core/modalityDefaults.test.ts b/packages/core/src/core/modalityDefaults.test.ts
new file mode 100644
index 000000000..8aae4be76
--- /dev/null
+++ b/packages/core/src/core/modalityDefaults.test.ts
@@ -0,0 +1,219 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { defaultModalities } from './modalityDefaults.js';
+
+describe('defaultModalities', () => {
+ describe('Google Gemini', () => {
+ it('returns full multimodal for gemini-3-pro', () => {
+ expect(defaultModalities('gemini-3-pro-preview')).toEqual({
+ image: true,
+ pdf: true,
+ audio: true,
+ video: true,
+ });
+ });
+
+ it('returns full multimodal for gemini-3-flash', () => {
+ expect(defaultModalities('gemini-3-flash-preview')).toEqual({
+ image: true,
+ pdf: true,
+ audio: true,
+ video: true,
+ });
+ });
+
+ it('returns full multimodal for gemini-3.1-pro', () => {
+ expect(defaultModalities('gemini-3.1-pro-preview')).toEqual({
+ image: true,
+ pdf: true,
+ audio: true,
+ video: true,
+ });
+ });
+
+ it('returns full multimodal for gemini-2.5-pro', () => {
+ expect(defaultModalities('gemini-2.5-pro')).toEqual({
+ image: true,
+ pdf: true,
+ audio: true,
+ video: true,
+ });
+ });
+
+ it('returns full multimodal for gemini-1.5-flash', () => {
+ expect(defaultModalities('gemini-1.5-flash')).toEqual({
+ image: true,
+ pdf: true,
+ audio: true,
+ video: true,
+ });
+ });
+ });
+
+ describe('OpenAI', () => {
+ it('returns image for gpt-5.2', () => {
+ const m = defaultModalities('gpt-5.2');
+ expect(m.image).toBe(true);
+ expect(m.audio).toBeUndefined();
+ expect(m.pdf).toBeUndefined();
+ expect(m.video).toBeUndefined();
+ });
+
+ it('returns image for gpt-5-mini', () => {
+ expect(defaultModalities('gpt-5-mini').image).toBe(true);
+ });
+
+ it('returns image for gpt-4o', () => {
+ expect(defaultModalities('gpt-4o').image).toBe(true);
+ });
+
+ it('returns image for o3', () => {
+ expect(defaultModalities('o3').image).toBe(true);
+ });
+ });
+
+ describe('Anthropic Claude', () => {
+ it('returns image + pdf for claude-opus-4-6', () => {
+ const m = defaultModalities('claude-opus-4-6');
+ expect(m.image).toBe(true);
+ expect(m.pdf).toBe(true);
+ expect(m.audio).toBeUndefined();
+ expect(m.video).toBeUndefined();
+ });
+
+ it('returns image + pdf for claude-sonnet-4-6', () => {
+ const m = defaultModalities('claude-sonnet-4-6');
+ expect(m.image).toBe(true);
+ expect(m.pdf).toBe(true);
+ });
+
+ it('returns image + pdf for claude-sonnet-4', () => {
+ const m = defaultModalities('claude-sonnet-4');
+ expect(m.image).toBe(true);
+ expect(m.pdf).toBe(true);
+ });
+
+ it('returns image + pdf for claude-3.5-sonnet', () => {
+ const m = defaultModalities('claude-3.5-sonnet');
+ expect(m.image).toBe(true);
+ expect(m.pdf).toBe(true);
+ });
+ });
+
+ describe('Qwen', () => {
+ it('returns image + video for qwen-vl-max', () => {
+ const m = defaultModalities('qwen-vl-max');
+ expect(m.image).toBe(true);
+ expect(m.video).toBe(true);
+ expect(m.pdf).toBeUndefined();
+ expect(m.audio).toBeUndefined();
+ });
+
+ it('returns image + video for qwen3-vl-plus', () => {
+ const m = defaultModalities('qwen3-vl-plus');
+ expect(m.image).toBe(true);
+ expect(m.video).toBe(true);
+ });
+
+ it('returns image + video for vision-model', () => {
+ const m = defaultModalities('vision-model');
+ expect(m.image).toBe(true);
+ expect(m.video).toBe(true);
+ });
+
+ it('returns text-only for qwen3-coder-plus', () => {
+ expect(defaultModalities('qwen3-coder-plus')).toEqual({});
+ });
+
+ it('returns image + video for coder-model (same as qwen3.5-plus)', () => {
+ expect(defaultModalities('coder-model')).toEqual({
+ image: true,
+ video: true,
+ });
+ });
+
+ it('returns image + video for qwen3.5-plus', () => {
+ const m = defaultModalities('qwen3.5-plus');
+ expect(m.image).toBe(true);
+ expect(m.video).toBe(true);
+ expect(m.pdf).toBeUndefined();
+ expect(m.audio).toBeUndefined();
+ });
+
+ it('returns text-only for qwen-turbo', () => {
+ expect(defaultModalities('qwen-turbo')).toEqual({});
+ });
+ });
+
+ describe('DeepSeek', () => {
+ it('returns text-only for deepseek-chat', () => {
+ expect(defaultModalities('deepseek-chat')).toEqual({});
+ });
+
+ it('returns text-only for deepseek-reasoner', () => {
+ expect(defaultModalities('deepseek-reasoner')).toEqual({});
+ });
+ });
+
+ describe('Zhipu GLM', () => {
+ it('returns image for glm-4.5v', () => {
+ const m = defaultModalities('glm-4.5v');
+ expect(m.image).toBe(true);
+ expect(m.pdf).toBeUndefined();
+ });
+
+ it('returns text-only for glm-5', () => {
+ expect(defaultModalities('glm-5')).toEqual({});
+ });
+
+ it('returns text-only for glm-4.7', () => {
+ expect(defaultModalities('glm-4.7')).toEqual({});
+ });
+ });
+
+ describe('MiniMax', () => {
+ it('returns text-only for MiniMax-M2.5', () => {
+ expect(defaultModalities('MiniMax-M2.5')).toEqual({});
+ });
+ });
+
+ describe('Kimi', () => {
+ it('returns image + video for kimi-k2.5', () => {
+ const m = defaultModalities('kimi-k2.5');
+ expect(m.image).toBe(true);
+ expect(m.video).toBe(true);
+ expect(m.pdf).toBeUndefined();
+ expect(m.audio).toBeUndefined();
+ });
+
+ it('returns text-only for kimi-k2', () => {
+ expect(defaultModalities('kimi-k2')).toEqual({});
+ });
+ });
+
+ describe('unknown models', () => {
+ it('returns text-only for unrecognized models', () => {
+ expect(defaultModalities('some-random-model-xyz')).toEqual({});
+ });
+ });
+
+ describe('normalization', () => {
+ it('normalizes provider prefixes', () => {
+ expect(defaultModalities('openai/gpt-4o')).toEqual(
+ defaultModalities('gpt-4o'),
+ );
+ });
+
+ it('returns a fresh copy each time', () => {
+ const a = defaultModalities('gemini-2.5-pro');
+ const b = defaultModalities('gemini-2.5-pro');
+ expect(a).toEqual(b);
+ expect(a).not.toBe(b);
+ });
+ });
+});
diff --git a/packages/core/src/core/modalityDefaults.ts b/packages/core/src/core/modalityDefaults.ts
new file mode 100644
index 000000000..790499dfe
--- /dev/null
+++ b/packages/core/src/core/modalityDefaults.ts
@@ -0,0 +1,95 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { InputModalities } from './contentGenerator.js';
+import { normalize } from './tokenLimits.js';
+
+const FULL_MULTIMODAL: InputModalities = {
+ image: true,
+ pdf: true,
+ audio: true,
+ video: true,
+};
+
+/**
+ * Ordered regex patterns: most specific -> most general (first match wins).
+ * Default for unknown models is text-only (empty object = all false).
+ */
+const MODALITY_PATTERNS: Array<[RegExp, InputModalities]> = [
+ // -------------------
+ // Google Gemini — full multimodal
+ // -------------------
+ [/^gemini-3/, FULL_MULTIMODAL],
+ [/^gemini-/, FULL_MULTIMODAL],
+
+ // -------------------
+ // OpenAI — image by default for all gpt/o-series models
+ // -------------------
+ [/^gpt-5/, { image: true }],
+ [/^gpt-/, { image: true }],
+ [/^o\d/, { image: true }],
+
+ // -------------------
+ // Anthropic Claude — image + pdf
+ // -------------------
+ [/^claude-/, { image: true, pdf: true }],
+
+ // -------------------
+ // Alibaba / Qwen
+ // -------------------
+  // Qwen3.5-Plus: image + video support
+ [/^qwen3\.5-plus/, { image: true, video: true }],
+ [/^coder-model$/, { image: true, video: true }],
+
+ // Qwen VL (vision-language) models: image + video
+ [/^qwen-vl-/, { image: true, video: true }],
+ [/^qwen3-vl-/, { image: true, video: true }],
+ [/^vision-model$/, { image: true, video: true }],
+
+ // Qwen coder / text models: text-only
+ [/^qwen3-coder-/, {}],
+ [/^qwen/, {}],
+
+ // -------------------
+ // DeepSeek — text-only
+ // -------------------
+ [/^deepseek/, {}],
+
+ // -------------------
+ // Zhipu GLM
+ // -------------------
+ [/^glm-4\.5v/, { image: true }],
+ [/^glm-5(?:-|$)/, {}],
+ [/^glm-/, {}],
+
+ // -------------------
+ // MiniMax — text-only
+ // -------------------
+ [/^minimax-/, {}],
+
+ // -------------------
+ // Moonshot / Kimi
+ // -------------------
+ [/^kimi-k2\.5/, { image: true, video: true }],
+ [/^kimi-/, {}],
+];
+
+/**
+ * Return the default input modalities for a model based on its name.
+ *
+ * Uses the same normalize-then-regex pattern as {@link tokenLimit}.
+ * Unknown models default to text-only (empty object) to avoid sending
+ * unsupported media types that would cause unrecoverable API errors.
+ */
+export function defaultModalities(model: string): InputModalities {
+ const norm = normalize(model);
+ for (const [regex, modalities] of MODALITY_PATTERNS) {
+ if (regex.test(norm)) {
+ return { ...modalities };
+ }
+ }
+ return {};
+}
diff --git a/packages/core/src/core/openaiContentGenerator/converter.test.ts b/packages/core/src/core/openaiContentGenerator/converter.test.ts
index 36bbc812d..12b8b8982 100644
--- a/packages/core/src/core/openaiContentGenerator/converter.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/converter.test.ts
@@ -22,7 +22,12 @@ describe('OpenAIContentConverter', () => {
let converter: OpenAIContentConverter;
beforeEach(() => {
- converter = new OpenAIContentConverter('test-model');
+ converter = new OpenAIContentConverter('test-model', 'auto', {
+ image: true,
+ pdf: true,
+ audio: true,
+ video: true,
+ });
});
describe('resetStreamingToolCalls', () => {
@@ -1684,7 +1689,12 @@ describe('MCP tool result end-to-end through OpenAI converter (issue #1520)', ()
let converter: OpenAIContentConverter;
beforeEach(() => {
- converter = new OpenAIContentConverter('test-model');
+ converter = new OpenAIContentConverter('test-model', 'auto', {
+ image: true,
+ pdf: true,
+ audio: true,
+ video: true,
+ });
});
it('should preserve MCP multi-text content in tool message (not leak to user message)', () => {
@@ -1957,3 +1967,159 @@ describe('MCP tool result end-to-end through OpenAI converter (issue #1520)', ()
expect(contentArray[1].image_url?.url).toContain('data:image/png');
});
});
+
+describe('modality filtering', () => {
+ function makeRequest(parts: Part[]): GenerateContentParameters {
+ return {
+ model: 'test-model',
+ contents: [{ role: 'user', parts }],
+ };
+ }
+
+ function getUserContentParts(
+ messages: OpenAI.Chat.ChatCompletionMessageParam[],
+ ): Array<{ type: string; text?: string }> {
+ const userMsg = messages.find((m) => m.role === 'user');
+ if (
+ !userMsg ||
+ !('content' in userMsg) ||
+ !Array.isArray(userMsg.content)
+ ) {
+ return [];
+ }
+ return userMsg.content as Array<{ type: string; text?: string }>;
+ }
+
+ it('replaces image with placeholder when image modality is disabled', () => {
+ const conv = new OpenAIContentConverter('deepseek-chat', 'auto', {});
+ const request = makeRequest([
+ {
+ inlineData: { mimeType: 'image/png', data: 'abc123' },
+ displayName: 'screenshot.png',
+ } as unknown as Part,
+ ]);
+ const messages = conv.convertGeminiRequestToOpenAI(request);
+ const parts = getUserContentParts(messages);
+ expect(parts).toHaveLength(1);
+ expect(parts[0].type).toBe('text');
+ expect(parts[0].text).toContain('image file');
+ expect(parts[0].text).toContain('was not provided to you');
+ });
+
+ it('keeps image when image modality is enabled', () => {
+ const conv = new OpenAIContentConverter('gpt-4o', 'auto', { image: true });
+ const request = makeRequest([
+ {
+ inlineData: { mimeType: 'image/png', data: 'abc123' },
+ } as unknown as Part,
+ ]);
+ const messages = conv.convertGeminiRequestToOpenAI(request);
+ const parts = getUserContentParts(messages);
+ expect(parts).toHaveLength(1);
+ expect(parts[0].type).toBe('image_url');
+ });
+
+ it('replaces PDF with placeholder when pdf modality is disabled', () => {
+ const conv = new OpenAIContentConverter('test-model', 'auto', {
+ image: true,
+ });
+ const request = makeRequest([
+ {
+ inlineData: {
+ mimeType: 'application/pdf',
+ data: 'pdf-data',
+ displayName: 'doc.pdf',
+ },
+ } as unknown as Part,
+ ]);
+ const messages = conv.convertGeminiRequestToOpenAI(request);
+ const parts = getUserContentParts(messages);
+ expect(parts).toHaveLength(1);
+ expect(parts[0].type).toBe('text');
+ expect(parts[0].text).toContain('pdf file');
+ expect(parts[0].text).toContain('was not provided to you');
+ });
+
+ it('keeps PDF when pdf modality is enabled', () => {
+ const conv = new OpenAIContentConverter('claude-sonnet', 'auto', {
+ image: true,
+ pdf: true,
+ });
+ const request = makeRequest([
+ {
+ inlineData: {
+ mimeType: 'application/pdf',
+ data: 'pdf-data',
+ displayName: 'doc.pdf',
+ },
+ } as unknown as Part,
+ ]);
+ const messages = conv.convertGeminiRequestToOpenAI(request);
+ const parts = getUserContentParts(messages);
+ expect(parts).toHaveLength(1);
+ expect(parts[0].type).toBe('file');
+ });
+
+ it('replaces video with placeholder when video modality is disabled', () => {
+ const conv = new OpenAIContentConverter('test-model', 'auto', {});
+ const request = makeRequest([
+ {
+ inlineData: { mimeType: 'video/mp4', data: 'vid-data' },
+ } as unknown as Part,
+ ]);
+ const messages = conv.convertGeminiRequestToOpenAI(request);
+ const parts = getUserContentParts(messages);
+ expect(parts).toHaveLength(1);
+ expect(parts[0].type).toBe('text');
+ expect(parts[0].text).toContain('video file');
+ });
+
+ it('replaces audio with placeholder when audio modality is disabled', () => {
+ const conv = new OpenAIContentConverter('test-model', 'auto', {});
+ const request = makeRequest([
+ {
+ inlineData: { mimeType: 'audio/wav', data: 'audio-data' },
+ } as unknown as Part,
+ ]);
+ const messages = conv.convertGeminiRequestToOpenAI(request);
+ const parts = getUserContentParts(messages);
+ expect(parts).toHaveLength(1);
+ expect(parts[0].type).toBe('text');
+ expect(parts[0].text).toContain('audio file');
+ });
+
+ it('handles mixed content: keeps text + supported media, replaces unsupported', () => {
+ const conv = new OpenAIContentConverter('gpt-4o', 'auto', { image: true });
+ const request = makeRequest([
+ { text: 'Analyze these files' },
+ {
+ inlineData: { mimeType: 'image/png', data: 'img-data' },
+ } as unknown as Part,
+ {
+ inlineData: { mimeType: 'video/mp4', data: 'vid-data' },
+ } as unknown as Part,
+ ]);
+ const messages = conv.convertGeminiRequestToOpenAI(request);
+ const parts = getUserContentParts(messages);
+ expect(parts).toHaveLength(3);
+ expect(parts[0].type).toBe('text');
+ expect(parts[0].text).toBe('Analyze these files');
+ expect(parts[1].type).toBe('image_url');
+ expect(parts[2].type).toBe('text');
+ expect(parts[2].text).toContain('video file');
+ });
+
+ it('defaults to text-only when no modalities are specified', () => {
+ const conv = new OpenAIContentConverter('unknown-model');
+ const request = makeRequest([
+ {
+ inlineData: { mimeType: 'image/png', data: 'img-data' },
+ } as unknown as Part,
+ ]);
+ const messages = conv.convertGeminiRequestToOpenAI(request);
+ const parts = getUserContentParts(messages);
+ expect(parts).toHaveLength(1);
+ expect(parts[0].type).toBe('text');
+ expect(parts[0].text).toContain('image file');
+ });
+});
diff --git a/packages/core/src/core/openaiContentGenerator/converter.ts b/packages/core/src/core/openaiContentGenerator/converter.ts
index 2ca7428bd..38a2f7745 100644
--- a/packages/core/src/core/openaiContentGenerator/converter.ts
+++ b/packages/core/src/core/openaiContentGenerator/converter.ts
@@ -20,12 +20,16 @@ import type {
import { GenerateContentResponse, FinishReason } from '@google/genai';
import type OpenAI from 'openai';
import { safeJsonParse } from '../../utils/safeJsonParse.js';
+import { createDebugLogger } from '../../utils/debugLogger.js';
+import type { InputModalities } from '../contentGenerator.js';
import { StreamingToolCallParser } from './streamingToolCallParser.js';
import {
convertSchema,
type SchemaComplianceMode,
} from '../../utils/schemaConverter.js';
+const debugLogger = createDebugLogger('CONVERTER');
+
/**
* Extended usage type that supports both OpenAI standard format and alternative formats
* Some models return cached_tokens at the top level instead of in prompt_tokens_details
@@ -92,12 +96,18 @@ type OpenAIContentPart =
export class OpenAIContentConverter {
private model: string;
private schemaCompliance: SchemaComplianceMode;
+ private modalities: InputModalities;
private streamingToolCallParser: StreamingToolCallParser =
new StreamingToolCallParser();
- constructor(model: string, schemaCompliance: SchemaComplianceMode = 'auto') {
+ constructor(
+ model: string,
+ schemaCompliance: SchemaComplianceMode = 'auto',
+ modalities: InputModalities = {},
+ ) {
this.model = model;
this.schemaCompliance = schemaCompliance;
+ this.modalities = modalities;
}
/**
@@ -108,6 +118,13 @@ export class OpenAIContentConverter {
this.model = model;
}
+ /**
+ * Update the supported input modalities.
+ */
+ setModalities(modalities: InputModalities): void {
+ this.modalities = modalities;
+ }
+
/**
* Reset streaming tool calls parser for new stream processing
* This should be called at the beginning of each stream to prevent
@@ -585,13 +602,19 @@ export class OpenAIContentConverter {
}
/**
- * Create OpenAI media content part from Gemini part
+ * Create OpenAI media content part from Gemini part.
+ * Checks modality support before building each media type.
*/
private createMediaContentPart(part: Part): OpenAIContentPart | null {
if (part.inlineData?.mimeType && part.inlineData?.data) {
const mimeType = part.inlineData.mimeType;
const mediaType = this.getMediaType(mimeType);
+ const displayName = part.inlineData.displayName || mimeType;
+
if (mediaType === 'image') {
+ if (!this.modalities.image) {
+ return this.unsupportedModalityPlaceholder('image', displayName);
+ }
const dataUrl = `data:${mimeType};base64,${part.inlineData.data}`;
return {
type: 'image_url' as const,
@@ -600,6 +623,9 @@ export class OpenAIContentConverter {
}
if (mimeType === 'application/pdf') {
+ if (!this.modalities.pdf) {
+ return this.unsupportedModalityPlaceholder('pdf', displayName);
+ }
const filename = part.inlineData.displayName || 'document.pdf';
return {
type: 'file' as const,
@@ -611,6 +637,9 @@ export class OpenAIContentConverter {
}
if (mediaType === 'audio') {
+ if (!this.modalities.audio) {
+ return this.unsupportedModalityPlaceholder('audio', displayName);
+ }
const format = this.getAudioFormat(mimeType);
if (format) {
return {
@@ -624,6 +653,9 @@ export class OpenAIContentConverter {
}
if (mediaType === 'video') {
+ if (!this.modalities.video) {
+ return this.unsupportedModalityPlaceholder('video', displayName);
+ }
return {
type: 'video_url' as const,
video_url: {
@@ -632,12 +664,9 @@ export class OpenAIContentConverter {
};
}
- const displayName = part.inlineData.displayName
- ? ` (${part.inlineData.displayName})`
- : '';
return {
type: 'text' as const,
- text: `Unsupported inline media type: ${mimeType}${displayName}.`,
+ text: `Unsupported inline media type: ${mimeType} (${displayName}).`,
};
}
@@ -648,6 +677,9 @@ export class OpenAIContentConverter {
const mediaType = this.getMediaType(mimeType);
if (mediaType === 'image') {
+ if (!this.modalities.image) {
+ return this.unsupportedModalityPlaceholder('image', filename);
+ }
return {
type: 'image_url' as const,
image_url: { url: fileUri },
@@ -655,6 +687,9 @@ export class OpenAIContentConverter {
}
if (mimeType === 'application/pdf') {
+ if (!this.modalities.pdf) {
+ return this.unsupportedModalityPlaceholder('pdf', filename);
+ }
return {
type: 'file' as const,
file: {
@@ -665,6 +700,9 @@ export class OpenAIContentConverter {
}
if (mediaType === 'video') {
+ if (!this.modalities.video) {
+ return this.unsupportedModalityPlaceholder('video', filename);
+ }
return {
type: 'video_url' as const,
video_url: {
@@ -673,18 +711,43 @@ export class OpenAIContentConverter {
};
}
- const displayName = part.fileData.displayName
+ const displayNameStr = part.fileData.displayName
? ` (${part.fileData.displayName})`
: '';
return {
type: 'text' as const,
- text: `Unsupported file media type: ${mimeType}${displayName}.`,
+ text: `Unsupported file media type: ${mimeType}${displayNameStr}.`,
};
}
return null;
}
+ /**
+ * Create a text placeholder for unsupported modalities.
+ */
+ private unsupportedModalityPlaceholder(
+ modality: string,
+ displayName: string,
+ ): OpenAIContentPart {
+ debugLogger.warn(
+ `Model '${this.model}' does not support ${modality} input. ` +
+ `Replacing with text placeholder: ${displayName}`,
+ );
+ let hint: string;
+ if (modality === 'pdf') {
+ hint =
+ 'The content cannot be accessed by the read_file tool. Try using other tools or commands that can extract text from PDF files.';
+ } else {
+ hint =
+ 'The content cannot be accessed by the read_file tool. If you cannot find an alternative approach, let the user know you are unable to process this type of file.';
+ }
+ return {
+ type: 'text' as const,
+ text: `[The ${modality} file "${displayName}" was not provided to you. ${hint}]`,
+ };
+ }
+
/**
* Determine media type from MIME type
*/
diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.test.ts b/packages/core/src/core/openaiContentGenerator/pipeline.test.ts
index 964f768a3..d71e23e91 100644
--- a/packages/core/src/core/openaiContentGenerator/pipeline.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/pipeline.test.ts
@@ -47,6 +47,7 @@ describe('ContentGenerationPipeline', () => {
// Mock converter
mockConverter = {
setModel: vi.fn(),
+ setModalities: vi.fn(),
convertGeminiRequestToOpenAI: vi.fn(),
convertOpenAIResponseToGemini: vi.fn(),
convertOpenAIChunkToGemini: vi.fn(),
@@ -104,6 +105,7 @@ describe('ContentGenerationPipeline', () => {
expect(OpenAIContentConverter).toHaveBeenCalledWith(
'test-model',
undefined,
+ {},
);
});
});
diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.ts b/packages/core/src/core/openaiContentGenerator/pipeline.ts
index 1865adb48..8d2cc9fc7 100644
--- a/packages/core/src/core/openaiContentGenerator/pipeline.ts
+++ b/packages/core/src/core/openaiContentGenerator/pipeline.ts
@@ -46,6 +46,7 @@ export class ContentGenerationPipeline {
this.converter = new OpenAIContentConverter(
this.contentGeneratorConfig.model,
this.contentGeneratorConfig.schemaCompliance,
+ this.contentGeneratorConfig.modalities ?? {},
);
}
@@ -58,6 +59,7 @@ export class ContentGenerationPipeline {
// that is not valid/available for the OpenAI-compatible backend.
const effectiveModel = this.contentGeneratorConfig.model;
this.converter.setModel(effectiveModel);
+ this.converter.setModalities(this.contentGeneratorConfig.modalities ?? {});
return this.executeWithErrorHandling(
request,
userPromptId,
@@ -85,6 +87,7 @@ export class ContentGenerationPipeline {
): Promise> {
const effectiveModel = this.contentGeneratorConfig.model;
this.converter.setModel(effectiveModel);
+ this.converter.setModalities(this.contentGeneratorConfig.modalities ?? {});
return this.executeWithErrorHandling(
request,
userPromptId,
diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
index a57bbacb7..006cf1abd 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
@@ -800,7 +800,7 @@ describe('DashScopeOpenAICompatibleProvider', () => {
const result = provider.buildRequest(request, 'test-prompt-id');
- expect(result.max_tokens).toBe(4096); // Should be limited to default output limit (4K)
+ expect(result.max_tokens).toBe(8192); // Should be limited to default output limit (8K)
});
it('should preserve other request parameters when limiting max_tokens', () => {
@@ -872,7 +872,7 @@ describe('DashScopeOpenAICompatibleProvider', () => {
],
},
],
- max_tokens: 50000,
+ max_tokens: 50000, // Exceeds the 32768 limit
};
const result = provider.buildRequest(request, 'test-prompt-id');
@@ -899,12 +899,12 @@ describe('DashScopeOpenAICompatibleProvider', () => {
],
},
],
- max_tokens: 9000,
+ max_tokens: 50000, // Exceeds the 32768 limit
};
const result = provider.buildRequest(request, 'test-prompt-id');
- expect(result.max_tokens).toBe(8192); // Limited to model's output limit (8K)
+ expect(result.max_tokens).toBe(32768); // Limited to model's output limit (32K)
expect(
(result as { vl_high_resolution_images?: boolean })
.vl_high_resolution_images,
diff --git a/packages/core/src/core/openaiContentGenerator/provider/deepseek.test.ts b/packages/core/src/core/openaiContentGenerator/provider/deepseek.test.ts
index 68693393b..9a69cd326 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/deepseek.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/deepseek.test.ts
@@ -5,7 +5,6 @@
*/
import { describe, it, expect, vi, beforeEach } from 'vitest';
-import type OpenAI from 'openai';
import { DeepSeekOpenAICompatibleProvider } from './deepseek.js';
import type { ContentGeneratorConfig } from '../../contentGenerator.js';
import type { Config } from '../../../config/config.js';
@@ -18,7 +17,6 @@ vi.mock('openai', () => ({
}));
describe('DeepSeekOpenAICompatibleProvider', () => {
- let provider: DeepSeekOpenAICompatibleProvider;
let mockContentGeneratorConfig: ContentGeneratorConfig;
let mockCliConfig: Config;
@@ -34,11 +32,6 @@ describe('DeepSeekOpenAICompatibleProvider', () => {
mockCliConfig = {
getCliVersion: vi.fn().mockReturnValue('1.0.0'),
} as unknown as Config;
-
- provider = new DeepSeekOpenAICompatibleProvider(
- mockContentGeneratorConfig,
- mockCliConfig,
- );
});
describe('isDeepSeekProvider', () => {
@@ -61,72 +54,15 @@ describe('DeepSeekOpenAICompatibleProvider', () => {
});
});
- describe('buildRequest', () => {
- const userPromptId = 'prompt-123';
-
- it('converts array content into a string', () => {
- const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = {
- model: 'deepseek-chat',
- messages: [
- {
- role: 'user',
- content: [
- { type: 'text', text: 'Hello' },
- { type: 'text', text: ' world' },
- ],
- },
- ],
- };
-
- const result = provider.buildRequest(originalRequest, userPromptId);
-
- expect(result.messages).toHaveLength(1);
- expect(result.messages?.[0]).toEqual({
- role: 'user',
- content: 'Hello world',
+ describe('getDefaultGenerationConfig', () => {
+ it('returns temperature 0', () => {
+ const provider = new DeepSeekOpenAICompatibleProvider(
+ mockContentGeneratorConfig,
+ mockCliConfig,
+ );
+ expect(provider.getDefaultGenerationConfig()).toEqual({
+ temperature: 0,
});
- expect(originalRequest.messages?.[0].content).toEqual([
- { type: 'text', text: 'Hello' },
- { type: 'text', text: ' world' },
- ]);
- });
-
- it('leaves string content unchanged', () => {
- const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = {
- model: 'deepseek-chat',
- messages: [
- {
- role: 'user',
- content: 'Hello world',
- },
- ],
- };
-
- const result = provider.buildRequest(originalRequest, userPromptId);
-
- expect(result.messages?.[0].content).toBe('Hello world');
- });
-
- it('throws when encountering non-text multimodal parts', () => {
- const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = {
- model: 'deepseek-chat',
- messages: [
- {
- role: 'user',
- content: [
- { type: 'text', text: 'Hello' },
- {
- type: 'image_url',
- image_url: { url: 'https://example.com/image.png' },
- },
- ],
- },
- ],
- };
-
- expect(() =>
- provider.buildRequest(originalRequest, userPromptId),
- ).toThrow(/only supports text content/i);
});
});
});
diff --git a/packages/core/src/core/openaiContentGenerator/provider/deepseek.ts b/packages/core/src/core/openaiContentGenerator/provider/deepseek.ts
index 9b5fd7479..0e246725f 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/deepseek.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/deepseek.ts
@@ -4,7 +4,6 @@
* SPDX-License-Identifier: Apache-2.0
*/
-import type OpenAI from 'openai';
import type { Config } from '../../../config/config.js';
import type { ContentGeneratorConfig } from '../../contentGenerator.js';
import { DefaultOpenAICompatibleProvider } from './default.js';
@@ -26,58 +25,6 @@ export class DeepSeekOpenAICompatibleProvider extends DefaultOpenAICompatiblePro
return baseUrl.toLowerCase().includes('api.deepseek.com');
}
- override buildRequest(
- request: OpenAI.Chat.ChatCompletionCreateParams,
- userPromptId: string,
- ): OpenAI.Chat.ChatCompletionCreateParams {
- const baseRequest = super.buildRequest(request, userPromptId);
- if (!baseRequest.messages?.length) {
- return baseRequest;
- }
-
- const messages = baseRequest.messages.map((message) => {
- if (!('content' in message)) {
- return message;
- }
-
- const { content } = message;
-
- if (
- typeof content === 'string' ||
- content === null ||
- content === undefined
- ) {
- return message;
- }
-
- if (!Array.isArray(content)) {
- return message;
- }
-
- const text = content
- .map((part) => {
- if (part.type !== 'text') {
- throw new Error(
- `DeepSeek provider only supports text content. Found non-text part of type '${part.type}' in message with role '${message.role}'.`,
- );
- }
-
- return part.text ?? '';
- })
- .join('');
-
- return {
- ...message,
- content: text,
- } as OpenAI.Chat.ChatCompletionMessageParam;
- });
-
- return {
- ...baseRequest,
- messages,
- };
- }
-
override getDefaultGenerationConfig(): GenerateContentConfig {
return {
temperature: 0,
diff --git a/packages/core/src/core/tokenLimits.test.ts b/packages/core/src/core/tokenLimits.test.ts
index ffd71cd4b..8aa947262 100644
--- a/packages/core/src/core/tokenLimits.test.ts
+++ b/packages/core/src/core/tokenLimits.test.ts
@@ -91,183 +91,144 @@ describe('normalize', () => {
});
describe('tokenLimit', () => {
- // Test cases for each model family
describe('Google Gemini', () => {
- it('should return the correct limit for Gemini 1.5 Pro', () => {
- expect(tokenLimit('gemini-1.5-pro')).toBe(2097152);
+ it('should return 1M for Gemini 3.x (latest)', () => {
+ expect(tokenLimit('gemini-3-pro-preview')).toBe(1000000);
+ expect(tokenLimit('gemini-3-flash-preview')).toBe(1000000);
+ expect(tokenLimit('gemini-3.1-pro-preview')).toBe(1000000);
});
- it('should return the correct limit for Gemini 1.5 Flash', () => {
- expect(tokenLimit('gemini-1.5-flash')).toBe(1048576);
- });
- it('should return the correct limit for Gemini 2.5 Pro', () => {
- expect(tokenLimit('gemini-2.5-pro')).toBe(1048576);
- });
- it('should return the correct limit for Gemini 2.5 Flash', () => {
- expect(tokenLimit('gemini-2.5-flash')).toBe(1048576);
- });
- it('should return the correct limit for Gemini 2.0 Flash with image generation', () => {
- expect(tokenLimit('gemini-2.0-flash-image-generation')).toBe(32768);
- });
- it('should return the correct limit for Gemini 2.0 Flash', () => {
- expect(tokenLimit('gemini-2.0-flash')).toBe(1048576);
+
+ it('should return 1M for legacy Gemini (fallback)', () => {
+ expect(tokenLimit('gemini-2.5-pro')).toBe(1000000);
+ expect(tokenLimit('gemini-2.5-flash')).toBe(1000000);
+ expect(tokenLimit('gemini-2.0-flash')).toBe(1000000);
+ expect(tokenLimit('gemini-1.5-pro')).toBe(1000000);
+ expect(tokenLimit('gemini-1.5-flash')).toBe(1000000);
});
});
describe('OpenAI', () => {
- it('should return the correct limit for o3-mini', () => {
- expect(tokenLimit('o3-mini')).toBe(200000);
+ it('should return 400K for GPT-5.x (latest)', () => {
+ expect(tokenLimit('gpt-5')).toBe(400000);
+ expect(tokenLimit('gpt-5-mini')).toBe(400000);
+ expect(tokenLimit('gpt-5.2')).toBe(400000);
+ expect(tokenLimit('gpt-5.2-pro')).toBe(400000);
});
- it('should return the correct limit for o3 models', () => {
- expect(tokenLimit('o3')).toBe(200000);
- });
- it('should return the correct limit for o4-mini', () => {
- expect(tokenLimit('o4-mini')).toBe(200000);
- });
- it('should return the correct limit for gpt-4o-mini', () => {
- expect(tokenLimit('gpt-4o-mini')).toBe(131072);
- });
- it('should return the correct limit for gpt-4o', () => {
+
+ it('should return 128K for legacy GPT (fallback)', () => {
expect(tokenLimit('gpt-4o')).toBe(131072);
- });
- it('should return the correct limit for gpt-4.1-mini', () => {
- expect(tokenLimit('gpt-4.1-mini')).toBe(1048576);
- });
- it('should return the correct limit for gpt-4.1 models', () => {
- expect(tokenLimit('gpt-4.1')).toBe(1048576);
- });
- it('should return the correct limit for gpt-4', () => {
+ expect(tokenLimit('gpt-4o-mini')).toBe(131072);
+ expect(tokenLimit('gpt-4.1')).toBe(131072);
expect(tokenLimit('gpt-4')).toBe(131072);
});
+
+ it('should return 200K for o-series', () => {
+ expect(tokenLimit('o3')).toBe(200000);
+ expect(tokenLimit('o3-mini')).toBe(200000);
+ expect(tokenLimit('o4-mini')).toBe(200000);
+ });
});
describe('Anthropic Claude', () => {
- it('should return the correct limit for Claude 3.5 Sonnet', () => {
+ it('should return 200K for all Claude models', () => {
+ expect(tokenLimit('claude-opus-4-6')).toBe(200000);
+ expect(tokenLimit('claude-sonnet-4-6')).toBe(200000);
+ expect(tokenLimit('claude-sonnet-4')).toBe(200000);
+ expect(tokenLimit('claude-opus-4')).toBe(200000);
expect(tokenLimit('claude-3.5-sonnet')).toBe(200000);
- });
- it('should return the correct limit for Claude 3.7 Sonnet', () => {
- expect(tokenLimit('claude-3.7-sonnet')).toBe(1048576);
- });
- it('should return the correct limit for Claude Sonnet 4', () => {
- expect(tokenLimit('claude-sonnet-4')).toBe(1048576);
- });
- it('should return the correct limit for Claude Opus 4', () => {
- expect(tokenLimit('claude-opus-4')).toBe(1048576);
+ expect(tokenLimit('claude-3.7-sonnet')).toBe(200000);
});
});
describe('Alibaba Qwen', () => {
- it('should return the correct limit for qwen3-coder commercial models', () => {
- expect(tokenLimit('qwen3-coder-plus')).toBe(1048576);
- expect(tokenLimit('qwen3-coder-plus-20250601')).toBe(1048576);
- expect(tokenLimit('qwen3-coder-flash')).toBe(1048576);
- expect(tokenLimit('qwen3-coder-flash-20250601')).toBe(1048576);
+ it('should return 1M for commercial Qwen3 models', () => {
+ expect(tokenLimit('qwen3-coder-plus')).toBe(1000000);
+ expect(tokenLimit('qwen3-coder-plus-20250601')).toBe(1000000);
+ expect(tokenLimit('qwen3-coder-flash')).toBe(1000000);
+ expect(tokenLimit('qwen3.5-plus')).toBe(1000000);
+ expect(tokenLimit('coder-model')).toBe(1000000);
});
- it('should return the correct limit for qwen3-coder open source models', () => {
+ it('should return 256K for Qwen3 non-commercial models', () => {
+ expect(tokenLimit('qwen3-max')).toBe(262144);
+ expect(tokenLimit('qwen3-max-2026-01-23')).toBe(262144);
+ expect(tokenLimit('qwen3-vl-plus')).toBe(262144);
expect(tokenLimit('qwen3-coder-7b')).toBe(262144);
- expect(tokenLimit('qwen3-coder-480b-a35b-instruct')).toBe(262144);
- expect(tokenLimit('qwen3-coder-30b-a3b-instruct')).toBe(262144);
+ expect(tokenLimit('qwen3-coder-next')).toBe(262144);
});
- it('should return the correct limit for qwen3 2507 variants', () => {
- expect(tokenLimit('qwen3-some-model-2507-instruct')).toBe(262144);
+ it('should return 1M for studio latest models', () => {
+ expect(tokenLimit('qwen-plus-latest')).toBe(1000000);
+ expect(tokenLimit('qwen-flash-latest')).toBe(1000000);
});
- it('should return the correct limit for qwen2.5-1m', () => {
- expect(tokenLimit('qwen2.5-1m')).toBe(1048576);
- expect(tokenLimit('qwen2.5-1m-instruct')).toBe(1048576);
- });
-
- it('should return the correct limit for qwen2.5', () => {
- expect(tokenLimit('qwen2.5')).toBe(131072);
- expect(tokenLimit('qwen2.5-instruct')).toBe(131072);
- });
-
- it('should return the correct limit for qwen-plus', () => {
- expect(tokenLimit('qwen-plus-latest')).toBe(1048576);
- expect(tokenLimit('qwen-plus')).toBe(131072);
- });
-
- it('should return the correct limit for qwen-flash', () => {
- expect(tokenLimit('qwen-flash-latest')).toBe(1048576);
- });
-
- it('should return the correct limit for qwen-turbo', () => {
- expect(tokenLimit('qwen-turbo')).toBe(131072);
- expect(tokenLimit('qwen-turbo-latest')).toBe(131072);
- });
- });
-
- describe('ByteDance Seed-OSS', () => {
- it('should return the correct limit for seed-oss', () => {
- expect(tokenLimit('seed-oss')).toBe(524288);
- });
- });
-
- describe('Zhipu GLM', () => {
- it('should return the correct limit for glm-4.5v', () => {
- expect(tokenLimit('glm-4.5v')).toBe(65536);
- });
- it('should return the correct limit for glm-4.5-air', () => {
- expect(tokenLimit('glm-4.5-air')).toBe(131072);
- });
- it('should return the correct limit for glm-4.5', () => {
- expect(tokenLimit('glm-4.5')).toBe(131072);
- });
- it('should return the correct limit for glm-4.6', () => {
- expect(tokenLimit('glm-4.6')).toBe(202752);
+ it('should return 256K for Qwen fallback', () => {
+ expect(tokenLimit('qwen-plus')).toBe(262144);
+ expect(tokenLimit('qwen-turbo')).toBe(262144);
+ expect(tokenLimit('qwen2.5')).toBe(262144);
+ expect(tokenLimit('qwen-vl-max-latest')).toBe(262144);
+ expect(tokenLimit('vision-model')).toBe(262144);
});
});
describe('DeepSeek', () => {
- it('should return the correct limit for deepseek-r1', () => {
+ it('should return 128K for DeepSeek models', () => {
expect(tokenLimit('deepseek-r1')).toBe(131072);
- });
- it('should return the correct limit for deepseek-v3', () => {
expect(tokenLimit('deepseek-v3')).toBe(131072);
+ expect(tokenLimit('deepseek-chat')).toBe(131072);
});
- it('should return the correct limit for deepseek-v3.1', () => {
- expect(tokenLimit('deepseek-v3.1')).toBe(131072);
+ });
+
+ describe('Zhipu GLM', () => {
+ it('should return 200K for GLM-5 and GLM-4.7 (latest)', () => {
+ expect(tokenLimit('glm-5')).toBe(202752);
+ expect(tokenLimit('glm-4.7')).toBe(202752);
});
- it('should return the correct limit for deepseek-v3.2', () => {
- expect(tokenLimit('deepseek-v3.2-exp')).toBe(131072);
+
+ it('should return 200K for legacy GLM (fallback)', () => {
+ expect(tokenLimit('glm-4.5')).toBe(202752);
+ expect(tokenLimit('glm-4.5v')).toBe(202752);
+ expect(tokenLimit('glm-4.5-air')).toBe(202752);
+ });
+ });
+
+ describe('MiniMax', () => {
+ it('should return 1M for MiniMax-M2.5 (latest)', () => {
+ expect(tokenLimit('MiniMax-M2.5')).toBe(1000000);
+ });
+
+ it('should return 200K for MiniMax fallback', () => {
+ expect(tokenLimit('MiniMax-M2.1')).toBe(200000);
});
});
describe('Moonshot Kimi', () => {
- it('should return the correct limit for kimi-k2 variants', () => {
- expect(tokenLimit('kimi-k2-0905-preview')).toBe(262144); // 256K
+ it('should return 256K for Kimi models', () => {
+ expect(tokenLimit('kimi-k2.5')).toBe(262144);
expect(tokenLimit('kimi-k2-0905')).toBe(262144);
- expect(tokenLimit('kimi-k2-turbo-preview')).toBe(262144);
expect(tokenLimit('kimi-k2-turbo')).toBe(262144);
- expect(tokenLimit('kimi-k2-0711-preview')).toBe(262144);
- expect(tokenLimit('kimi-k2-instruct')).toBe(262144);
});
});
describe('Other models', () => {
- it('should return the correct limit for gpt-oss', () => {
- expect(tokenLimit('gpt-oss')).toBe(131072);
+ it('should return correct limits for other known models', () => {
+ expect(tokenLimit('seed-oss')).toBe(524288);
});
- it('should return the correct limit for llama-4-scout', () => {
- expect(tokenLimit('llama-4-scout')).toBe(10485760);
- });
- it('should return the correct limit for mistral-large-2', () => {
- expect(tokenLimit('mistral-large-2')).toBe(131072);
+
+ it('should return the default token limit for unknown models', () => {
+ expect(tokenLimit('llama-4-scout')).toBe(DEFAULT_TOKEN_LIMIT);
});
});
- // Test for default limit
it('should return the default token limit for an unknown model', () => {
expect(tokenLimit('unknown-model-v1.0')).toBe(DEFAULT_TOKEN_LIMIT);
+ expect(tokenLimit('mistral-large-2')).toBe(DEFAULT_TOKEN_LIMIT);
});
- // Test with complex model string
it('should return the correct limit for a complex model string', () => {
expect(tokenLimit(' a/b/c|GPT-4o:gpt-4o-2024-05-13-q4 ')).toBe(131072);
});
- // Test case-insensitive matching
it('should handle case-insensitive model names', () => {
expect(tokenLimit('GPT-4O')).toBe(131072);
expect(tokenLimit('CLAUDE-3.5-SONNET')).toBe(200000);
@@ -275,99 +236,96 @@ describe('tokenLimit', () => {
});
describe('tokenLimit with output type', () => {
- describe('Qwen models with output limits', () => {
- it('should return the correct output limit for qwen3-coder-plus', () => {
- expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536);
- expect(tokenLimit('qwen3-coder-plus-20250601', 'output')).toBe(65536);
+ describe('latest models output limits', () => {
+ it('should return correct output limits for GPT-5.x', () => {
+ expect(tokenLimit('gpt-5.2', 'output')).toBe(131072);
+ expect(tokenLimit('gpt-5-mini', 'output')).toBe(131072);
});
- it('should return the correct output limit for qwen-vl-max-latest', () => {
- expect(tokenLimit('qwen-vl-max-latest', 'output')).toBe(8192);
+ it('should return correct output limits for Gemini 3.x', () => {
+ expect(tokenLimit('gemini-3-pro-preview', 'output')).toBe(65536);
+ expect(tokenLimit('gemini-3-flash-preview', 'output')).toBe(65536);
+ });
+
+ it('should return correct output limits for Claude 4.6', () => {
+ expect(tokenLimit('claude-opus-4-6', 'output')).toBe(131072);
+ expect(tokenLimit('claude-sonnet-4-6', 'output')).toBe(65536);
});
});
- describe('Default output limits', () => {
+ describe('legacy model output fallbacks', () => {
+ it('should return fallback output limits for legacy GPT', () => {
+ expect(tokenLimit('gpt-4o', 'output')).toBe(16384);
+ });
+
+ it('should return fallback output limits for legacy Gemini', () => {
+ expect(tokenLimit('gemini-2.5-pro', 'output')).toBe(8192);
+ });
+
+ it('should return fallback output limits for legacy Claude', () => {
+ expect(tokenLimit('claude-sonnet-4', 'output')).toBe(65536);
+ expect(tokenLimit('claude-opus-4', 'output')).toBe(65536);
+ });
+ });
+
+ describe('Qwen output limits', () => {
+ it('should return correct output limits for Qwen models', () => {
+ expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536);
+ expect(tokenLimit('qwen3-coder-next', 'output')).toBe(65536);
+ expect(tokenLimit('qwen3.5-plus', 'output')).toBe(65536);
+ expect(tokenLimit('qwen3-max', 'output')).toBe(65536);
+ expect(tokenLimit('qwen3-max-2026-01-23', 'output')).toBe(65536);
+ expect(tokenLimit('qwen3-vl-plus', 'output')).toBe(32768);
+ expect(tokenLimit('qwen-vl-max-latest', 'output')).toBe(8192);
+ expect(tokenLimit('vision-model', 'output')).toBe(32768);
+ });
+ });
+
+ describe('other output limits', () => {
+ it('should return correct output limits for DeepSeek', () => {
+ expect(tokenLimit('deepseek-reasoner', 'output')).toBe(65536);
+ expect(tokenLimit('deepseek-chat', 'output')).toBe(8192);
+ });
+
+ it('should return correct output limits for GLM', () => {
+ expect(tokenLimit('glm-5', 'output')).toBe(16384);
+ expect(tokenLimit('glm-4.7', 'output')).toBe(16384);
+ });
+
+ it('should return correct output limits for MiniMax', () => {
+ expect(tokenLimit('MiniMax-M2.5', 'output')).toBe(65536);
+ });
+
+ it('should return correct output limits for Kimi', () => {
+ expect(tokenLimit('kimi-k2.5', 'output')).toBe(32768);
+ });
+ });
+
+ describe('default output limits', () => {
it('should return the default output limit for unknown models', () => {
expect(tokenLimit('unknown-model', 'output')).toBe(
DEFAULT_OUTPUT_TOKEN_LIMIT,
);
- expect(tokenLimit('gpt-4', 'output')).toBe(DEFAULT_OUTPUT_TOKEN_LIMIT);
- expect(tokenLimit('claude-3.5-sonnet', 'output')).toBe(
- DEFAULT_OUTPUT_TOKEN_LIMIT,
- );
- });
-
- it('should return the default output limit for models without specific output patterns', () => {
- expect(tokenLimit('qwen3-coder-7b', 'output')).toBe(
- DEFAULT_OUTPUT_TOKEN_LIMIT,
- );
- expect(tokenLimit('qwen-plus', 'output')).toBe(
- DEFAULT_OUTPUT_TOKEN_LIMIT,
- );
- expect(tokenLimit('qwen-vl-max', 'output')).toBe(
- DEFAULT_OUTPUT_TOKEN_LIMIT,
- );
});
});
- describe('Input vs Output limits comparison', () => {
- it('should return different limits for input vs output for qwen3-coder-plus', () => {
- expect(tokenLimit('qwen3-coder-plus', 'input')).toBe(1048576); // 1M input
- expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536); // 64K output
+ describe('input vs output comparison', () => {
+ it('should return different limits for input vs output', () => {
+ expect(tokenLimit('qwen3-coder-plus', 'input')).toBe(1000000);
+ expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536);
});
- it('should return different limits for input vs output for qwen-vl-max-latest', () => {
- expect(tokenLimit('qwen-vl-max-latest', 'input')).toBe(131072); // 128K input
- expect(tokenLimit('qwen-vl-max-latest', 'output')).toBe(8192); // 8K output
- });
-
- it('should return different limits for input vs output for qwen3-vl-plus', () => {
- expect(tokenLimit('qwen3-vl-plus', 'input')).toBe(262144); // 256K input
- expect(tokenLimit('qwen3-vl-plus', 'output')).toBe(32768); // 32K output
- });
-
- it('should return same default limits for unknown models', () => {
- expect(tokenLimit('unknown-model', 'input')).toBe(DEFAULT_TOKEN_LIMIT); // 128K input
- expect(tokenLimit('unknown-model', 'output')).toBe(
- DEFAULT_OUTPUT_TOKEN_LIMIT,
- ); // 4K output
- });
- });
-
- describe('Backward compatibility', () => {
it('should default to input type when no type is specified', () => {
- expect(tokenLimit('qwen3-coder-plus')).toBe(1048576); // Should be input limit
- expect(tokenLimit('qwen-vl-max-latest')).toBe(131072); // Should be input limit
- expect(tokenLimit('unknown-model')).toBe(DEFAULT_TOKEN_LIMIT); // Should be input default
- });
-
- it('should work with explicit input type', () => {
- expect(tokenLimit('qwen3-coder-plus', 'input')).toBe(1048576);
- expect(tokenLimit('qwen-vl-max-latest', 'input')).toBe(131072);
- expect(tokenLimit('unknown-model', 'input')).toBe(DEFAULT_TOKEN_LIMIT);
+ expect(tokenLimit('qwen3-coder-plus')).toBe(1000000);
+ expect(tokenLimit('unknown-model')).toBe(DEFAULT_TOKEN_LIMIT);
});
});
- describe('Model normalization with output limits', () => {
+ describe('normalization with output limits', () => {
it('should handle normalized model names for output limits', () => {
expect(tokenLimit('QWEN3-CODER-PLUS', 'output')).toBe(65536);
expect(tokenLimit('qwen3-coder-plus-20250601', 'output')).toBe(65536);
expect(tokenLimit('QWEN-VL-MAX-LATEST', 'output')).toBe(8192);
});
-
- it('should handle complex model strings for output limits', () => {
- expect(
- tokenLimit(
- ' a/b/c|QWEN3-CODER-PLUS:qwen3-coder-plus-2024-05-13 ',
- 'output',
- ),
- ).toBe(65536);
- expect(
- tokenLimit(
- 'provider/qwen-vl-max-latest:qwen-vl-max-latest-v1',
- 'output',
- ),
- ).toBe(8192);
- });
});
});
diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts
index ae6cbd9e2..7d18497b7 100644
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -9,23 +9,23 @@ type TokenCount = number;
export type TokenLimitType = 'input' | 'output';
export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
-export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 4_096; // 4K tokens
+export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 8_192; // 8K tokens
/**
* Accurate numeric limits:
* - power-of-two approximations (128K -> 131072, 256K -> 262144, etc.)
- * - vendor-declared exact values (e.g., 200k -> 200000) are used as stated in docs.
+ * - vendor-declared exact values (e.g., 200k -> 200000, 1m -> 1000000) are
+ * used as stated in docs.
*/
const LIMITS = {
'32k': 32_768,
'64k': 65_536,
'128k': 131_072,
- '200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, GLM etc.
+ '200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, etc.
'256k': 262_144,
+ '400k': 400_000, // vendor-declared decimal, used by OpenAI GPT-5.x
'512k': 524_288,
- '1m': 1_048_576,
- '2m': 2_097_152,
- '10m': 10_485_760, // 10 million tokens
+ '1m': 1_000_000,
// Output token limits (typically much smaller than input limits)
'4k': 4_096,
'8k': 8_192,
@@ -81,113 +81,67 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
// -------------------
// Google Gemini
// -------------------
- [/^gemini-1\.5-pro$/, LIMITS['2m']],
- [/^gemini-1\.5-flash$/, LIMITS['1m']],
- [/^gemini-2\.5-pro.*$/, LIMITS['1m']],
- [/^gemini-2\.5-flash.*$/, LIMITS['1m']],
- [/^gemini-2\.0-flash-image-generation$/, LIMITS['32k']],
- [/^gemini-2\.0-flash.*$/, LIMITS['1m']],
+ [/^gemini-3/, LIMITS['1m']], // Gemini 3.x (Pro, Flash, 3.1, etc.): 1M
+ [/^gemini-/, LIMITS['1m']], // Gemini fallback (1.5, 2.x): 1M
// -------------------
- // OpenAI (o3 / o4-mini / gpt-4.1 / gpt-4o family)
- // o3 and o4-mini document a 200,000-token context window (decimal).
- // Note: GPT-4.1 models typically report 1_048_576 (1M) context in OpenAI announcements.
- [/^o3(?:-mini|$).*$/, LIMITS['200k']],
- [/^o3.*$/, LIMITS['200k']],
- [/^o4-mini.*$/, LIMITS['200k']],
- [/^gpt-4\.1-mini.*$/, LIMITS['1m']],
- [/^gpt-4\.1.*$/, LIMITS['1m']],
- [/^gpt-4o-mini.*$/, LIMITS['128k']],
- [/^gpt-4o.*$/, LIMITS['128k']],
- [/^gpt-4.*$/, LIMITS['128k']],
+ // OpenAI
+ // -------------------
+ [/^gpt-5/, LIMITS['400k']], // GPT-5.x: 400K
+ [/^gpt-/, LIMITS['128k']], // GPT fallback (4o, 4.1, etc.): 128K
+ [/^o\d/, LIMITS['200k']], // o-series (o3, o4-mini, etc.): 200K
// -------------------
// Anthropic Claude
- // - Claude Sonnet / Sonnet 3.5 and related Sonnet variants: 200,000 tokens documented.
- // - Some Sonnet/Opus models offer 1M in beta/enterprise tiers (handled separately if needed).
- [/^claude-3\.5-sonnet.*$/, LIMITS['200k']],
- [/^claude-3\.7-sonnet.*$/, LIMITS['1m']], // some Sonnet 3.7/Opus variants advertise 1M beta in docs
- [/^claude-sonnet-4.*$/, LIMITS['1m']],
- [/^claude-opus-4.*$/, LIMITS['1m']],
+ // -------------------
+ [/^claude-/, LIMITS['200k']], // All Claude models: 200K
// -------------------
// Alibaba / Qwen
// -------------------
- // Commercial Qwen3-Coder-Plus: 1M token context
- [/^qwen3-coder-plus(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-plus" and date variants
-
- // Commercial Qwen3-Coder-Flash: 1M token context
- [/^qwen3-coder-flash(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-flash" and date variants
-
- // Commercial Qwen3.5-Plus: 1M token context
- [/^qwen3\.5-plus(-.*)?$/, LIMITS['1m']], // catches "qwen3.5-plus" and date variants
-
- // Generic coder-model: same as qwen3.5-plus (1M token context)
+ // Commercial API models (1,000,000 context)
+ [/^qwen3-coder-plus/, LIMITS['1m']],
+ [/^qwen3-coder-flash/, LIMITS['1m']],
+ [/^qwen3\.5-plus/, LIMITS['1m']],
[/^coder-model$/, LIMITS['1m']],
-
- // Commercial Qwen3-Max-Preview: 256K token context
- [/^qwen3-max(-preview)?(-.*)?$/, LIMITS['256k']], // catches "qwen3-max" or "qwen3-max-preview" and date variants
-
- // Open-source Qwen3-Coder variants: 256K native
- [/^qwen3-coder-.*$/, LIMITS['256k']],
- // Open-source Qwen3 2507 variants: 256K native
- [/^qwen3-.*-2507-.*$/, LIMITS['256k']],
-
- // Open-source long-context Qwen2.5-1M
- [/^qwen2\.5-1m.*$/, LIMITS['1m']],
-
- // Standard Qwen2.5: 128K
- [/^qwen2\.5.*$/, LIMITS['128k']],
-
- // Studio commercial Qwen-Plus / Qwen-Flash / Qwen-Turbo
- [/^qwen-plus-latest$/, LIMITS['1m']], // Commercial latest: 1M
- [/^qwen-plus.*$/, LIMITS['128k']], // Standard: 128K
+ // Commercial API models (256K context)
+ [/^qwen3-max/, LIMITS['256k']],
+ [/^qwen3-vl-plus$/, LIMITS['256k']],
+ [/^vision-model$/, LIMITS['256k']],
+ // Open-source Qwen3 variants: 256K native
+ [/^qwen3-coder-/, LIMITS['256k']],
+ // Studio commercial Qwen-Plus / Qwen-Flash
+ [/^qwen-plus-latest$/, LIMITS['1m']],
[/^qwen-flash-latest$/, LIMITS['1m']],
- [/^qwen-turbo.*$/, LIMITS['128k']],
-
- // Qwen Vision Models
- [/^qwen3-vl-plus$/, LIMITS['256k']], // Qwen3-VL-Plus: 256K input
- [/^qwen-vl-max.*$/, LIMITS['128k']],
-
- // Generic vision-model: same as qwen-vl-max (128K token context)
- [/^vision-model$/, LIMITS['128k']],
-
- // -------------------
- // ByteDance Seed-OSS (512K)
- // -------------------
- [/^seed-oss.*$/, LIMITS['512k']],
-
- // -------------------
- // Zhipu GLM
- // -------------------
- [/^glm-4\.5v(?:-.*)?$/, LIMITS['64k']],
- [/^glm-4\.5-air(?:-.*)?$/, LIMITS['128k']],
- [/^glm-4\.5(?:-.*)?$/, LIMITS['128k']],
- [/^glm-4\.6(?:-.*)?$/, 202_752 as unknown as TokenCount], // exact limit from the model config file
- [/^glm-4\.7(?:-.*)?$/, LIMITS['200k']],
+  // Qwen fallback (VL, turbo, plus, 2.5, etc.): 256K
+ [/^qwen/, LIMITS['256k']],
// -------------------
// DeepSeek
// -------------------
- [/^deepseek(?:-.*)?$/, LIMITS['128k']],
+ [/^deepseek/, LIMITS['128k']],
// -------------------
- // Moonshot / Kimi
+ // Zhipu GLM
// -------------------
- [/^kimi-2\.5.*$/, LIMITS['256k']], // Kimi-2.5: 256K context
- [/^kimi-k2.*$/, LIMITS['256k']], // Kimi-k2 variants: 256K context
-
- // -------------------
- // GPT-OSS / Llama & Mistral examples
- // -------------------
- [/^gpt-oss.*$/, LIMITS['128k']],
- [/^llama-4-scout.*$/, LIMITS['10m']],
- [/^mistral-large-2.*$/, LIMITS['128k']],
+ [/^glm-5/, 202_752 as TokenCount], // GLM-5: exact vendor limit
+  [/^glm-/, 202_752 as TokenCount], // GLM fallback: 202,752 (exact vendor limit, ~200K)
// -------------------
// MiniMax
// -------------------
- [/^minimax-m2\.1.*$/i, LIMITS['200k']], // MiniMax-M2.1: 200K context
+ [/^minimax-m2\.5/i, LIMITS['1m']], // MiniMax-M2.5: 1,000,000
+ [/^minimax-/i, LIMITS['200k']], // MiniMax fallback: 200K
+
+ // -------------------
+ // Moonshot / Kimi
+ // -------------------
+ [/^kimi-/, LIMITS['256k']], // Kimi fallback: 256K
+
+ // -------------------
+ // Other
+ // -------------------
+ [/^seed-oss/, LIMITS['512k']],
];
/**
@@ -196,35 +150,40 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
* in a single response for specific models.
*/
const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
- // -------------------
- // Alibaba / Qwen - DashScope Models
- // -------------------
- // Qwen3-Coder-Plus: 65,536 max output tokens
- [/^qwen3-coder-plus(-.*)?$/, LIMITS['64k']],
+ // Google Gemini
+ [/^gemini-3/, LIMITS['64k']], // Gemini 3.x: 64K
+ [/^gemini-/, LIMITS['8k']], // Gemini fallback: 8K
- // Qwen3.5-Plus: 65,536 max output tokens
- [/^qwen3\.5-plus(-.*)?$/, LIMITS['64k']],
+ // OpenAI
+ [/^gpt-5/, LIMITS['128k']], // GPT-5.x: 128K
+ [/^gpt-/, LIMITS['16k']], // GPT fallback: 16K
+ [/^o\d/, LIMITS['128k']], // o-series: 128K
- // Generic coder-model: same as qwen3.5-plus (64K max output tokens)
+ // Anthropic Claude
+ [/^claude-opus-4-6/, LIMITS['128k']], // Opus 4.6: 128K
+ [/^claude-sonnet-4-6/, LIMITS['64k']], // Sonnet 4.6: 64K
+ [/^claude-/, LIMITS['64k']], // Claude fallback: 64K
+
+ // Alibaba / Qwen
+ [/^qwen3\.5/, LIMITS['64k']],
[/^coder-model$/, LIMITS['64k']],
-
- // Qwen3-Max: 65,536 max output tokens
- [/^qwen3-max(-preview)?(-.*)?$/, LIMITS['64k']],
-
- // Qwen-VL-Max-Latest: 8,192 max output tokens
- [/^qwen-vl-max-latest$/, LIMITS['8k']],
-
- // Generic vision-model: same as qwen-vl-max-latest (8K max output tokens)
- [/^vision-model$/, LIMITS['8k']],
-
- // Qwen3-VL-Plus: 32K max output tokens
[/^qwen3-vl-plus$/, LIMITS['32k']],
+ [/^vision-model$/, LIMITS['32k']],
+ [/^qwen3-/, LIMITS['64k']],
- // Deepseek-chat: 8k max tokens
- [/^deepseek-chat$/, LIMITS['8k']],
+ // DeepSeek
+ [/^deepseek-reasoner/, LIMITS['64k']],
+ [/^deepseek-chat/, LIMITS['8k']],
- // Deepseek-reasoner: 64k max tokens
- [/^deepseek-reasoner$/, LIMITS['64k']],
+ // Zhipu GLM
+ [/^glm-5/, LIMITS['16k']],
+ [/^glm-4\.7/, LIMITS['16k']],
+
+ // MiniMax
+ [/^minimax-m2\.5/i, LIMITS['64k']],
+
+ // Kimi
+ [/^kimi-k2\.5/, LIMITS['32k']],
];
/**
diff --git a/packages/core/src/models/constants.ts b/packages/core/src/models/constants.ts
index 9e5d15009..4ed57ae42 100644
--- a/packages/core/src/models/constants.ts
+++ b/packages/core/src/models/constants.ts
@@ -28,6 +28,7 @@ export const MODEL_GENERATION_CONFIG_FIELDS = [
'contextWindowSize',
'customHeaders',
'extra_body',
+ 'modalities',
] as const satisfies ReadonlyArray;
/**
@@ -107,7 +108,7 @@ export const QWEN_OAUTH_MODELS: ModelConfig[] = [
name: 'coder-model',
description:
'Qwen 3.5 Plus — efficient hybrid model with leading coding performance',
- capabilities: { vision: false },
+ capabilities: { vision: true },
},
{
id: 'vision-model',
diff --git a/packages/core/src/models/modelRegistry.ts b/packages/core/src/models/modelRegistry.ts
index 7b9bdad77..c2815fb32 100644
--- a/packages/core/src/models/modelRegistry.ts
+++ b/packages/core/src/models/modelRegistry.ts
@@ -5,6 +5,8 @@
*/
import { AuthType } from '../core/contentGenerator.js';
+import { defaultModalities } from '../core/modalityDefaults.js';
+import { tokenLimit } from '../core/tokenLimits.js';
import { DEFAULT_OPENAI_BASE_URL } from '../core/openaiContentGenerator/constants.js';
import {
type ModelConfig,
@@ -121,7 +123,12 @@ export class ModelRegistry {
capabilities: model.capabilities,
authType: model.authType,
isVision: model.capabilities?.vision ?? false,
- contextWindowSize: model.generationConfig.contextWindowSize,
+ contextWindowSize:
+ model.generationConfig.contextWindowSize ?? tokenLimit(model.id),
+ modalities:
+ model.generationConfig.modalities ?? defaultModalities(model.id),
+ baseUrl: model.baseUrl,
+ envKey: model.envKey,
}));
}
diff --git a/packages/core/src/models/modelsConfig.ts b/packages/core/src/models/modelsConfig.ts
index 9311c9279..3b53c868c 100644
--- a/packages/core/src/models/modelsConfig.ts
+++ b/packages/core/src/models/modelsConfig.ts
@@ -11,6 +11,7 @@ import type { ContentGeneratorConfig } from '../core/contentGenerator.js';
import type { ContentGeneratorConfigSources } from '../core/contentGenerator.js';
import { DEFAULT_QWEN_MODEL } from '../config/models.js';
import { tokenLimit } from '../core/tokenLimits.js';
+import { defaultModalities } from '../core/modalityDefaults.js';
import { ModelRegistry } from './modelRegistry.js';
import {
@@ -769,6 +770,15 @@ export class ModelsConfig {
detail: 'auto-detected from model',
};
}
+
+ // modalities fallback: auto-detect from model when not set by provider
+ if (gc.modalities === undefined) {
+ this._generationConfig.modalities = defaultModalities(model.id);
+ this.generationConfigSources['modalities'] = {
+ kind: 'computed',
+ detail: 'auto-detected from model',
+ };
+ }
}
/**
diff --git a/packages/core/src/models/types.ts b/packages/core/src/models/types.ts
index 69c286729..5c9c9b51d 100644
--- a/packages/core/src/models/types.ts
+++ b/packages/core/src/models/types.ts
@@ -7,6 +7,7 @@
import type {
AuthType,
ContentGeneratorConfig,
+ InputModalities,
} from '../core/contentGenerator.js';
import type { ConfigSources } from '../utils/configResolver.js';
@@ -35,6 +36,7 @@ export type ModelGenerationConfig = Pick<
| 'customHeaders'
| 'extra_body'
| 'contextWindowSize'
+ | 'modalities'
>;
/**
@@ -93,6 +95,9 @@ export interface AvailableModel {
authType: AuthType;
isVision?: boolean;
contextWindowSize?: number;
+ modalities?: InputModalities;
+ baseUrl?: string;
+ envKey?: string;
/** Whether this is a runtime model (not from modelProviders) */
isRuntimeModel?: boolean;