diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md index 9cbbe0387..7df625a69 100644 --- a/docs/users/configuration/settings.md +++ b/docs/users/configuration/settings.md @@ -96,18 +96,18 @@ Settings are organized into categories. All settings should be placed within the #### model -| Setting | Type | Description | Default | -| -------------------------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -| `model.name` | string | The Qwen model to use for conversations. | `undefined` | -| `model.maxSessionTurns` | number | Maximum number of user/model/tool turns to keep in a session. -1 means unlimited. | `-1` | -| `model.summarizeToolOutput` | object | Enables or disables the summarization of tool output. You can specify the token budget for the summarization using the `tokenBudget` setting. Note: Currently only the `run_shell_command` tool is supported. For example `{"run_shell_command": {"tokenBudget": 2000}}` | `undefined` | -| `model.generationConfig` | object | Advanced overrides passed to the underlying content generator. Supports request controls such as `timeout`, `maxRetries`, `disableCacheControl`, and `customHeaders` (custom HTTP headers for API requests), along with fine-tuning knobs under `samplingParams` (for example `temperature`, `top_p`, `max_tokens`). Leave unset to rely on provider defaults. | `undefined` | -| `model.chatCompression.contextPercentageThreshold` | number | Sets the threshold for chat history compression as a percentage of the model's total token limit. 
This is a value between 0 and 1 that applies to both automatic compression and the manual `/compress` command. For example, a value of `0.6` will trigger compression when the chat history exceeds 60% of the token limit. Use `0` to disable compression entirely. | `0.7` | -| `model.skipNextSpeakerCheck` | boolean | Skip the next speaker check. | `false` | -| `model.skipLoopDetection` | boolean | Disables loop detection checks. Loop detection prevents infinite loops in AI responses but can generate false positives that interrupt legitimate workflows. Enable this option if you experience frequent false positive loop detection interruptions. | `false` | -| `model.skipStartupContext` | boolean | Skips sending the startup workspace context (environment summary and acknowledgement) at the beginning of each session. Enable this if you prefer to provide context manually or want to save tokens on startup. | `false` | -| `model.enableOpenAILogging` | boolean | Enables logging of OpenAI API calls for debugging and analysis. When enabled, API requests and responses are logged to JSON files. | `false` | -| `model.openAILoggingDir` | string | Custom directory path for OpenAI API logs. If not specified, defaults to `logs/openai` in the current working directory. Supports absolute paths, relative paths (resolved from current working directory), and `~` expansion (home directory). 
| `undefined` | +| Setting | Type | Description | Default | +| -------------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | +| `model.name` | string | The Qwen model to use for conversations. | `undefined` | +| `model.maxSessionTurns` | number | Maximum number of user/model/tool turns to keep in a session. -1 means unlimited. | `-1` | +| `model.summarizeToolOutput` | object | Enables or disables the summarization of tool output. You can specify the token budget for the summarization using the `tokenBudget` setting. Note: Currently only the `run_shell_command` tool is supported. For example `{"run_shell_command": {"tokenBudget": 2000}}` | `undefined` | +| `model.generationConfig` | object | Advanced overrides passed to the underlying content generator. Supports request controls such as `timeout`, `maxRetries`, `disableCacheControl`, `customHeaders` (custom HTTP headers for API requests), and `extra_body` (additional body parameters for OpenAI-compatible API requests only), along with fine-tuning knobs under `samplingParams` (for example `temperature`, `top_p`, `max_tokens`). Leave unset to rely on provider defaults. | `undefined` | +| `model.chatCompression.contextPercentageThreshold` | number | Sets the threshold for chat history compression as a percentage of the model's total token limit. This is a value between 0 and 1 that applies to both automatic compression and the manual `/compress` command. For example, a value of `0.6` will trigger compression when the chat history exceeds 60% of the token limit. 
Use `0` to disable compression entirely. | `0.7` | +| `model.skipNextSpeakerCheck` | boolean | Skip the next speaker check. | `false` | +| `model.skipLoopDetection` | boolean | Disables loop detection checks. Loop detection prevents infinite loops in AI responses but can generate false positives that interrupt legitimate workflows. Enable this option if you experience frequent false positive loop detection interruptions. | `false` | +| `model.skipStartupContext` | boolean | Skips sending the startup workspace context (environment summary and acknowledgement) at the beginning of each session. Enable this if you prefer to provide context manually or want to save tokens on startup. | `false` | +| `model.enableOpenAILogging` | boolean | Enables logging of OpenAI API calls for debugging and analysis. When enabled, API requests and responses are logged to JSON files. | `false` | +| `model.openAILoggingDir` | string | Custom directory path for OpenAI API logs. If not specified, defaults to `logs/openai` in the current working directory. Supports absolute paths, relative paths (resolved from current working directory), and `~` expansion (home directory). | `undefined` | **Example model.generationConfig:** @@ -121,6 +121,9 @@ Settings are organized into categories. All settings should be placed within the "X-Request-ID": "req-123", "X-User-ID": "user-456" }, + "extra_body": { + "enable_thinking": true + }, "samplingParams": { "temperature": 0.2, "top_p": 0.8, @@ -133,6 +136,8 @@ Settings are organized into categories. All settings should be placed within the The `customHeaders` field allows you to add custom HTTP headers to all API requests. This is useful for request tracing, monitoring, API gateway routing, or when different models require different headers. If `customHeaders` is defined in `modelProviders[].generationConfig.customHeaders`, it will be used directly; otherwise, headers from `model.generationConfig.customHeaders` will be used. 
No merging occurs between the two levels. +The `extra_body` field allows you to add custom parameters to the request body sent to the API. This is useful for provider-specific options that are not covered by the standard configuration fields. **Note: This field is only supported for OpenAI-compatible providers (`openai`, `qwen-oauth`). It is ignored for Anthropic and Gemini providers.** If `extra_body` is defined in `modelProviders[].generationConfig.extra_body`, it will be used directly; otherwise, values from `model.generationConfig.extra_body` will be used. + **model.openAILoggingDir examples:** - `"~/qwen-logs"` - Logs to `~/qwen-logs` directory @@ -161,6 +166,9 @@ Use `modelProviders` to declare curated model lists per auth type that the `/mod "X-Model-Version": "v1.0", "X-Request-Priority": "high" }, + "extra_body": { + "enable_thinking": true + }, "samplingParams": { "temperature": 0.2 } } } @@ -222,7 +230,7 @@ Per-field precedence for `generationConfig`: 3. `settings.model.generationConfig` 4. Content-generator defaults (`getDefaultGenerationConfig` for OpenAI, `getParameterValue` for Gemini, etc.) -`samplingParams` and `customHeaders` are both treated atomically; provider values replace the entire object. If `modelProviders[].generationConfig` defines these fields, they are used directly; otherwise, values from `model.generationConfig` are used. No merging occurs between provider and global configuration levels. Defaults from the content generator apply last so each provider retains its tuned baseline. +`samplingParams`, `customHeaders`, and `extra_body` are all treated atomically; provider values replace the entire object. If `modelProviders[].generationConfig` defines these fields, they are used directly; otherwise, values from `model.generationConfig` are used. No merging occurs between provider and global configuration levels. Defaults from the content generator apply last so each provider retains its tuned baseline. 
##### Selection persistence and recommendations diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index 8849400a5..aaa76e396 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -93,6 +93,8 @@ export type ContentGeneratorConfig = { schemaCompliance?: 'auto' | 'openapi_30'; // Custom HTTP headers to be sent with requests customHeaders?: Record<string, string>; + // Extra body parameters to be merged into the request body + extra_body?: Record<string, unknown>; }; // Keep the public ContentGeneratorConfigSources API, but reuse the generic diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts index 09f4c83ca..107c2fcba 100644 --- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts +++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts @@ -929,5 +929,71 @@ describe('DashScopeOpenAICompatibleProvider', () => { expect(result.max_tokens).toBe(65536); // Should be limited expect(result.stream).toBe(true); // Streaming should be preserved }); + + it('should merge extra_body into the request', () => { + const providerWithExtraBody = new DashScopeOpenAICompatibleProvider( + { + ...mockContentGeneratorConfig, + extra_body: { + custom_param: 'custom_value', + nested: { key: 'value' }, + }, + }, + mockCliConfig, + ); + + const request: OpenAI.Chat.ChatCompletionCreateParams = { + model: 'qwen3-coder-plus', + messages: [{ role: 'user', content: 'Hello' }], + }; + + const result = providerWithExtraBody.buildRequest( + request, + 'test-prompt-id', + ); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + expect((result as any).custom_param).toBe('custom_value'); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + expect((result as any).nested).toEqual({ key: 'value' }); + }); + + it('should merge extra_body into vision model 
requests', () => { + const providerWithExtraBody = new DashScopeOpenAICompatibleProvider( + { + ...mockContentGeneratorConfig, + extra_body: { + custom_param: 'custom_value', + }, + }, + mockCliConfig, + ); + + const request: OpenAI.Chat.ChatCompletionCreateParams = { + model: 'qwen-vl-max', + messages: [{ role: 'user', content: 'Hello' }], + }; + + const result = providerWithExtraBody.buildRequest( + request, + 'test-prompt-id', + ); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + expect((result as any).custom_param).toBe('custom_value'); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + expect((result as any).vl_high_resolution_images).toBe(true); + }); + + it('should not include extra_body when not configured', () => { + const request: OpenAI.Chat.ChatCompletionCreateParams = { + model: 'qwen3-coder-plus', + messages: [{ role: 'user', content: 'Hello' }], + }; + + const result = provider.buildRequest(request, 'test-prompt-id'); + + expect(result).not.toHaveProperty('custom_param'); + }); }); }); diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts index 0a8458e0a..5ebccdf8f 100644 --- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts +++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts @@ -124,6 +124,8 @@ export class DashScopeOpenAICompatibleProvider request.model, ); + const extraBody = this.contentGeneratorConfig.extra_body; + if (this.isVisionModel(request.model)) { return { ...requestWithTokenLimits, @@ -132,6 +134,7 @@ export class DashScopeOpenAICompatibleProvider ...(this.buildMetadata(userPromptId) || {}), /* @ts-expect-error dashscope exclusive */ vl_high_resolution_images: true, + ...(extraBody ? extraBody : {}), } as OpenAI.Chat.ChatCompletionCreateParams; } @@ -140,6 +143,7 @@ export class DashScopeOpenAICompatibleProvider messages, ...(tools ? 
{ tools } : {}), ...(this.buildMetadata(userPromptId) || {}), + ...(extraBody ? extraBody : {}), } as OpenAI.Chat.ChatCompletionCreateParams; } diff --git a/packages/core/src/core/openaiContentGenerator/provider/default.test.ts b/packages/core/src/core/openaiContentGenerator/provider/default.test.ts index fc921c7c0..cc227b464 100644 --- a/packages/core/src/core/openaiContentGenerator/provider/default.test.ts +++ b/packages/core/src/core/openaiContentGenerator/provider/default.test.ts @@ -261,5 +261,48 @@ describe('DefaultOpenAICompatibleProvider', () => { // Result should be a different object expect(result).not.toBe(originalRequest); }); + + it('should merge extra_body into the request', () => { + const providerWithExtraBody = new DefaultOpenAICompatibleProvider( + { + ...mockContentGeneratorConfig, + extra_body: { + custom_param: 'custom_value', + nested: { key: 'value' }, + }, + } as ContentGeneratorConfig, + mockCliConfig, + ); + + const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = { + model: 'gpt-4', + messages: [{ role: 'user', content: 'Hello' }], + temperature: 0.7, + }; + + const result = providerWithExtraBody.buildRequest( + originalRequest, + 'prompt-id', + ); + + expect(result).toEqual({ + ...originalRequest, + custom_param: 'custom_value', + nested: { key: 'value' }, + }); + }); + + it('should not include extra_body when not configured', () => { + const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = { + model: 'gpt-4', + messages: [{ role: 'user', content: 'Hello' }], + temperature: 0.7, + }; + + const result = provider.buildRequest(originalRequest, 'prompt-id'); + + expect(result).toEqual(originalRequest); + expect(result).not.toHaveProperty('custom_param'); + }); }); }); diff --git a/packages/core/src/core/openaiContentGenerator/provider/default.ts b/packages/core/src/core/openaiContentGenerator/provider/default.ts index b7d8644c9..d865a89f2 100644 --- a/packages/core/src/core/openaiContentGenerator/provider/default.ts +++ 
b/packages/core/src/core/openaiContentGenerator/provider/default.ts @@ -64,9 +64,11 @@ export class DefaultOpenAICompatibleProvider request: OpenAI.Chat.ChatCompletionCreateParams, _userPromptId: string, ): OpenAI.Chat.ChatCompletionCreateParams { + const extraBody = this.contentGeneratorConfig.extra_body; // Default provider doesn't need special enhancements, just pass through all parameters return { ...request, // Preserve all original parameters including sampling params + ...(extraBody ? extraBody : {}), }; } diff --git a/packages/core/src/models/constants.ts b/packages/core/src/models/constants.ts index e4c8ad729..75552b55d 100644 --- a/packages/core/src/models/constants.ts +++ b/packages/core/src/models/constants.ts @@ -26,6 +26,7 @@ export const MODEL_GENERATION_CONFIG_FIELDS = [ 'schemaCompliance', 'reasoning', 'customHeaders', + 'extra_body', ] as const satisfies ReadonlyArray; /** diff --git a/packages/core/src/models/types.ts b/packages/core/src/models/types.ts index c8360e158..f6987d89a 100644 --- a/packages/core/src/models/types.ts +++ b/packages/core/src/models/types.ts @@ -32,6 +32,7 @@ export type ModelGenerationConfig = Pick< | 'schemaCompliance' | 'reasoning' | 'customHeaders' + | 'extra_body' >; /**