Vision model support for Qwen-OAuth (#525)
Some checks are pending
Qwen Code CI / Lint (GitHub Actions) (push) Waiting to run
Qwen Code CI / Lint (Javascript) (push) Waiting to run
Qwen Code CI / Lint (Shell) (push) Waiting to run
Qwen Code CI / Lint (YAML) (push) Waiting to run
Qwen Code CI / Lint (push) Blocked by required conditions
Qwen Code CI / Test (push) Blocked by required conditions
Qwen Code CI / Test-1 (push) Blocked by required conditions
Qwen Code CI / Test-3 (push) Blocked by required conditions
Qwen Code CI / Test-2 (push) Blocked by required conditions
Qwen Code CI / Test-4 (push) Blocked by required conditions
Qwen Code CI / Test-5 (push) Blocked by required conditions
Qwen Code CI / Test-6 (push) Blocked by required conditions
Qwen Code CI / Test-7 (push) Blocked by required conditions
Qwen Code CI / Test-8 (push) Blocked by required conditions
Qwen Code CI / Post Coverage Comment (push) Blocked by required conditions
Qwen Code CI / CodeQL (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:docker (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:none (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:docker-1 (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:none-1 (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:docker-2 (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:none-2 (push) Waiting to run
E2E Tests / E2E Test - macOS (push) Waiting to run

* refactor: openaiContentGenerator

* refactor: optimize stream handling

* refactor: re-organize refactored files

* fix: unit test cases

* feat: `/model` command for switching to vision model

* fix: lint error

* feat: add image tokenizer to fit vlm context window

* fix: lint and type errors

* feat: add `visionModelPreview` to control default visibility of vision models

* fix: remove deprecated files

* fix: align supported image formats with bailian doc
This commit is contained in:
Mingholy 2025-09-18 13:32:00 +08:00 committed by GitHub
parent 56808ac210
commit 761833c915
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
41 changed files with 4083 additions and 5336 deletions

View file

@ -42,6 +42,7 @@ import type {
import { StreamingState, MessageType, ToolCallStatus } from '../types.js';
import { isAtCommand, isSlashCommand } from '../utils/commandUtils.js';
import { useShellCommandProcessor } from './shellCommandProcessor.js';
import { useVisionAutoSwitch } from './useVisionAutoSwitch.js';
import { handleAtCommand } from './atCommandProcessor.js';
import { findLastSafeSplitPoint } from '../utils/markdownUtilities.js';
import { useStateAndRef } from './useStateAndRef.js';
@ -88,6 +89,12 @@ export const useGeminiStream = (
setModelSwitchedFromQuotaError: React.Dispatch<React.SetStateAction<boolean>>,
onEditorClose: () => void,
onCancelSubmit: () => void,
visionModelPreviewEnabled: boolean = false,
onVisionSwitchRequired?: (query: PartListUnion) => Promise<{
modelOverride?: string;
persistSessionModel?: string;
showGuidance?: boolean;
}>,
) => {
const [initError, setInitError] = useState<string | null>(null);
const abortControllerRef = useRef<AbortController | null>(null);
@ -155,6 +162,13 @@ export const useGeminiStream = (
geminiClient,
);
const { handleVisionSwitch, restoreOriginalModel } = useVisionAutoSwitch(
config,
addItem,
visionModelPreviewEnabled,
onVisionSwitchRequired,
);
const streamingState = useMemo(() => {
if (toolCalls.some((tc) => tc.status === 'awaiting_approval')) {
return StreamingState.WaitingForConfirmation;
@ -715,6 +729,20 @@ export const useGeminiStream = (
return;
}
// Handle vision switch requirement
const visionSwitchResult = await handleVisionSwitch(
queryToSend,
userMessageTimestamp,
options?.isContinuation || false,
);
if (!visionSwitchResult.shouldProceed) {
isSubmittingQueryRef.current = false;
return;
}
const finalQueryToSend = queryToSend;
if (!options?.isContinuation) {
startNewPrompt();
setThought(null); // Reset thought when starting a new prompt
@ -725,7 +753,7 @@ export const useGeminiStream = (
try {
const stream = geminiClient.sendMessageStream(
queryToSend,
finalQueryToSend,
abortSignal,
prompt_id!,
);
@ -736,6 +764,8 @@ export const useGeminiStream = (
);
if (processingStatus === StreamProcessingStatus.UserCancelled) {
// Restore original model if it was temporarily overridden
restoreOriginalModel();
isSubmittingQueryRef.current = false;
return;
}
@ -748,7 +778,13 @@ export const useGeminiStream = (
loopDetectedRef.current = false;
handleLoopDetectedEvent();
}
// Restore original model if it was temporarily overridden
restoreOriginalModel();
} catch (error: unknown) {
// Restore original model if it was temporarily overridden
restoreOriginalModel();
if (error instanceof UnauthorizedError) {
onAuthError();
} else if (!isNodeError(error) || error.name !== 'AbortError') {
@ -786,6 +822,8 @@ export const useGeminiStream = (
startNewPrompt,
getPromptCount,
handleLoopDetectedEvent,
handleVisionSwitch,
restoreOriginalModel,
],
);