Mirror of https://github.com/block/goose.git (synced 2026-04-28 03:29:36 +00:00).
Commit: Add preferred microphone selection (#7805)
Co-authored-by: Douwe Osinga <douwe@squareup.com>
This commit is contained in:
parent
d74f6f6302
commit
da51b4c19f
4 changed files with 251 additions and 5 deletions
|
|
@ -102,6 +102,10 @@ module.exports = [
|
|||
AnalyserNode: 'readonly',
|
||||
MediaRecorder: 'readonly',
|
||||
MediaStream: 'readonly',
|
||||
AudioWorkletNode: 'readonly',
|
||||
DOMException: 'readonly',
|
||||
MediaDeviceInfo: 'readonly',
|
||||
MediaTrackConstraints: 'readonly',
|
||||
Blob: 'readonly',
|
||||
FormData: 'readonly',
|
||||
},
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import { Input } from '../../ui/input';
|
|||
import { Button } from '../../ui/button';
|
||||
import { trackSettingToggled } from '../../../utils/analytics';
|
||||
import { LocalModelManager } from './LocalModelManager';
|
||||
import { MicrophoneSelector } from './MicrophoneSelector';
|
||||
import { DICTATION_ALLOWED_PROVIDERS } from '../../../updates';
|
||||
import {
|
||||
DropdownMenu,
|
||||
|
|
@ -20,6 +21,7 @@ export const DictationSettings = () => {
|
|||
const [providerStatuses, setProviderStatuses] = useState<Record<string, DictationProviderStatus>>(
|
||||
{}
|
||||
);
|
||||
const [preferredMic, setPreferredMic] = useState<string | null>(null);
|
||||
const [apiKey, setApiKey] = useState('');
|
||||
const [isEditingKey, setIsEditingKey] = useState(false);
|
||||
const { read, upsert, remove } = useConfig();
|
||||
|
|
@ -44,6 +46,10 @@ export const DictationSettings = () => {
|
|||
}
|
||||
|
||||
setProvider(loadedProvider);
|
||||
|
||||
const micValue = await read('voice_dictation_preferred_mic', false);
|
||||
setPreferredMic((micValue as string) || null);
|
||||
|
||||
await refreshStatuses();
|
||||
};
|
||||
|
||||
|
|
@ -57,6 +63,11 @@ export const DictationSettings = () => {
|
|||
trackSettingToggled('voice_dictation', newProvider !== null);
|
||||
};
|
||||
|
||||
const handleMicChange = (deviceId: string | null) => {
|
||||
setPreferredMic(deviceId);
|
||||
upsert('voice_dictation_preferred_mic', deviceId || '', false);
|
||||
};
|
||||
|
||||
const handleSaveKey = async () => {
|
||||
if (!provider) return;
|
||||
const providerConfig = providerStatuses[provider];
|
||||
|
|
@ -194,6 +205,11 @@ export const DictationSettings = () => {
|
|||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<MicrophoneSelector
|
||||
selectedDeviceId={preferredMic}
|
||||
onDeviceChange={handleMicChange}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,204 @@
|
|||
import { useState, useEffect, useRef, useCallback } from 'react';
|
||||
import { ChevronDown, Mic } from 'lucide-react';
|
||||
import { Button } from '../../ui/button';
|
||||
import {
|
||||
DropdownMenu,
|
||||
DropdownMenuContent,
|
||||
DropdownMenuRadioGroup,
|
||||
DropdownMenuRadioItem,
|
||||
DropdownMenuTrigger,
|
||||
} from '../../ui/dropdown-menu';
|
||||
|
||||
/**
 * Props for {@link MicrophoneSelector}.
 */
interface MicrophoneSelectorProps {
  /** deviceId of the preferred input, or null for the system default. */
  selectedDeviceId: string | null;
  /** Called with the chosen deviceId, or null when "System Default" is picked. */
  onDeviceChange: (deviceId: string | null) => void;
}

// How long a mic test runs before it auto-stops (milliseconds).
const TEST_DURATION_MS = 5000;

/**
 * Settings row for choosing a preferred microphone for dictation.
 *
 * Enumerates audio-input devices (re-enumerating on `devicechange`), asks for
 * mic permission when none has been granted yet, and offers a short "Test"
 * mode that renders a live VU meter driven by an AnalyserNode.
 */
export const MicrophoneSelector = ({ selectedDeviceId, onDeviceChange }: MicrophoneSelectorProps) => {
  // Available audio-input devices (may have empty labels pre-permission).
  const [devices, setDevices] = useState<MediaDeviceInfo[]>([]);
  const [hasPermission, setHasPermission] = useState(false);
  const [isTesting, setIsTesting] = useState(false);
  // Normalized 0..1 level shown by the VU meter while testing.
  const [vuLevel, setVuLevel] = useState(0);

  // Resources owned by an in-progress mic test; all torn down in stopTest().
  const testStreamRef = useRef<MediaStream | null>(null);
  const testCtxRef = useRef<AudioContext | null>(null);
  const rafRef = useRef<number>(0); // 0 = no pending animation frame
  const testTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Refresh the device list. Browsers only expose device labels after mic
  // permission has been granted, so a non-empty label implies permission.
  const enumerate = useCallback(async () => {
    try {
      const all = await navigator.mediaDevices.enumerateDevices();
      const inputs = all.filter((d) => d.kind === 'audioinput');
      setHasPermission(inputs.some((d) => d.label !== ''));
      setDevices(inputs);
    } catch (e) {
      console.error('Failed to enumerate devices:', e);
    }
  }, []);

  // Enumerate on mount and whenever the set of devices changes (plug/unplug).
  useEffect(() => {
    enumerate();
    navigator.mediaDevices.addEventListener('devicechange', enumerate);
    return () => navigator.mediaDevices.removeEventListener('devicechange', enumerate);
  }, [enumerate]);

  // Prompt for mic access, then immediately release the stream — we only
  // need the permission grant so that enumerate() can see device labels.
  const requestPermission = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      stream.getTracks().forEach((t) => t.stop());
      await enumerate();
    } catch (e) {
      console.error('Microphone permission denied:', e);
    }
  };

  // Tear down any in-progress test: cancel the RAF poll and auto-stop timer
  // first, then close the AudioContext (fire-and-forget promise) and stop
  // the capture tracks, then reset UI state. Safe to call when idle.
  const stopTest = useCallback(() => {
    if (rafRef.current) cancelAnimationFrame(rafRef.current);
    rafRef.current = 0;
    if (testTimerRef.current) clearTimeout(testTimerRef.current);
    testTimerRef.current = null;
    testCtxRef.current?.close();
    testCtxRef.current = null;
    testStreamRef.current?.getTracks().forEach((t) => t.stop());
    testStreamRef.current = null;
    setIsTesting(false);
    setVuLevel(0);
  }, []);

  // Open the (possibly preferred) mic and drive the VU meter from RMS of the
  // time-domain signal until stopped or TEST_DURATION_MS elapses.
  const startTest = async () => {
    stopTest(); // restart cleanly if a test is already running
    try {
      const constraints: MediaTrackConstraints = {
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true,
      };
      if (selectedDeviceId) {
        // 'exact' makes getUserMedia fail rather than fall back silently,
        // so the user tests the device they actually selected.
        constraints.deviceId = { exact: selectedDeviceId };
      }

      const stream = await navigator.mediaDevices.getUserMedia({ audio: constraints });
      testStreamRef.current = stream;

      const ctx = new AudioContext();
      testCtxRef.current = ctx;
      const source = ctx.createMediaStreamSource(stream);
      const analyser = ctx.createAnalyser();
      analyser.fftSize = 256; // small window; we only need a coarse level
      source.connect(analyser);

      const dataArray = new Uint8Array(analyser.frequencyBinCount);

      // Per-frame poll: compute RMS of the waveform (bytes centered on 128),
      // boost by 5x so normal speech fills the meter, clamp to [0, 1].
      const poll = () => {
        analyser.getByteTimeDomainData(dataArray);
        let sum = 0;
        for (let i = 0; i < dataArray.length; i++) {
          const v = (dataArray[i] - 128) / 128;
          sum += v * v;
        }
        const rms = Math.sqrt(sum / dataArray.length);
        setVuLevel(Math.min(1, rms * 5));
        rafRef.current = requestAnimationFrame(poll);
      };

      setIsTesting(true);
      rafRef.current = requestAnimationFrame(poll);
      testTimerRef.current = setTimeout(stopTest, TEST_DURATION_MS);
    } catch (e) {
      console.error('Mic test failed:', e);
      stopTest(); // release anything partially acquired
    }
  };

  // Release test resources if the component unmounts mid-test.
  useEffect(() => {
    return () => stopTest();
  }, [stopTest]);

  // Fallback label for devices enumerated before permission (empty labels).
  const getDeviceLabel = (device: MediaDeviceInfo, index: number): string => {
    return device.label || `Microphone ${index + 1}`;
  };

  // Label for the dropdown trigger. Falls back to "System Default" when the
  // saved device is no longer present (e.g. it was unplugged).
  const selectedLabel = (): string => {
    if (!selectedDeviceId) return 'System Default';
    const device = devices.find((d) => d.deviceId === selectedDeviceId);
    if (device) return device.label || 'Selected Microphone';
    return 'System Default';
  };

  // Pre-permission state: show a single row with a "Grant Access" button.
  if (!hasPermission) {
    return (
      <div className="flex items-center justify-between py-2 px-2 hover:bg-background-secondary rounded-lg transition-all">
        <div>
          <h3 className="text-text-primary text-sm">Microphone</h3>
          <p className="text-xs text-text-secondary max-w-md mt-[2px]">
            Grant access to see available microphones
          </p>
        </div>
        <Button variant="outline" size="sm" onClick={requestPermission}>
          Grant Access
        </Button>
      </div>
    );
  }

  return (
    <div className="space-y-3">
      <div className="flex items-center justify-between py-2 px-2 hover:bg-background-secondary rounded-lg transition-all">
        <div>
          <h3 className="text-text-primary text-sm">Microphone</h3>
          <p className="text-xs text-text-secondary max-w-md mt-[2px]">
            Choose which microphone to use for dictation
          </p>
        </div>
        <div className="flex items-center gap-2">
          <DropdownMenu>
            <DropdownMenuTrigger className="flex items-center gap-2 px-3 py-1.5 text-sm border border-border-primary rounded-md hover:border-border-primary transition-colors text-text-primary bg-background-primary max-w-[220px]">
              <span className="truncate">{selectedLabel()}</span>
              <ChevronDown className="w-4 h-4 shrink-0" />
            </DropdownMenuTrigger>
            <DropdownMenuContent align="end" className="w-max min-w-[250px] max-w-[350px]">
              {/* 'system_default' sentinel maps to null for the caller. */}
              <DropdownMenuRadioGroup
                value={selectedDeviceId ?? 'system_default'}
                onValueChange={(v) => onDeviceChange(v === 'system_default' ? null : v)}
              >
                <DropdownMenuRadioItem value="system_default">
                  System Default
                </DropdownMenuRadioItem>
                {devices.map((device, i) => (
                  <DropdownMenuRadioItem key={device.deviceId} value={device.deviceId}>
                    <span className="truncate">{getDeviceLabel(device, i)}</span>
                  </DropdownMenuRadioItem>
                ))}
              </DropdownMenuRadioGroup>
            </DropdownMenuContent>
          </DropdownMenu>
          {/* Toggles the VU-meter test for the currently selected device. */}
          <Button
            variant="outline"
            size="sm"
            onClick={isTesting ? stopTest : startTest}
            className="shrink-0"
          >
            <Mic className="w-4 h-4 mr-1" />
            {isTesting ? 'Stop' : 'Test'}
          </Button>
        </div>
      </div>

      {isTesting && (
        <div className="px-2">
          <div className="w-full bg-background-secondary rounded-full h-2 overflow-hidden">
            <div
              className="bg-green-500 h-2 rounded-full transition-all duration-75"
              style={{ width: `${vuLevel * 100}%` }}
            />
          </div>
          <p className="text-xs text-text-secondary mt-1">
            Speak to test your microphone ({Math.ceil(TEST_DURATION_MS / 1000)}s)
          </p>
        </div>
      )}
    </div>
  );
};
|
||||
|
|
@ -202,9 +202,32 @@ export const useAudioRecorder = ({ onTranscription, onError }: UseAudioRecorderO
|
|||
}
|
||||
|
||||
try {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: { echoCancellation: true, noiseSuppression: true, autoGainControl: true },
|
||||
});
|
||||
const preferredMic = await read('voice_dictation_preferred_mic', false);
|
||||
|
||||
const audioConstraints: MediaTrackConstraints = {
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true,
|
||||
autoGainControl: true,
|
||||
};
|
||||
if (preferredMic && typeof preferredMic === 'string') {
|
||||
audioConstraints.deviceId = { exact: preferredMic };
|
||||
}
|
||||
|
||||
let stream: MediaStream;
|
||||
try {
|
||||
stream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints });
|
||||
} catch (e) {
|
||||
if (
|
||||
preferredMic &&
|
||||
e instanceof DOMException &&
|
||||
(e.name === 'NotFoundError' || e.name === 'OverconstrainedError')
|
||||
) {
|
||||
delete audioConstraints.deviceId;
|
||||
stream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints });
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
streamRef.current = stream;
|
||||
|
||||
const ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
|
||||
|
|
@ -213,7 +236,6 @@ export const useAudioRecorder = ({ onTranscription, onError }: UseAudioRecorderO
|
|||
await ctx.audioWorklet.addModule(WORKLET_URL);
|
||||
|
||||
const source = ctx.createMediaStreamSource(stream);
|
||||
// eslint-disable-next-line no-undef
|
||||
const worklet = new AudioWorkletNode(ctx, 'audio-capture');
|
||||
|
||||
worklet.port.onmessage = (e: MessageEvent<Float32Array>) => handleSamples(e.data);
|
||||
|
|
@ -230,7 +252,7 @@ export const useAudioRecorder = ({ onTranscription, onError }: UseAudioRecorderO
|
|||
stopRecording();
|
||||
onError(errorMessage(error));
|
||||
}
|
||||
}, [isEnabled, onError, handleSamples, stopRecording]);
|
||||
}, [isEnabled, onError, handleSamples, stopRecording, read]);
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue