Add preferred microphone selection (#7805)

Co-authored-by: Douwe Osinga <douwe@squareup.com>
This commit is contained in:
Douwe Osinga 2026-03-11 13:03:00 -04:00 committed by GitHub
parent d74f6f6302
commit da51b4c19f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 251 additions and 5 deletions

View file

@@ -102,6 +102,10 @@ module.exports = [
AnalyserNode: 'readonly',
MediaRecorder: 'readonly',
MediaStream: 'readonly',
AudioWorkletNode: 'readonly',
DOMException: 'readonly',
MediaDeviceInfo: 'readonly',
MediaTrackConstraints: 'readonly',
Blob: 'readonly',
FormData: 'readonly',
},

View file

@@ -6,6 +6,7 @@ import { Input } from '../../ui/input';
import { Button } from '../../ui/button';
import { trackSettingToggled } from '../../../utils/analytics';
import { LocalModelManager } from './LocalModelManager';
import { MicrophoneSelector } from './MicrophoneSelector';
import { DICTATION_ALLOWED_PROVIDERS } from '../../../updates';
import {
DropdownMenu,
@@ -20,6 +21,7 @@ export const DictationSettings = () => {
const [providerStatuses, setProviderStatuses] = useState<Record<string, DictationProviderStatus>>(
{}
);
const [preferredMic, setPreferredMic] = useState<string | null>(null);
const [apiKey, setApiKey] = useState('');
const [isEditingKey, setIsEditingKey] = useState(false);
const { read, upsert, remove } = useConfig();
@@ -44,6 +46,10 @@ export const DictationSettings = () => {
}
setProvider(loadedProvider);
const micValue = await read('voice_dictation_preferred_mic', false);
setPreferredMic((micValue as string) || null);
await refreshStatuses();
};
@@ -57,6 +63,11 @@ export const DictationSettings = () => {
trackSettingToggled('voice_dictation', newProvider !== null);
};
const handleMicChange = (deviceId: string | null) => {
setPreferredMic(deviceId);
upsert('voice_dictation_preferred_mic', deviceId || '', false);
};
const handleSaveKey = async () => {
if (!provider) return;
const providerConfig = providerStatuses[provider];
@@ -194,6 +205,11 @@ export const DictationSettings = () => {
)}
</div>
)}
<MicrophoneSelector
selectedDeviceId={preferredMic}
onDeviceChange={handleMicChange}
/>
</>
)}
</div>

View file

@@ -0,0 +1,204 @@
import { useState, useEffect, useRef, useCallback } from 'react';
import { ChevronDown, Mic } from 'lucide-react';
import { Button } from '../../ui/button';
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuRadioGroup,
DropdownMenuRadioItem,
DropdownMenuTrigger,
} from '../../ui/dropdown-menu';
/** Props for the MicrophoneSelector settings row. */
interface MicrophoneSelectorProps {
  /** deviceId of the preferred microphone, or null for "System Default". */
  selectedDeviceId: string | null;
  /** Called with the chosen deviceId, or null when "System Default" is picked. */
  onDeviceChange: (deviceId: string | null) => void;
}
// How long a microphone test runs before it auto-stops (milliseconds).
const TEST_DURATION_MS = 5000;
/**
 * Settings row that lets the user pick a preferred input device for
 * dictation and run a short live level ("VU") test against it.
 *
 * Contract:
 * - `selectedDeviceId === null` means "System Default" (no deviceId constraint
 *   is applied when testing).
 * - `onDeviceChange` is invoked with the new deviceId, or null when
 *   "System Default" is selected.
 *
 * Device labels are blank until the user grants microphone permission, so an
 * all-blank-label device list is treated as "permission not yet granted" and
 * renders a "Grant Access" prompt instead of the picker.
 */
export const MicrophoneSelector = ({ selectedDeviceId, onDeviceChange }: MicrophoneSelectorProps) => {
  const [devices, setDevices] = useState<MediaDeviceInfo[]>([]);
  const [hasPermission, setHasPermission] = useState(false);
  const [isTesting, setIsTesting] = useState(false);
  const [vuLevel, setVuLevel] = useState(0);
  const testStreamRef = useRef<MediaStream | null>(null);
  const testCtxRef = useRef<AudioContext | null>(null);
  const rafRef = useRef<number>(0);
  const testTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  // Tracks mount state so the async startTest flow can release a stream that
  // resolves only AFTER the unmount cleanup already ran stopTest(); without
  // this the track is never stopped and the OS mic indicator stays on.
  const mountedRef = useRef(true);

  // Refresh the list of audio input devices. Labels are empty strings until
  // permission is granted, which doubles as our permission probe.
  const enumerate = useCallback(async () => {
    try {
      const all = await navigator.mediaDevices.enumerateDevices();
      const inputs = all.filter((d) => d.kind === 'audioinput');
      setHasPermission(inputs.some((d) => d.label !== ''));
      setDevices(inputs);
    } catch (e) {
      console.error('Failed to enumerate devices:', e);
    }
  }, []);

  useEffect(() => {
    enumerate();
    // Keep the list current when microphones are plugged in or removed.
    navigator.mediaDevices.addEventListener('devicechange', enumerate);
    return () => navigator.mediaDevices.removeEventListener('devicechange', enumerate);
  }, [enumerate]);

  // Open (and immediately close) a throwaway stream purely to trigger the
  // permission prompt, which in turn unlocks device labels for enumerate().
  const requestPermission = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      stream.getTracks().forEach((t) => t.stop());
      await enumerate();
    } catch (e) {
      console.error('Microphone permission denied:', e);
    }
  };

  // Tear down everything a running test holds: the animation frame, the
  // auto-stop timer, the AudioContext, and the capture stream. Safe to call
  // when no test is running.
  const stopTest = useCallback(() => {
    if (rafRef.current) cancelAnimationFrame(rafRef.current);
    rafRef.current = 0;
    if (testTimerRef.current) clearTimeout(testTimerRef.current);
    testTimerRef.current = null;
    // close() returns a Promise that rejects (InvalidStateError) if the
    // context is already closed; swallow it so teardown never surfaces an
    // unhandled promise rejection.
    testCtxRef.current?.close().catch(() => {});
    testCtxRef.current = null;
    testStreamRef.current?.getTracks().forEach((t) => t.stop());
    testStreamRef.current = null;
    setIsTesting(false);
    setVuLevel(0);
  }, []);

  // Capture from the selected device and animate a simple RMS level meter
  // until the user stops it or TEST_DURATION_MS elapses.
  const startTest = async () => {
    stopTest();
    try {
      const constraints: MediaTrackConstraints = {
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true,
      };
      if (selectedDeviceId) {
        constraints.deviceId = { exact: selectedDeviceId };
      }
      const stream = await navigator.mediaDevices.getUserMedia({ audio: constraints });
      if (!mountedRef.current) {
        // Unmounted while awaiting the stream: the cleanup effect already ran,
        // so release the tracks here or the microphone stays live.
        stream.getTracks().forEach((t) => t.stop());
        return;
      }
      testStreamRef.current = stream;
      const ctx = new AudioContext();
      testCtxRef.current = ctx;
      const source = ctx.createMediaStreamSource(stream);
      const analyser = ctx.createAnalyser();
      analyser.fftSize = 256;
      source.connect(analyser);
      const dataArray = new Uint8Array(analyser.frequencyBinCount);
      const poll = () => {
        analyser.getByteTimeDomainData(dataArray);
        // RMS of the centered time-domain signal, scaled x5 so quiet speech
        // still registers; clamped to [0, 1] for the meter width.
        let sum = 0;
        for (let i = 0; i < dataArray.length; i++) {
          const v = (dataArray[i] - 128) / 128;
          sum += v * v;
        }
        const rms = Math.sqrt(sum / dataArray.length);
        setVuLevel(Math.min(1, rms * 5));
        rafRef.current = requestAnimationFrame(poll);
      };
      setIsTesting(true);
      rafRef.current = requestAnimationFrame(poll);
      // Auto-stop so a forgotten test doesn't hold the microphone open.
      testTimerRef.current = setTimeout(stopTest, TEST_DURATION_MS);
    } catch (e) {
      console.error('Mic test failed:', e);
      stopTest();
    }
  };

  useEffect(() => {
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
      stopTest();
    };
  }, [stopTest]);

  // Label shown in the dropdown list; falls back to a positional name since
  // labels can be empty before permission is granted.
  const getDeviceLabel = (device: MediaDeviceInfo, index: number): string => {
    return device.label || `Microphone ${index + 1}`;
  };

  // Label shown on the dropdown trigger. Falls back to "System Default" when
  // the stored device is no longer present.
  const selectedLabel = (): string => {
    if (!selectedDeviceId) return 'System Default';
    const device = devices.find((d) => d.deviceId === selectedDeviceId);
    if (device) return device.label || 'Selected Microphone';
    return 'System Default';
  };

  if (!hasPermission) {
    return (
      <div className="flex items-center justify-between py-2 px-2 hover:bg-background-secondary rounded-lg transition-all">
        <div>
          <h3 className="text-text-primary text-sm">Microphone</h3>
          <p className="text-xs text-text-secondary max-w-md mt-[2px]">
            Grant access to see available microphones
          </p>
        </div>
        <Button variant="outline" size="sm" onClick={requestPermission}>
          Grant Access
        </Button>
      </div>
    );
  }

  return (
    <div className="space-y-3">
      <div className="flex items-center justify-between py-2 px-2 hover:bg-background-secondary rounded-lg transition-all">
        <div>
          <h3 className="text-text-primary text-sm">Microphone</h3>
          <p className="text-xs text-text-secondary max-w-md mt-[2px]">
            Choose which microphone to use for dictation
          </p>
        </div>
        <div className="flex items-center gap-2">
          <DropdownMenu>
            <DropdownMenuTrigger className="flex items-center gap-2 px-3 py-1.5 text-sm border border-border-primary rounded-md hover:border-border-primary transition-colors text-text-primary bg-background-primary max-w-[220px]">
              <span className="truncate">{selectedLabel()}</span>
              <ChevronDown className="w-4 h-4 shrink-0" />
            </DropdownMenuTrigger>
            <DropdownMenuContent align="end" className="w-max min-w-[250px] max-w-[350px]">
              <DropdownMenuRadioGroup
                value={selectedDeviceId ?? 'system_default'}
                onValueChange={(v) => onDeviceChange(v === 'system_default' ? null : v)}
              >
                <DropdownMenuRadioItem value="system_default">
                  System Default
                </DropdownMenuRadioItem>
                {devices.map((device, i) => (
                  <DropdownMenuRadioItem key={device.deviceId} value={device.deviceId}>
                    <span className="truncate">{getDeviceLabel(device, i)}</span>
                  </DropdownMenuRadioItem>
                ))}
              </DropdownMenuRadioGroup>
            </DropdownMenuContent>
          </DropdownMenu>
          <Button
            variant="outline"
            size="sm"
            onClick={isTesting ? stopTest : startTest}
            className="shrink-0"
          >
            <Mic className="w-4 h-4 mr-1" />
            {isTesting ? 'Stop' : 'Test'}
          </Button>
        </div>
      </div>
      {isTesting && (
        <div className="px-2">
          <div className="w-full bg-background-secondary rounded-full h-2 overflow-hidden">
            <div
              className="bg-green-500 h-2 rounded-full transition-all duration-75"
              style={{ width: `${vuLevel * 100}%` }}
            />
          </div>
          <p className="text-xs text-text-secondary mt-1">
            Speak to test your microphone ({Math.ceil(TEST_DURATION_MS / 1000)}s)
          </p>
        </div>
      )}
    </div>
  );
};

View file

@@ -202,9 +202,32 @@ export const useAudioRecorder = ({ onTranscription, onError }: UseAudioRecorderO
}
try {
const stream = await navigator.mediaDevices.getUserMedia({
audio: { echoCancellation: true, noiseSuppression: true, autoGainControl: true },
});
const preferredMic = await read('voice_dictation_preferred_mic', false);
const audioConstraints: MediaTrackConstraints = {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
};
if (preferredMic && typeof preferredMic === 'string') {
audioConstraints.deviceId = { exact: preferredMic };
}
let stream: MediaStream;
try {
stream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints });
} catch (e) {
if (
preferredMic &&
e instanceof DOMException &&
(e.name === 'NotFoundError' || e.name === 'OverconstrainedError')
) {
delete audioConstraints.deviceId;
stream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints });
} else {
throw e;
}
}
streamRef.current = stream;
const ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
@@ -213,7 +236,6 @@ export const useAudioRecorder = ({ onTranscription, onError }: UseAudioRecorderO
await ctx.audioWorklet.addModule(WORKLET_URL);
const source = ctx.createMediaStreamSource(stream);
// eslint-disable-next-line no-undef
const worklet = new AudioWorkletNode(ctx, 'audio-capture');
worklet.port.onmessage = (e: MessageEvent<Float32Array>) => handleSamples(e.data);
@@ -230,7 +252,7 @@ export const useAudioRecorder = ({ onTranscription, onError }: UseAudioRecorderO
stopRecording();
onError(errorMessage(error));
}
}, [isEnabled, onError, handleSamples, stopRecording]);
}, [isEnabled, onError, handleSamples, stopRecording, read]);
useEffect(() => {
return () => {