mirror of
https://github.com/Alishahryar1/free-claude-code.git
synced 2026-04-28 11:30:03 +00:00
## Summary Added NVIDIA NIM as a second transcription option (alongside local Whisper). This lets you transcribe voice notes using NVIDIA's cloud API instead of running Whisper locally. ## What changed - **Transcription**: Now supports two backends - Local Whisper: Free, runs on your GPU/CPU (existing) - NVIDIA NIM: Cloud API via Riva gRPC (new) - **Supported models**: 8 NVIDIA NIM models added (Parakeet variants for different languages, Whisper Large V3) --------- Co-authored-by: Alishahryar1 <alishahryar2@gmail.com>
125 lines
4.3 KiB
Python
125 lines
4.3 KiB
Python
"""Tests for voice note transcription."""
|
|
|
|
import tempfile
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from messaging.transcription import (
|
|
MAX_AUDIO_SIZE_BYTES,
|
|
transcribe_audio,
|
|
)
|
|
|
|
|
|
def test_transcribe_file_not_found_raises():
    """A path that does not exist must surface as FileNotFoundError."""
    missing_path = Path("/nonexistent/file.ogg")
    expect_not_found = pytest.raises(FileNotFoundError, match="not found")

    with expect_not_found:
        transcribe_audio(missing_path, "audio/ogg")
|
|
|
|
|
|
def test_transcribe_file_too_large_raises():
    """A file one byte larger than MAX_AUDIO_SIZE_BYTES raises ValueError."""
    oversized_payload = b"x" * (MAX_AUDIO_SIZE_BYTES + 1)

    # delete=False keeps the file on disk after the handle closes so that
    # transcribe_audio can open it by path; cleanup happens in ``finally``.
    with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as handle:
        audio_path = Path(handle.name)
        handle.write(oversized_payload)

    try:
        expect_too_large = pytest.raises(ValueError, match="too large")
        with expect_too_large:
            transcribe_audio(audio_path, "audio/ogg", whisper_device="auto")
    finally:
        audio_path.unlink(missing_ok=True)
|
|
|
|
|
|
def test_transcribe_local_success():
    """The text produced by the (mocked) local pipeline is returned as-is.

    Also checks that the pipeline is called exactly once with the loaded
    audio and the expected ``generate_kwargs``.
    """
    decoded_audio = {"array": [0.0], "sampling_rate": 16000}
    pipeline = MagicMock(return_value={"text": "Hello world"})

    # Patch objects are only activated when entered in the ``with`` below.
    load_audio_patch = patch(
        "messaging.transcription._load_audio", return_value=decoded_audio
    )
    get_pipeline_patch = patch(
        "messaging.transcription._get_pipeline", return_value=pipeline
    )

    with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as handle:
        audio_path = Path(handle.name)
        handle.write(b"fake ogg content")

    try:
        with load_audio_patch, get_pipeline_patch:
            transcript = transcribe_audio(
                audio_path, "audio/ogg", whisper_model="base"
            )

        assert transcript == "Hello world"
        expected_generate_kwargs = {"language": "en", "task": "transcribe"}
        pipeline.assert_called_once_with(
            decoded_audio, generate_kwargs=expected_generate_kwargs
        )
    finally:
        audio_path.unlink(missing_ok=True)
|
|
|
|
|
|
def test_transcribe_local_empty_segments_returns_no_speech():
    """An empty pipeline transcript is replaced by the no-speech placeholder."""
    decoded_audio = {"array": [0.0], "sampling_rate": 16000}
    silent_pipeline = MagicMock(return_value={"text": ""})

    # Patch objects are only activated when entered in the ``with`` below.
    load_audio_patch = patch(
        "messaging.transcription._load_audio", return_value=decoded_audio
    )
    get_pipeline_patch = patch(
        "messaging.transcription._get_pipeline", return_value=silent_pipeline
    )

    with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as handle:
        audio_path = Path(handle.name)
        handle.write(b"fake ogg")

    try:
        with load_audio_patch, get_pipeline_patch:
            transcript = transcribe_audio(
                audio_path, "audio/ogg", whisper_model="base"
            )

        assert transcript == "(no speech detected)"
    finally:
        audio_path.unlink(missing_ok=True)
|
|
|
|
|
|
def test_transcribe_invalid_device_raises():
    """Passing whisper_device="auto" is rejected with a ValueError."""
    # Settings stub that also reports the invalid device "auto".
    stub_settings = MagicMock(whisper_device="auto", whisper_model="base")

    settings_patch = patch(
        "messaging.transcription.get_settings", return_value=stub_settings
    )
    # _load_audio is patched to avoid an ImportError from a missing librosa;
    # per the original author's note, device validation happens in
    # _get_pipeline before torch is imported.
    load_audio_patch = patch("messaging.transcription._load_audio")
    expect_bad_device = pytest.raises(
        ValueError, match="whisper_device must be 'cpu' or 'cuda'"
    )

    with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as handle:
        audio_path = Path(handle.name)
        handle.write(b"fake ogg")

    try:
        with settings_patch, load_audio_patch, expect_bad_device:
            transcribe_audio(audio_path, "audio/ogg", whisper_device="auto")
    finally:
        audio_path.unlink(missing_ok=True)
|
|
|
|
|
|
def test_transcribe_local_import_error_raises():
    """An ImportError raised by _get_pipeline propagates out of transcribe_audio.

    This simulates the voice_local extra not being installed.
    """
    missing_extra_error = ImportError(
        "Local Whisper requires the voice_local extra. "
        "Install with: uv sync --extra voice_local"
    )
    get_pipeline_patch = patch(
        "messaging.transcription._get_pipeline",
        side_effect=missing_extra_error,
    )
    expect_import_error = pytest.raises(ImportError, match="voice_local extra")

    with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as handle:
        audio_path = Path(handle.name)
        handle.write(b"fake ogg")

    try:
        with get_pipeline_patch, expect_import_error:
            transcribe_audio(audio_path, "audio/ogg", whisper_device="auto")
    finally:
        audio_path.unlink(missing_ok=True)
|