diff --git a/.env.example b/.env.example index b3b9394..74a52c7 100644 --- a/.env.example +++ b/.env.example @@ -35,7 +35,7 @@ MESSAGING_RATE_WINDOW=1 # Voice Note Transcription (transformers Whisper; install with: uv sync --extra voice) VOICE_NOTE_ENABLED=true # WHISPER_MODEL: Hugging Face ID or short name (tiny, base, small, medium, large-v2, large-v3, large-v3-turbo) -WHISPER_MODEL=openai/whisper-large-v3-turbo +WHISPER_MODEL=base HF_TOKEN="" diff --git a/README.md b/README.md index 18bb309..6953a06 100644 --- a/README.md +++ b/README.md @@ -248,7 +248,7 @@ uv sync --extra voice | Variable | Description | Default | |----------|-------------|---------| | `VOICE_NOTE_ENABLED` | Enable voice note handling | `true` | -| `WHISPER_MODEL` | Hugging Face model ID or short name (`tiny`, `base`, `small`, `medium`, `large-v2`, `large-v3`, `large-v3-turbo`) | `openai/whisper-large-v3-turbo` | +| `WHISPER_MODEL` | Hugging Face model ID or short name (`tiny`, `base`, `small`, `medium`, `large-v2`, `large-v3`, `large-v3-turbo`) | `base` | | `WHISPER_DEVICE` | `cpu` \| `cuda` | `cpu` | | `HF_TOKEN` | Hugging Face token for faster model downloads (optional; [create one](https://huggingface.co/settings/tokens)) | — | diff --git a/config/settings.py b/config/settings.py index 09b9223..7142d7d 100644 --- a/config/settings.py +++ b/config/settings.py @@ -78,10 +78,8 @@ class Settings(BaseSettings): ) # Hugging Face token for faster model downloads (optional) hf_token: str = Field(default="", validation_alias="HF_TOKEN") - # Hugging Face Whisper model ID (e.g. openai/whisper-large-v3-turbo) or short name - whisper_model: str = Field( - default="openai/whisper-large-v3-turbo", validation_alias="WHISPER_MODEL" - ) + # Hugging Face Whisper model ID (e.g. openai/whisper-base) or short name + whisper_model: str = Field(default="base", validation_alias="WHISPER_MODEL") # Device: "cpu" | "cuda" whisper_device: str = Field(default="cpu", validation_alias="WHISPER_DEVICE") diff --git a/messaging/transcription.py b/messaging/transcription.py index 62f618c..70346b6 100644 --- a/messaging/transcription.py +++ b/messaging/transcription.py @@ -85,7 +85,7 @@ def transcribe_audio( file_path: Path, mime_type: str, *, - whisper_model: str = "openai/whisper-large-v3-turbo", + whisper_model: str = "base", whisper_device: str = "cpu", ) -> str: """ @@ -94,7 +94,7 @@ def transcribe_audio( Args: file_path: Path to audio file (OGG, MP3, MP4, WAV, M4A supported) mime_type: MIME type of the audio (e.g. "audio/ogg") - whisper_model: Model ID (e.g. "openai/whisper-large-v3-turbo") or short name + whisper_model: Model ID (e.g. "openai/whisper-base") or short name whisper_device: "cpu" | "cuda" Returns: