Refactor voice note transcription to use Hugging Face transformers Whisper pipeline

- Updated transcription logic to utilize Hugging Face's Whisper models instead of faster-whisper.
- Introduced new model mapping and pipeline loading functions.
- Adjusted tests to reflect changes in the transcription process.
- Updated documentation in README, .env.example, and settings to align with the new implementation.
- Ensured compatibility with CUDA 13 and removed unnecessary dependencies.
This commit is contained in:
Alishahryar1 2026-02-18 06:18:28 -08:00
parent 34fb8e2ca7
commit 75e066f17f
7 changed files with 773 additions and 161 deletions

View file

@@ -78,8 +78,10 @@ class Settings(BaseSettings):
     )
     # Hugging Face token for faster model downloads (optional)
     hf_token: str = Field(default="", validation_alias="HF_TOKEN")
-    # Model size: "tiny" | "base" | "small" | "medium" | "large-v2" | "large-v3" | "large-v3-turbo"
-    whisper_model: str = Field(default="base", validation_alias="WHISPER_MODEL")
+    # Hugging Face Whisper model ID (e.g. openai/whisper-large-v3-turbo) or short name
+    whisper_model: str = Field(
+        default="openai/whisper-large-v3-turbo", validation_alias="WHISPER_MODEL"
+    )
     # Device: "cpu" | "cuda"
     whisper_device: str = Field(default="cpu", validation_alias="WHISPER_DEVICE")