Refactor voice note transcription to use Hugging Face transformers Whisper pipeline

- Updated transcription logic to utilize Hugging Face's Whisper models instead of faster-whisper.
- Introduced new model mapping and pipeline loading functions.
- Adjusted tests to reflect changes in the transcription process.
- Updated documentation in README, .env.example, and settings to align with the new implementation.
- Ensured compatibility with CUDA 13 and removed unnecessary dependencies.
This commit is contained in:
Alishahryar1 2026-02-18 06:18:28 -08:00
parent 34fb8e2ca7
commit 75e066f17f
7 changed files with 773 additions and 161 deletions

View file

@@ -78,8 +78,10 @@ class Settings(BaseSettings):
     )
     # Hugging Face token for faster model downloads (optional)
     hf_token: str = Field(default="", validation_alias="HF_TOKEN")
-    # Model size: "tiny" | "base" | "small" | "medium" | "large-v2" | "large-v3" | "large-v3-turbo"
-    whisper_model: str = Field(default="base", validation_alias="WHISPER_MODEL")
+    # Hugging Face Whisper model ID (e.g. openai/whisper-large-v3-turbo) or short name
+    whisper_model: str = Field(
+        default="openai/whisper-large-v3-turbo", validation_alias="WHISPER_MODEL"
+    )
     # Device: "cpu" | "cuda"
     whisper_device: str = Field(default="cpu", validation_alias="WHISPER_DEVICE")