refactor: integrate voice selection utility in podcast audio generation

- remove NavUser component and update sidebar layout
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-08-12 16:48:03 -07:00
parent d851e1bd6d
commit 2fb86ad687
4 changed files with 71 additions and 191 deletions

View file

@ -16,6 +16,7 @@ from app.services.llm_service import get_user_long_context_llm
from .configuration import Configuration
from .prompts import get_podcast_generation_prompt
from .state import PodcastTranscriptEntry, PodcastTranscripts, State
from .utils import get_voice_for_provider
async def create_podcast_transcript(
@ -121,16 +122,6 @@ async def create_merged_podcast_audio(
output_path = f"podcasts/{session_id}_podcast.mp3"
os.makedirs("podcasts", exist_ok=True)
# Map of speaker_id to voice
voice_mapping = {
0: "alloy", # Default/intro voice
1: "echo", # First speaker
# 2: "fable", # Second speaker
# 3: "onyx", # Third speaker
# 4: "nova", # Fourth speaker
# 5: "shimmer" # Fifth speaker
}
# Generate audio for each transcript segment
audio_files = []
@ -144,7 +135,7 @@ async def create_merged_podcast_audio(
dialog = segment.get("dialog", "")
# Select voice based on speaker_id
voice = voice_mapping.get(speaker_id, "alloy")
voice = get_voice_for_provider(app_config.TTS_SERVICE, speaker_id)
# Generate a unique filename for this segment
filename = f"{temp_dir}/{session_id}_{index}.mp3"

View file

@ -0,0 +1,69 @@
def get_voice_for_provider(provider: str, speaker_id: int) -> dict | str:
"""
Get the appropriate voice configuration based on the TTS provider and speaker ID.
Args:
provider: The TTS provider (e.g., "openai/tts-1", "vertex_ai/test")
speaker_id: The ID of the speaker (0-5)
Returns:
Voice configuration - string for OpenAI, dict for Vertex AI
"""
# Extract provider type from the model string
provider_type = (
provider.split("/")[0].lower() if "/" in provider else provider.lower()
)
if provider_type == "openai":
# OpenAI voice mapping - simple string values
openai_voices = {
0: "alloy", # Default/intro voice
1: "echo", # First speaker
2: "fable", # Second speaker
3: "onyx", # Third speaker
4: "nova", # Fourth speaker
5: "shimmer", # Fifth speaker
}
return openai_voices.get(speaker_id, "alloy")
elif provider_type == "vertex_ai":
# Vertex AI voice mapping - dict with languageCode and name
vertex_voices = {
0: {
"languageCode": "en-US",
"name": "en-US-Studio-O",
},
1: {
"languageCode": "en-US",
"name": "en-US-Studio-M",
},
2: {
"languageCode": "en-UK",
"name": "en-UK-Studio-A",
},
3: {
"languageCode": "en-UK",
"name": "en-UK-Studio-B",
},
4: {
"languageCode": "en-AU",
"name": "en-AU-Studio-A",
},
5: {
"languageCode": "en-AU",
"name": "en-AU-Studio-B",
},
}
return vertex_voices.get(speaker_id, vertex_voices[0])
else:
# Default fallback to OpenAI format for unknown providers
default_voices = {
0: "alloy",
1: "echo",
2: "fable",
3: "onyx",
4: "nova",
5: "shimmer",
}
return default_voices.get(speaker_id, "alloy")