mirror of
https://github.com/MODSetter/SurfSense.git
synced 2025-09-17 01:29:44 +00:00
refactor: integrate voice selection utility in podcast audio generation
- remove NavUser component and update sidebar layout
This commit is contained in:
parent
d851e1bd6d
commit
2fb86ad687
4 changed files with 71 additions and 191 deletions
|
@ -16,6 +16,7 @@ from app.services.llm_service import get_user_long_context_llm
|
|||
from .configuration import Configuration
|
||||
from .prompts import get_podcast_generation_prompt
|
||||
from .state import PodcastTranscriptEntry, PodcastTranscripts, State
|
||||
from .utils import get_voice_for_provider
|
||||
|
||||
|
||||
async def create_podcast_transcript(
|
||||
|
@ -121,16 +122,6 @@ async def create_merged_podcast_audio(
|
|||
output_path = f"podcasts/{session_id}_podcast.mp3"
|
||||
os.makedirs("podcasts", exist_ok=True)
|
||||
|
||||
# Map of speaker_id to voice
|
||||
voice_mapping = {
|
||||
0: "alloy", # Default/intro voice
|
||||
1: "echo", # First speaker
|
||||
# 2: "fable", # Second speaker
|
||||
# 3: "onyx", # Third speaker
|
||||
# 4: "nova", # Fourth speaker
|
||||
# 5: "shimmer" # Fifth speaker
|
||||
}
|
||||
|
||||
# Generate audio for each transcript segment
|
||||
audio_files = []
|
||||
|
||||
|
@ -144,7 +135,7 @@ async def create_merged_podcast_audio(
|
|||
dialog = segment.get("dialog", "")
|
||||
|
||||
# Select voice based on speaker_id
|
||||
voice = voice_mapping.get(speaker_id, "alloy")
|
||||
voice = get_voice_for_provider(app_config.TTS_SERVICE, speaker_id)
|
||||
|
||||
# Generate a unique filename for this segment
|
||||
filename = f"{temp_dir}/{session_id}_{index}.mp3"
|
||||
|
|
69
surfsense_backend/app/agents/podcaster/utils.py
Normal file
69
surfsense_backend/app/agents/podcaster/utils.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
def get_voice_for_provider(provider: str, speaker_id: int) -> dict | str:
|
||||
"""
|
||||
Get the appropriate voice configuration based on the TTS provider and speaker ID.
|
||||
|
||||
Args:
|
||||
provider: The TTS provider (e.g., "openai/tts-1", "vertex_ai/test")
|
||||
speaker_id: The ID of the speaker (0-5)
|
||||
|
||||
Returns:
|
||||
Voice configuration - string for OpenAI, dict for Vertex AI
|
||||
"""
|
||||
# Extract provider type from the model string
|
||||
provider_type = (
|
||||
provider.split("/")[0].lower() if "/" in provider else provider.lower()
|
||||
)
|
||||
|
||||
if provider_type == "openai":
|
||||
# OpenAI voice mapping - simple string values
|
||||
openai_voices = {
|
||||
0: "alloy", # Default/intro voice
|
||||
1: "echo", # First speaker
|
||||
2: "fable", # Second speaker
|
||||
3: "onyx", # Third speaker
|
||||
4: "nova", # Fourth speaker
|
||||
5: "shimmer", # Fifth speaker
|
||||
}
|
||||
return openai_voices.get(speaker_id, "alloy")
|
||||
|
||||
elif provider_type == "vertex_ai":
|
||||
# Vertex AI voice mapping - dict with languageCode and name
|
||||
vertex_voices = {
|
||||
0: {
|
||||
"languageCode": "en-US",
|
||||
"name": "en-US-Studio-O",
|
||||
},
|
||||
1: {
|
||||
"languageCode": "en-US",
|
||||
"name": "en-US-Studio-M",
|
||||
},
|
||||
2: {
|
||||
"languageCode": "en-UK",
|
||||
"name": "en-UK-Studio-A",
|
||||
},
|
||||
3: {
|
||||
"languageCode": "en-UK",
|
||||
"name": "en-UK-Studio-B",
|
||||
},
|
||||
4: {
|
||||
"languageCode": "en-AU",
|
||||
"name": "en-AU-Studio-A",
|
||||
},
|
||||
5: {
|
||||
"languageCode": "en-AU",
|
||||
"name": "en-AU-Studio-B",
|
||||
},
|
||||
}
|
||||
return vertex_voices.get(speaker_id, vertex_voices[0])
|
||||
|
||||
else:
|
||||
# Default fallback to OpenAI format for unknown providers
|
||||
default_voices = {
|
||||
0: "alloy",
|
||||
1: "echo",
|
||||
2: "fable",
|
||||
3: "onyx",
|
||||
4: "nova",
|
||||
5: "shimmer",
|
||||
}
|
||||
return default_voices.get(speaker_id, "alloy")
|
Loading…
Add table
Add a link
Reference in a new issue