# open-notebook/open_notebook/plugins/podcasts.py

from typing import ClassVar, List, Optional

from loguru import logger
from podcastfy.client import generate_podcast
from pydantic import Field, field_validator

from open_notebook.config import DATA_FOLDER
from open_notebook.domain.notebook import ObjectModel


class PodcastEpisode(ObjectModel):
    """Record describing one generated podcast episode.

    Instances are built and saved by ``PodcastConfig.generate_episode`` in
    this module. Persistence behaviour comes from ``ObjectModel``, which is
    defined elsewhere.
    """

    # Presumably the backing table/collection name used by ObjectModel — confirm.
    table_name: ClassVar[str] = "podcast_episode"
    # Episode name supplied by the caller of generate_episode.
    name: str
    # Name of the PodcastConfig (template) the episode was generated from.
    template: str
    # Instructions recorded for this episode.
    instructions: str
    # Source content the podcast was generated from, stored as a string.
    text: str
    # Value returned by podcastfy's generate_podcast; presumably the path of
    # the produced audio file — confirm against podcastfy.
    audio_file: str


class PodcastConfig(ObjectModel):
    """Reusable podcast template used to generate episodes with podcastfy.

    The fields describe the show (name, tagline, language), the two speakers,
    the conversation style, and the text-to-speech / transcript model settings.
    ``generate_episode`` turns these settings into a podcastfy conversation
    config, generates the audio and saves a ``PodcastEpisode``.
    """

    table_name: ClassVar[str] = "podcast_config"
    # Name of this template; stored on each episode as ``template``.
    name: str
    podcast_name: str
    podcast_tagline: str
    output_language: str = Field(default="English")
    person1_role: str
    person2_role: str
    conversation_style: List[str]
    engagement_technique: List[str]
    dialogue_structure: List[str]
    # Optional LLM used to write the transcript; only honoured together with
    # a recognised ``transcript_model_provider`` (see generate_episode).
    transcript_model: Optional[str] = None
    transcript_model_provider: Optional[str] = None
    # Default instructions, used when generate_episode receives none.
    user_instructions: Optional[str] = None
    # Custom closing line; a built-in default is used when unset.
    ending_message: Optional[str] = None
    wordcount: int = Field(ge=400, le=10000)
    creativity: float = Field(ge=0, le=1)
    # Text-to-speech provider and its voices/model.
    provider: str = Field(default="openai")
    voice1: Optional[str] = None
    voice2: Optional[str] = None
    model: str

    def generate_episode(self, episode_name, text, instructions=None):
        """Generate a podcast from ``text`` and save it as a ``PodcastEpisode``.

        Args:
            episode_name: Name stored on the resulting episode.
            text: Source content handed to podcastfy; stored on the episode
                as ``str(text)``.
            instructions: Optional per-episode instructions. When falsy, the
                template's ``user_instructions`` are used instead.

        The template itself is not modified by this call.
        """
        # Per-call instructions take precedence. They are kept in a local so
        # a one-off instruction does not silently overwrite the template.
        effective_instructions = (
            instructions if instructions else self.user_instructions
        )
        # Honour the configured closing line; fall back to the stock message.
        ending_message = self.ending_message or (
            "Thank you for listening to this episode. Don't forget to subscribe to our podcast for more interesting conversations."
        )
        podcasts_dir = f"{DATA_FOLDER}/podcasts"

        conversation_config = {
            "word_count": self.wordcount,
            "conversation_style": self.conversation_style,
            "roles_person1": self.person1_role,
            "roles_person2": self.person2_role,
            "dialogue_structure": self.dialogue_structure,
            "podcast_name": self.podcast_name,
            "podcast_tagline": self.podcast_tagline,
            "output_language": self.output_language,
            "user_instructions": effective_instructions,
            "engagement_techniques": self.engagement_technique,
            "creativity": self.creativity,
            "text_to_speech": {
                "output_directories": {
                    "transcripts": f"{podcasts_dir}/transcripts",
                    "audio": f"{podcasts_dir}/audio",
                },
                "temp_audio_dir": f"{podcasts_dir}/audio/tmp",
                "ending_message": ending_message,
                "default_tts_model": self.provider,
                # Provider-specific section is keyed by the provider's name.
                self.provider: {
                    "default_voices": {
                        "question": self.voice1,
                        "answer": self.voice2,
                    },
                    "model": self.model,
                },
                "audio_format": "mp3",
            },
        }

        logger.debug(
            f"Generating episode {episode_name} with config {conversation_config}"
        )

        # The transcript model is only forwarded for providers whose API key
        # label is known; otherwise both values stay None and podcastfy's own
        # defaults apply.
        api_key_labels = {
            "openai": "OPENAI_API_KEY",
            "anthropic": "ANTHROPIC_API_KEY",
            "gemini": "GEMINI_API_KEY",
        }
        api_key_label = api_key_labels.get(self.transcript_model_provider)
        llm_model_name = self.transcript_model if api_key_label else None
        if self.transcript_model_provider and api_key_label is None:
            logger.warning(
                f"Unsupported transcript model provider "
                f"{self.transcript_model_provider!r}; using podcastfy defaults"
            )

        audio_file = generate_podcast(
            conversation_config=conversation_config,
            text=text,
            tts_model=self.provider,
            llm_model_name=llm_model_name,
            api_key_label=api_key_label,
        )
        episode = PodcastEpisode(
            name=episode_name,
            template=self.name,
            # PodcastEpisode.instructions is a required str: record what was
            # actually used, never None, so saving cannot fail after the
            # audio has already been generated.
            instructions=effective_instructions or "",
            text=str(text),
            audio_file=audio_file,
        )
        episode.save()

    @field_validator(
        "name", "podcast_name", "podcast_tagline", "output_language", "model"
    )
    @classmethod
    def validate_required_strings(cls, value: str, field) -> str:
        """Reject missing or blank values and return the value stripped."""
        if value is None or value.strip() == "":
            raise ValueError(f"{field.field_name} cannot be None or empty string")
        return value.strip()

    @field_validator("wordcount")
    @classmethod
    def validate_wordcount(cls, value):
        """Ensure the word count is within the 400-10000 range of the field."""
        # Upper bound matches both the Field constraint and the error message.
        if not 400 <= value <= 10000:
            raise ValueError("Wordcount must be between 400 and 10000")
        return value

    @field_validator("creativity")
    @classmethod
    def validate_creativity(cls, value):
        """Ensure creativity lies in the inclusive range 0 to 1."""
        if not 0 <= value <= 1:
            raise ValueError("Creativity must be between 0 and 1")
        return value


# Conversation Styles
# Presumably the choices offered for PodcastConfig.conversation_style; nothing
# in this module validates against this list.
conversation_styles = [
    "Analytical",
    "Argumentative",
    "Informative",
    "Humorous",
    "Casual",
    "Formal",
    "Inspirational",
    "Debate-style",
    "Interview-style",
    "Storytelling",
    "Reflective",
    "Narrative",
    "Satirical",
    "Educational",
    "Conversational",
    "Critical",
    "Empathetic",
    "Philosophical",
    "Speculative",
    "Motivational",
    "Fun",
    "Technical",
    "Light-hearted",
    "Serious",
    "Investigative",
    "Debunking",
    "Collaborative",
    "Didactic",
    "Thought-provoking",
    "Controversial",
    "Skeptical",
    "Optimistic",
    "Pessimistic",
    "Objective",
    "Subjective",
    "Sarcastic",
    "Emotional",
    "Exploratory",
    "Friendly",
    "Fast-paced",
    "Slow-paced",
    "Introspective",
    "Open-ended",
    "Affirmative",
    "Dissenting",
]

# Dialogue Structures
# Presumably the choices offered for PodcastConfig.dialogue_structure; nothing
# in this module validates against this list.
dialogue_structures = [
    "Topic Introduction",
    "Opening Monologue",
    "Guest Introduction",
    "Icebreakers",
    "Historical Context",
    "Defining Terms",
    "Problem Statement",
    "Overview of the Issue",
    "Deep Dive into Subtopics",
    "Pro Arguments",
    "Con Arguments",
    "Cross-examination",
    "Rebuttal",
    "Expert Interviews",
    "Panel Discussion",
    "Case Studies",
    "Myth Busting",
    "Debunking Misconceptions",
    "Audience Questions",
    "Q&A Session",
    "Listener Feedback",
    "Rapid-fire Questions",
    "Summary of Key Points",
    "Recap",
    "Key Takeaways",
    "Actionable Tips",
    "Call to Action",
    "Future Outlook",
    "Teaser for Next Episode",
    "Closing Remarks",
    "Thank You and Credits",
    "Outtakes or Bloopers",
    "Sponsor Messages",
    "Social Media Shout-outs",
    "Resource Recommendations",
    "Feedback Request",
    "Lightning Round",
    "Behind-the-Scenes Insights",
    "Ethical Considerations",
    "Fact-checking Segment",
    "Trending Topics",
    "Closing Inspirational Quote",
    "Final Reflections",
    "Debrief",
    "Farewell Messages",
    "Next Episode Preview",
    "Live Reactions",
    "Call-in Segment",
    "Acknowledgements",
    "Transition Segments",
    "Break Segments",
]

# Podcast Participant Roles
# Presumably the choices offered for PodcastConfig.person1_role and
# person2_role; nothing in this module validates against this list.
participant_roles = [
    "Main Summarizer",
    "Questioner/Clarifier",
    "Optimist",
    "Skeptic",
    "Specialist",
    "Thesis Presenter",
    "Counterargument Provider",
    "Professor",
    "Student",
    "Moderator",
    "Host",
    "Co-host",
    "Expert Guest",
    "Novice",
    "Devil's Advocate",
    "Analyst",
    "Storyteller",
    "Fact-checker",
    "Comedian",
    "Interviewer",
    "Interviewee",
    "Historian",
    "Visionary",
    "Strategist",
    "Critic",
    "Enthusiast",
    "Mediator",
    "Commentator",
    "Researcher",
    "Reporter",
    "Advocate",
    "Influencer",
    "Observer",
    "Listener",
    "Facilitator",
    "Innovator",
    "Debater",
    "Educator",
    "Motivator",
    "Narrator",
    "Explorer",
    "Opponent",
    "Proponent",
    "Philosopher",
    "Engineer",
    "Doctor",
    "Psychologist",
    "Economist",
    "Politician",
    "Scientist",
    "Entrepreneur",
    "Artist",
    "Author",
    "Journalist",
    "Activist",
    "Challenger",
    "Supporter",
    "Mentor",
    "Mentee",
    "Panelist",
    "Audience Representative",
    "Case Study Presenter",
    "Data Analyst",
    "Ethicist",
    "Cultural Critic",
    "Technologist",
    "Environmentalist",
    "Legal Expert",
    "Healthcare Professional",
    "Financial Advisor",
    "Policy Maker",
    "Sociologist",
    "Anthropologist",
    "Myth Buster",
    "Trend Analyst",
    "Futurist",
    "Negotiator",
    "Community Leader",
    "Voice of Reason",
    "Conflict Resolver",
    "Emotional Support",
    "Pragmatist",
    "Idealist",
    "Realist",
    "Satirist",
    "Story Analyst",
    "Language Expert",
    "Historical Witness",
    "Survivor",
    "Inspirational Figure",
    "Cultural Ambassador",
    "Digital Nomad",
    "Remote Correspondent",
    "Field Reporter",
    "Data Scientist",
    "Gamer",
    "Musician",
    "Filmmaker",
]

# Engagement Techniques
# Presumably the choices offered for PodcastConfig.engagement_technique;
# nothing in this module validates against this list.
engagement_techniques = [
    "Rhetorical Questions",
    "Anecdotes",
    "Analogies",
    "Humor",
    "Metaphors",
    "Storytelling",
    "Quizzes",
    "Polls",
    "Contests/Giveaways",
    "Guest Appearances",
    "Sound Effects",
    "Music Interludes",
    "Shout-outs",
    "Interactive Challenges",
    "Personal Testimonials",
    "Quotes",
    "Jokes",
    "Surprise Elements",
    "Emotional Appeals",
    "Provocative Statements",
    "Irony",
    "Sarcasm",
    "Alliteration",
    "Repetition",
    "Foreshadowing",
    "Cliffhangers",
    "Audience Participation",
    "Sensory Descriptions",
    "Visual Aids (if applicable)",
    "Callbacks to Earlier Points",
    "Pop Culture References",
    "Hyperbole",
    "Parables",
    "Thought Experiments",
    "Puzzles and Riddles",
    "Role-playing",
    "Mock Scenarios",
    "Debates",
    "Sound Bites",
    "Catchphrases",
    "Voice Modulation",
    "Interactive Games",
    "Live Demos",
    "Behind-the-Scenes Insights",
    "Vivid Imagery",
    "Statistics and Facts",
    "Open-ended Questions",
    "Challenges to Assumptions",
    "Evoking Curiosity",
    "Memes (if visual components are included)",
    "Surveys",
    "Testimonials",
    "Provocations",
]