# AudioSumma/summarize.py

import os
import requests
import datetime
import tempfile
import subprocess
from dotenv import load_dotenv

# Use local models with the OpenAI library and a custom baseurl
from openai import OpenAI

# Load environment variables from .env file
load_dotenv()

# Load settings from environment.
# String settings are None when their variable is unset; the numeric
# conversions below raise at import time for unset or malformed values.
WHISPERCPP_URL = os.getenv("WHISPERCPP_URL")
LLAMACPP_URL = os.getenv("LLAMACPP_URL")
SYSTEM_MESSAGE = os.getenv("SYSTEM_MESSAGE")
SUMMARY_PROMPT = os.getenv("SUMMARY_PROMPT")
FACT_PROMPT = os.getenv("FACT_PROMPT")
SENTIMENT_PROMPT = os.getenv("SENTIMENT_PROMPT")
CHUNK_SIZE = int(os.getenv("CHUNK_SIZE"))
TEMPERATURE = float(os.getenv("TEMPERATURE"))
TOP_P = float(os.getenv("TOP_P"))
# The value is passed as `max_tokens`, which is a whole-number token limit.
# Going through float() first keeps values such as "512.0" loadable, which
# the previous float-only parsing accepted.
MAX_TOKENS = int(float(os.getenv("MAX_TOKENS")))

def whisper_api(file):
    """Transcribe an audio file using the Whisper server at WHISPERCPP_URL.

    Args:
        file: Binary file object to upload; sent as the multipart "file" field.

    Returns:
        The value stored under the "text" key of the JSON response.

    Raises:
        requests.HTTPError: If the server replies with a 4xx/5xx status.
    """
    api_data = {
        "temperature": "0.0",
        "response_format": "json",
    }
    response = requests.post(WHISPERCPP_URL, data=api_data, files={"file": file})
    # Surface the HTTP failure itself rather than a confusing JSON decode
    # error or KeyError raised while reading an error body.
    response.raise_for_status()
    return response.json()["text"]

def llm_local(prompt):
    """Send one prompt to the chat endpoint at LLAMACPP_URL and return the reply.

    The request carries SYSTEM_MESSAGE as the system message and `prompt` as
    the user message, sampled with TEMPERATURE and TOP_P.

    Args:
        prompt: Text of the user message.

    Returns:
        The message content of the first choice in the response.
    """
    client = OpenAI(api_key="doesntmatter", base_url=LLAMACPP_URL)
    messages = [
        {"role": "system", "content": SYSTEM_MESSAGE},
        {"role": "user", "content": prompt},
    ]
    response = client.chat.completions.create(
        model="whatever",
        # max_tokens is an integer token limit; coerce in case the configured
        # value was loaded as a float (e.g. 512.0).
        max_tokens=int(MAX_TOKENS),
        temperature=TEMPERATURE,
        top_p=TOP_P,
        messages=messages,
    )
    return response.choices[0].message.content

def trim_silence(filename):
    """Trim silence from an audio file in place using FFmpeg.

    FFmpeg writes the filtered audio to a temporary .wav file, which then
    replaces `filename`. The temporary file is removed if anything fails, so
    a failed run leaves the original file untouched and no stray temp file.

    Args:
        filename: Path of the audio file to rewrite.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ffmpeg executable cannot be found.
    """
    import shutil

    # Only the name is needed here; ffmpeg overwrites the (empty) file via -y.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_filename = temp_file.name

    ffmpeg_command = [
        "ffmpeg",
        "-i", filename,
        # Remove stretches below -40 dB lasting at least 1 s; the negative
        # stop_periods applies this throughout the file (see the FFmpeg
        # silenceremove filter documentation).
        "-af", "silenceremove=stop_threshold=-40dB:stop_duration=1:stop_periods=-1",
        "-y",  # Overwrite output file if it exists
        temp_filename,
    ]

    try:
        subprocess.run(ffmpeg_command, capture_output=True, text=True, check=True)
        # The temp directory may live on a different filesystem than the
        # target, where a plain rename fails; shutil.move falls back to
        # copy-and-delete in that case.
        shutil.move(temp_filename, filename)
    finally:
        # After a successful move the temp file is already gone.
        try:
            os.remove(temp_filename)
        except FileNotFoundError:
            pass

def process_wav_files():
    """Trim and transcribe each .wav file in the current directory.

    A file is skipped when a transcript with the same base name and a .tns
    extension already exists; otherwise its silence is trimmed, the audio is
    sent for transcription, and the returned text is written to that .tns
    file.
    """
    for entry in os.listdir("."):
        if not entry.endswith(".wav"):
            continue

        # The transcript sits next to the audio: same base name, .tns suffix.
        stem, _ = os.path.splitext(entry)
        tns_path = stem + ".tns"

        if os.path.exists(tns_path):
            print(f"Transcript already exists for {entry}, skipping transcription")
            continue

        print(f"Trimming silence: {entry}")
        trim_silence(entry)

        with open(entry, "rb") as audio:
            print(f"Transcribing: {entry}")
            text = whisper_api(audio)
            # Opened only after transcription succeeds, so a failed request
            # does not leave an empty transcript behind.
            with open(tns_path, "w") as sink:
                sink.write(text)

def chunk_transcript(string, chunk_size):
    """Chunk the transcript to fit in the context window.

    The text is split on newlines and whole lines are grouped until the
    accumulated text length (line separators not counted) reaches
    `chunk_size`. Lines inside a chunk stay separated by a newline so that
    words at line boundaries are not fused together.

    A single line is never split, so a chunk can exceed `chunk_size`.

    Args:
        string: Full transcript text.
        chunk_size: Number of characters at which a chunk is closed.

    Returns:
        List of chunk strings in original order; empty when the transcript
        contains no text.
    """
    chunks = []
    pending = []
    pending_chars = 0
    for line in string.split("\n"):
        pending.append(line)
        pending_chars += len(line)
        if pending_chars >= chunk_size:
            chunks.append("\n".join(pending))
            pending = []
            pending_chars = 0
    # Blank-only leftovers carry no text and are dropped.
    if pending_chars:
        chunks.append("\n".join(pending))
    return chunks

def summarize_transcripts():
    """Summarize every .tns transcript in the current directory.

    Each transcript is chunked with CHUNK_SIZE. For every chunk the LLM is
    asked for a summary, a fact list and a sentiment, and the three answers
    are appended as one section to today's summary-YYYYMMDD.md file.
    """
    stamp = datetime.datetime.now().strftime('%Y%m%d')
    report_path = f"summary-{stamp}.md"

    for tns_name in (name for name in os.listdir(".") if name.endswith(".tns")):
        print(f"Summarizing: {tns_name}")
        with open(tns_name, "r") as source:
            parts = chunk_transcript(source.read(), CHUNK_SIZE)

            # Append mode: sections from all transcripts accumulate in one file.
            with open(report_path, "a") as report:
                for index, part in enumerate(parts):
                    print(f"Processing part {index}")
                    # One request per prompt template, in this fixed order.
                    summary, facts, sentiment = (
                        llm_local(template.format(chunk=part))
                        for template in (SUMMARY_PROMPT, FACT_PROMPT, SENTIMENT_PROMPT)
                    )

                    section = f"# Call Transcript - {tns_name} - Part {index + 1}\n\nSummary: {summary}\n\nFacts:\n{facts}\n\nSentiment: {sentiment}\n\n---\n"
                    report.write(section)

    print("Summarizing complete")

if __name__ == "__main__":
    # Run the pipeline stages in order: transcription first, then
    # summarization of the transcripts found in the current directory.
    for stage in (process_wav_files, summarize_transcripts):
        stage()
# No results found.