mirror of https://github.com/patw/AudioSumma.git
synced 2025-09-02 18:50:21 +00:00
After merging summarize into meetings, removed meetings.py
parent 04659ed489
commit 2fe4893296
3 changed files with 4 additions and 119 deletions
.gitignore (vendored): 4 additions
@@ -3,6 +3,9 @@ __pycache__/
 *.py[cod]
 *$py.class
 
+# Stupid mac stuff
+.DS_Store
+
 # C extensions
 *.so
 
@@ -140,3 +143,4 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+.aider*
meetings.sh: 0 changes (Normal file → Executable file)
summarize.py: 119 deletions
@@ -1,119 +0,0 @@
-import os
-import requests
-import datetime
-import tempfile
-import subprocess
-from dotenv import load_dotenv
-
-# Use local models with the OpenAI library and a custom baseurl
-from openai import OpenAI
-
-# Load environment variables from .env file
-load_dotenv()
-
-# Load settings from environment
-WHISPERCPP_URL = os.getenv("WHISPERCPP_URL")
-LLAMACPP_URL = os.getenv("LLAMACPP_URL")
-SYSTEM_MESSAGE = os.getenv("SYSTEM_MESSAGE")
-SUMMARY_PROMPT = os.getenv("SUMMARY_PROMPT")
-FACT_PROMPT = os.getenv("FACT_PROMPT")
-SENTIMENT_PROMPT = os.getenv("SENTIMENT_PROMPT")
-CHUNK_SIZE = int(os.getenv("CHUNK_SIZE"))
-TEMPERATURE = float(os.getenv("TEMPERATURE"))
-TOP_P = float(os.getenv("TOP_P"))
-MAX_TOKENS = float(os.getenv("MAX_TOKENS"))
-
-def whisper_api(file):
-    """Transcribe audio file using Whisper API."""
-    files = {"file": file}
-    api_data = {
-        "temperature": "0.0",
-        "response_format": "json"
-    }
-    response = requests.post(WHISPERCPP_URL, data=api_data, files=files)
-    return response.json()["text"]
-
-def llm_local(prompt):
-    client = OpenAI(api_key="doesntmatter", base_url=LLAMACPP_URL)
-    messages = [{"role": "system", "content": SYSTEM_MESSAGE}, {"role": "user", "content": prompt}]
-    response = client.chat.completions.create(model="whatever", max_tokens=MAX_TOKENS, temperature=TEMPERATURE, top_p=TOP_P, messages=messages)
-    return response.choices[0].message.content
-
-def trim_silence(filename):
-    """Trim silence from audio file using FFmpeg."""
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-        temp_filename = temp_file.name
-
-    ffmpeg_command = [
-        "ffmpeg",
-        "-i", filename,
-        "-af", "silenceremove=stop_threshold=-40dB:stop_duration=1:stop_periods=-1",
-        "-y",  # Overwrite output file if it exists
-        temp_filename
-    ]
-
-    result = subprocess.run(ffmpeg_command, capture_output=True, text=True, check=True)
-    os.replace(temp_filename, filename)
-
-def process_wav_files():
-    """Process WAV files: trim silence and transcribe."""
-    wav_files = [f for f in os.listdir(".") if f.endswith(".wav")]
-    for wav_file in wav_files:
-        # Generate the expected transcript filename
-        transcript_file = os.path.splitext(wav_file)[0] + ".tns"
-
-        # Check if transcript already exists
-        if os.path.exists(transcript_file):
-            print(f"Transcript already exists for {wav_file}, skipping transcription")
-            continue
-
-        print("Trimming silence: " + wav_file)
-        trim_silence(wav_file)
-
-        with open(wav_file, "rb") as file:
-            print("Transcribing: " + wav_file)
-            output_text = whisper_api(file)
-            output_file = os.path.splitext(wav_file)[0] + ".tns"
-            with open(output_file, "w") as output:
-                output.write(output_text)
-
-def chunk_transcript(string, chunk_size):
-    """Chunk the transcript to fit in the context window."""
-    chunks = []
-    lines = string.split("\n")
-    current_chunk = ""
-    for line in lines:
-        current_chunk += line
-        if len(current_chunk) >= chunk_size:
-            chunks.append(current_chunk)
-            current_chunk = ""
-    if current_chunk:
-        chunks.append(current_chunk)
-    return chunks
-
-def summarize_transcripts():
-    """Summarize transcript files."""
-    today = datetime.datetime.now().strftime('%Y%m%d')
-    summary_filename = "summary-" + today + ".md"
-    transcript_files = [f for f in os.listdir(".") if f.endswith(".tns")]
-
-    for transcript in transcript_files:
-        print("Summarizing: " + transcript)
-        with open(transcript, "r") as file:
-            transcript_data = file.read()
-        chunked_data = chunk_transcript(transcript_data, CHUNK_SIZE)
-
-        with open(summary_filename, "a") as md_file:
-            for i, chunk in enumerate(chunked_data):
-                print("Processing part " + str(i))
-                summary = llm_local(SUMMARY_PROMPT.format(chunk=chunk))
-                facts = llm_local(FACT_PROMPT.format(chunk=chunk))
-                sentiment = llm_local(SENTIMENT_PROMPT.format(chunk=chunk))
-
-                md_file.write(f"# Call Transcript - {transcript} - Part {i + 1}\n\nSummary: {summary}\n\nFacts:\n{facts}\n\nSentiment: {sentiment}\n\n---\n")
-
-    print("Summarizing complete")
-
-if __name__ == "__main__":
-    process_wav_files()
-    summarize_transcripts()
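
For context, the removed script read all of its configuration from a .env file via load_dotenv(). Below is a minimal sketch of such a file; only the variable names come from the script, and every value is an assumption. Note that the three prompt templates must contain a {chunk} placeholder, since the script fills them with str.format.

# Hypothetical .env for summarize.py; all values are examples, not project defaults
WHISPERCPP_URL=http://127.0.0.1:8080/inference
LLAMACPP_URL=http://127.0.0.1:8081/v1
SYSTEM_MESSAGE=You are an assistant that analyzes meeting transcripts.
SUMMARY_PROMPT=Summarize this transcript chunk: {chunk}
FACT_PROMPT=List the key facts from this transcript chunk: {chunk}
SENTIMENT_PROMPT=Describe the overall sentiment of this transcript chunk: {chunk}
CHUNK_SIZE=8192
TEMPERATURE=0.1
TOP_P=0.9
MAX_TOKENS=1024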
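
To sanity-check the transcription server on its own, the request that whisper_api sent can be reproduced in a few lines of Python. The URL and filename here are assumptions (whisper.cpp's example server exposes an /inference route by default, but use whatever WHISPERCPP_URL you actually run):

import requests

WHISPERCPP_URL = "http://127.0.0.1:8080/inference"  # assumed endpoint, not from the repo

# Post a WAV file with the same form fields whisper_api used.
with open("meeting.wav", "rb") as f:  # hypothetical test recording
    response = requests.post(
        WHISPERCPP_URL,
        data={"temperature": "0.0", "response_format": "json"},
        files={"file": f},
    )
response.raise_for_status()
print(response.json()["text"])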
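
One behavior of chunk_transcript worth knowing before reusing it elsewhere: because each line is concatenated without re-adding the "\n" that split() removed, chunks come back with their line breaks stripped. A self-contained sketch (the function is copied from the removed file; the sample input is made up):

def chunk_transcript(string, chunk_size):
    """Chunk the transcript to fit in the context window."""
    chunks = []
    lines = string.split("\n")
    current_chunk = ""
    for line in lines:
        current_chunk += line
        if len(current_chunk) >= chunk_size:
            chunks.append(current_chunk)
            current_chunk = ""
    if current_chunk:
        chunks.append(current_chunk)
    return chunks

print(chunk_transcript("one\ntwo\nthree", 6))
# prints ['onetwo', 'three'] - the newline between "one" and "two" is gone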