After merging summarize into meetings removed meetings.py

2026-04-28 03:29:53 +00:00 · 2025-05-09 09:37:26 -04:00 · 2025-05-09 09:37:26 -04:00 · 2fe4893296
commit 2fe4893296
parent 04659ed489
3 changed files with 4 additions and 119 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,6 +3,9 @@ __pycache__/
 *.py[cod]
 *$py.class

+# Stupid mac stuff
+.DS_Store
+
 # C extensions
 *.so

@ -140,3 +143,4 @@ dmypy.json

 # Pyre type checker
 .pyre/
+.aider*
--- a/meetings.sh
+++ b/meetings.sh
--- a/summarize.py
+++ b/summarize.py
@ -1,119 +0,0 @@
-import os
-import requests
-import datetime
-import tempfile
-import subprocess
-from dotenv import load_dotenv
-
-# Use local models with the OpenAI library and a custom baseurl
-from openai import OpenAI
-
-# Load environment variables from .env file
-load_dotenv()
-
-# Load settings from environment
-WHISPERCPP_URL = os.getenv("WHISPERCPP_URL")
-LLAMACPP_URL = os.getenv("LLAMACPP_URL")
-SYSTEM_MESSAGE = os.getenv("SYSTEM_MESSAGE")
-SUMMARY_PROMPT = os.getenv("SUMMARY_PROMPT")
-FACT_PROMPT = os.getenv("FACT_PROMPT")
-SENTIMENT_PROMPT = os.getenv("SENTIMENT_PROMPT")
-CHUNK_SIZE = int(os.getenv("CHUNK_SIZE"))
-TEMPERATURE = float(os.getenv("TEMPERATURE"))
-TOP_P = float(os.getenv("TOP_P"))
-MAX_TOKENS = float(os.getenv("MAX_TOKENS"))
-
-def whisper_api(file):
-    """Transcribe audio file using Whisper API."""
-    files = {"file": file}
-    api_data = {
-        "temperature": "0.0",
-        "response_format": "json"
-    }
-    response = requests.post(WHISPERCPP_URL, data=api_data, files=files)
-    return response.json()["text"]
-
-def llm_local(prompt):
-    client = OpenAI(api_key="doesntmatter", base_url=LLAMACPP_URL)
-    messages=[{"role": "system", "content": SYSTEM_MESSAGE},{"role": "user", "content": prompt}]
-    response = client.chat.completions.create(model="whatever", max_tokens=MAX_TOKENS, temperature=TEMPERATURE, top_p=TOP_P, messages=messages)
-    return response.choices[0].message.content
-
-def trim_silence(filename):
-    """Trim silence from audio file using FFmpeg."""
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-        temp_filename = temp_file.name
-
-    ffmpeg_command = [
-        "ffmpeg",
-        "-i", filename,
-        "-af", "silenceremove=stop_threshold=-40dB:stop_duration=1:stop_periods=-1",
-        "-y",  # Overwrite output file if it exists
-        temp_filename
-    ]
-
-    result = subprocess.run(ffmpeg_command, capture_output=True, text=True, check=True)
-    os.replace(temp_filename, filename)
-
-def process_wav_files():
-    """Process WAV files: trim silence and transcribe."""
-    wav_files = [f for f in os.listdir(".") if f.endswith(".wav")]
-    for wav_file in wav_files:
-        # Generate the expected transcript filename
-        transcript_file = os.path.splitext(wav_file)[0] + ".tns"
-
-        # Check if transcript already exists
-        if os.path.exists(transcript_file):
-            print(f"Transcript already exists for {wav_file}, skipping transcription")
-            continue
-
-        print("Trimming silence: " + wav_file)
-        trim_silence(wav_file)
-
-        with open(wav_file, "rb") as file:
-            print("Transcribing: " + wav_file)
-            output_text = whisper_api(file)
-            output_file = os.path.splitext(wav_file)[0] + ".tns"
-            with open(output_file, "w") as output:
-                output.write(output_text)
-
-def chunk_transcript(string, chunk_size):
-    """Chunk the transcript to fit in the context window."""
-    chunks = []
-    lines = string.split("\n")
-    current_chunk = ""
-    for line in lines:
-        current_chunk += line
-        if len(current_chunk) >= chunk_size:
-            chunks.append(current_chunk)
-            current_chunk = ""
-    if current_chunk:
-        chunks.append(current_chunk)
-    return chunks
-
-def summarize_transcripts():
-    """Summarize transcript files."""
-    today = datetime.datetime.now().strftime('%Y%m%d')
-    summary_filename = "summary-" + today + ".md"
-    transcript_files = [f for f in os.listdir(".") if f.endswith(".tns")]
-
-    for transcript in transcript_files:
-        print("Summarizing: " + transcript)
-        with open(transcript, "r") as file:
-            transcript_data = file.read()
-            chunked_data = chunk_transcript(transcript_data, CHUNK_SIZE)
-
-            with open(summary_filename, "a") as md_file:
-                for i, chunk in enumerate(chunked_data):
-                    print("Processing part " + str(i))
-                    summary = llm_local(SUMMARY_PROMPT.format(chunk=chunk))
-                    facts = llm_local(FACT_PROMPT.format(chunk=chunk))
-                    sentiment = llm_local(SENTIMENT_PROMPT.format(chunk=chunk))
-
-                    md_file.write(f"# Call Transcript - {transcript} - Part {i + 1}\n\nSummary: {summary}\n\nFacts:\n{facts}\n\nSentiment: {sentiment}\n\n---\n")
-
-    print("Summarizing complete")
-
-if __name__ == "__main__":
-    process_wav_files()
-    summarize_transcripts()