Added some silence trimming

2025-04-13 20:39:09 +00:00 · 2024-07-30 11:56:40 -04:00 · 2024-07-30 11:56:40 -04:00 · 81e4e165bc
commit 81e4e165bc
parent f5af4960fc
2 changed files with 36 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,12 @@ __pycache__/
 *.wav
 *.tns

+# Ignore my ffmpeg
+*.exe
+
+# Ignore my summary files
+summary-*.md
+
 # Distribution / packaging
 .Python
 build/
--- a/summarize.py
+++ b/summarize.py
@@ -1,6 +1,8 @@
 import os
 import requests
 import datetime
+import tempfile
+import subprocess
 from dotenv import load_dotenv

 # Load environment variables from .env file
@@ -48,10 +50,37 @@ def llama_api(prompt):
    json_output = response.json()
    return json_output['content']

+# Use ffmpeg to trim silence in wav files, to prevent issues with
+# whisper.cpp stopping the transcription if it detects a large amount of silence
+def trim_silence(filename):
+    """Remove silent stretches from a WAV file, rewriting it in place.
+
+    ffmpeg (looked up on PATH) writes the filtered audio to a temporary
+    file, which then replaces ``filename`` only if ffmpeg succeeded.
+
+    Args:
+        filename: Path of the WAV file to process; it is overwritten.
+
+    Raises:
+        subprocess.CalledProcessError: ffmpeg exited with a non-zero status
+            (its captured stdout/stderr are attached to the exception).
+        OSError: ffmpeg could not be started or the file could not be replaced.
+    """
+    # Create the temporary output next to the original file so that the final
+    # os.replace() never has to cross a filesystem/drive boundary.
+    target_dir = os.path.dirname(os.path.abspath(filename))
+    with tempfile.NamedTemporaryFile(
+        suffix=".wav", dir=target_dir, delete=False
+    ) as temp_file:
+        temp_filename = temp_file.name
+
+    # Construct the FFmpeg command
+    ffmpeg_command = [
+        "ffmpeg",
+        "-i", filename,
+        # silenceremove with a -40dB threshold and 1 second duration;
+        # see the ffmpeg filter docs for the meaning of stop_periods=-1
+        "-af", "silenceremove=stop_threshold=-40dB:stop_duration=1:stop_periods=-1",
+        "-y",  # Overwrite output file if it exists
+        temp_filename
+    ]
+
+    try:
+        # check=True raises on failure so the original file is left untouched;
+        # errors="replace" keeps undecodable ffmpeg output from raising here.
+        subprocess.run(
+            ffmpeg_command,
+            capture_output=True,
+            text=True,
+            errors="replace",
+            check=True,
+        )
+
+        # FFmpeg command was successful, replace the original file
+        os.replace(temp_filename, filename)
+    finally:
+        # After a successful replace the temp path no longer exists; on any
+        # failure, remove the leftover temp file instead of leaking it.
+        if os.path.exists(temp_filename):
+            os.remove(temp_filename)
+
 # Iterate over each WAV file and transcode with whisper API
 wav_files = [f for f in os.listdir(".") if f.endswith(".wav")]
 for wav_file in wav_files:
-    # Open the WAV file
+
+    # Trim silence on the wav file first
+    print("Trimming silence: " + wav_file)
+    trim_silence(wav_file)
+    
+    # Open the WAV file for sending to whisper REST API
    with open(wav_file, "rb") as file:
        print("Transcribing: " + wav_file)
        # Call whisper API to transcode file