mirror of
https://github.com/patw/AudioSumma.git
synced 2025-04-13 20:39:09 +00:00
Added some silence trimming
This commit is contained in:
parent
f5af4960fc
commit
81e4e165bc
2 changed files with 36 additions and 1 deletions
6
.gitignore
vendored
6
.gitignore
vendored
|
@ -13,6 +13,12 @@ __pycache__/
|
|||
*.wav
|
||||
*.tns
|
||||
|
||||
# Ignore my ffmpeg
|
||||
*.exe
|
||||
|
||||
# Ignore my summary files
|
||||
summary-*.md
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
|
|
31
summarize.py
31
summarize.py
|
@ -1,6 +1,8 @@
|
|||
import os
|
||||
import requests
|
||||
import datetime
|
||||
import tempfile
|
||||
import subprocess
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables from .env file
|
||||
|
@ -48,10 +50,37 @@ def llama_api(prompt):
|
|||
json_output = response.json()
|
||||
return json_output['content']
|
||||
|
||||
# Use ffmpeg to trim silence in wav files, to prevent issues with
|
||||
# whisper.cpp stopping the transcode if it detects a large amount of silence
|
||||
def trim_silence(filename):
    """Remove silent stretches from a WAV file, rewriting it in place.

    ffmpeg's ``silenceremove`` filter writes the trimmed audio to a
    temporary file, which then replaces ``filename``.

    Args:
        filename: Path to the WAV file to trim. It is overwritten on success
            and left untouched on failure.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        OSError: If the trimmed file cannot be moved over the original.
    """
    # Create the scratch file next to the target: os.replace() cannot move a
    # file across filesystems, and the system temp dir is often on another one.
    target_dir = os.path.dirname(os.path.abspath(filename))

    # The suffix deliberately does not end in ".wav" so a leftover scratch
    # file is never mistaken for a recording by code that scans for "*.wav".
    with tempfile.NamedTemporaryFile(
        suffix=".wav.tmp", dir=target_dir, delete=False
    ) as temp_file:
        temp_filename = temp_file.name

    ffmpeg_command = [
        "ffmpeg",
        "-i", filename,
        "-af", "silenceremove=stop_threshold=-40dB:stop_duration=1:stop_periods=-1",
        "-f", "wav",  # Output format can't be inferred from the ".tmp" name
        "-y",  # Overwrite output file if it exists
        temp_filename,
    ]

    try:
        # check=True raises on a non-zero exit so a failed trim never
        # replaces the original recording.
        subprocess.run(ffmpeg_command, capture_output=True, text=True, check=True)
        os.replace(temp_filename, filename)
    finally:
        # After a successful replace the scratch file no longer exists; on any
        # failure remove it so aborted runs do not litter the directory.
        if os.path.exists(temp_filename):
            os.remove(temp_filename)
|
||||
|
||||
# Iterate over each WAV file and transcode with whisper API
|
||||
wav_files = [f for f in os.listdir(".") if f.endswith(".wav")]
|
||||
for wav_file in wav_files:
|
||||
# Open the WAV file
|
||||
|
||||
# Trim silence on the wav file first
|
||||
print("Trimming silence: " + wav_file)
|
||||
trim_silence(wav_file)
|
||||
|
||||
# Open the WAV file for sending to whisper REST API
|
||||
with open(wav_file, "rb") as file:
|
||||
print("Transcribing: " + wav_file)
|
||||
# Call whisper API to transcode file
|
||||
|
|
Loading…
Add table
Reference in a new issue