mirror of
https://github.com/patw/AudioSumma.git
synced 2025-04-16 13:59:08 +00:00
Added some silence trimming
This commit is contained in:
parent
f5af4960fc
commit
81e4e165bc
2 changed files with 36 additions and 1 deletions
6
.gitignore
vendored
6
.gitignore
vendored
|
@ -13,6 +13,12 @@ __pycache__/
|
||||||
*.wav
|
*.wav
|
||||||
*.tns
|
*.tns
|
||||||
|
|
||||||
|
# Ignore my ffmpeg
|
||||||
|
*.exe
|
||||||
|
|
||||||
|
# Ignore my summary files
|
||||||
|
summary-*.md
|
||||||
|
|
||||||
# Distribution / packaging
|
# Distribution / packaging
|
||||||
.Python
|
.Python
|
||||||
build/
|
build/
|
||||||
|
|
31
summarize.py
31
summarize.py
|
@ -1,6 +1,8 @@
|
||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
import datetime
|
import datetime
|
||||||
|
import tempfile
|
||||||
|
import subprocess
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
# Load environment variables from .env file
|
# Load environment variables from .env file
|
||||||
|
@ -48,10 +50,37 @@ def llama_api(prompt):
|
||||||
json_output = response.json()
|
json_output = response.json()
|
||||||
return json_output['content']
|
return json_output['content']
|
||||||
|
|
||||||
|
# Use ffmpeg to trim silence in wav files, to prevent issues with
|
||||||
|
# whisper.cpp stopping the transcode if it detects a large amount of silence
|
||||||
|
def trim_silence(filename):
    """Strip silent stretches from a WAV file in place using ffmpeg.

    ffmpeg writes the trimmed audio to a temporary file, which then
    replaces ``filename`` only if ffmpeg succeeded.

    Args:
        filename: Path of the WAV file to trim; overwritten on success.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        OSError: If the temporary file cannot be created or moved into place.
    """
    # Create the temporary output next to the target file. os.replace() can
    # fail when source and destination live on different filesystems, which
    # happens when the system temp directory is on another drive or mount.
    target_dir = os.path.dirname(os.path.abspath(filename))
    with tempfile.NamedTemporaryFile(
        suffix=".wav", dir=target_dir, delete=False
    ) as temp_file:
        temp_filename = temp_file.name

    # Construct the FFmpeg command
    ffmpeg_command = [
        "ffmpeg",
        "-i", filename,
        "-af", "silenceremove=stop_threshold=-40dB:stop_duration=1:stop_periods=-1",
        "-y",  # Overwrite output file if it exists
        temp_filename,
    ]

    try:
        # check=True raises on a non-zero exit status, so the original file
        # is only replaced when ffmpeg produced a complete output.
        subprocess.run(ffmpeg_command, capture_output=True, text=True, check=True)
        os.replace(temp_filename, filename)
    finally:
        # After a successful replace the temp path no longer exists; on any
        # failure this removes the leftover so temp files do not accumulate.
        try:
            os.remove(temp_filename)
        except FileNotFoundError:
            pass
|
||||||
|
|
||||||
# Iterate over each WAV file and transcode with whisper API
|
# Iterate over each WAV file and transcode with whisper API
|
||||||
wav_files = [f for f in os.listdir(".") if f.endswith(".wav")]
|
wav_files = [f for f in os.listdir(".") if f.endswith(".wav")]
|
||||||
for wav_file in wav_files:
|
for wav_file in wav_files:
|
||||||
# Open the WAV file
|
|
||||||
|
# Trim silence on the wav file first
|
||||||
|
print("Trimming silence: " + wav_file)
|
||||||
|
trim_silence(wav_file)
|
||||||
|
|
||||||
|
# Open the WAV file for sending to whisper REST API
|
||||||
with open(wav_file, "rb") as file:
|
with open(wav_file, "rb") as file:
|
||||||
print("Transcribing: " + wav_file)
|
print("Transcribing: " + wav_file)
|
||||||
# Call whisper API to transcode file
|
# Call whisper API to transcode file
|
||||||
|
|
Loading…
Add table
Reference in a new issue