Mirror of https://github.com/patw/AudioSumma.git · synced 2025-09-04 19:50:29 +00:00
Used Mistral Large to rewrite the summarize process.
parent 81e4e165bc
commit 3c05cd78dd
1 changed file with 38 additions and 72 deletions
summarize.py (110 lines changed)
@@ -21,23 +21,18 @@ CHUNK_SIZE = int(os.getenv("CHUNK_SIZE"))
 TEMPERATURE = float(os.getenv("TEMPERATURE"))
 
 def whisper_api(file):
-    # Whisper supports multiple files, but we're sending one
+    """Transcribe audio file using Whisper API."""
     files = {"file": file}
-
-    # Required API call data
     api_data = {
         "temperature": "0.0",
         "response_format": "json"
     }
-
-    # Call API and return text
     response = requests.post(WHISPERCPP_URL, data=api_data, files=files)
     return response.json()["text"]
 
 def llama_api(prompt):
-    # Format prompt before sending
+    """Generate response using llama.cpp server API."""
     formatted_prompt = PROMPT_FORMAT.format(system=SYSTEM_MESSAGE, prompt=prompt)
-
     api_data = {
         "prompt": formatted_prompt,
         "n_predict": -1,
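Both helpers talk to local inference servers over HTTP. A standalone sketch of the same two calls, for reference; the URLs here are assumptions (summarize.py reads WHISPERCPP_URL and LLAMACPP_URL from environment variables), and the stop token is a placeholder:

import requests

# Assumed endpoints for local whisper.cpp and llama.cpp servers; the script
# itself takes these from the environment rather than hardcoding them.
WHISPERCPP_URL = "http://localhost:8080/inference"
LLAMACPP_URL = "http://localhost:8081/completion"

# Transcribe one WAV file via the whisper.cpp server (multipart upload).
with open("call.wav", "rb") as f:  # hypothetical input file
    text = requests.post(
        WHISPERCPP_URL,
        data={"temperature": "0.0", "response_format": "json"},
        files={"file": f},
    ).json()["text"]

# Complete a prompt via the llama.cpp server; n_predict=-1 generates
# until a stop token is produced.
completion = requests.post(
    LLAMACPP_URL,
    json={"prompt": text, "n_predict": -1, "stop": ["</s>"]},  # placeholder stop token
).json()["content"]
print(completion)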
@@ -45,19 +40,15 @@ def llama_api(prompt):
         "stop": [STOP_TOKEN],
         "tokens_cached": 0
     }
 
     response = requests.post(LLAMACPP_URL, headers={"Content-Type": "application/json"}, json=api_data)
     json_output = response.json()
     return json_output['content']
 
-# Use ffmpeg to trim silence in wav files, to prevent issues with
-# whisper.cpp stopping the transcode if it detects a large amount of silence
 def trim_silence(filename):
-    # Create a temporary file for the output
+    """Trim silence from audio file using FFmpeg."""
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
         temp_filename = temp_file.name
     ffmpeg_command = [
         "ffmpeg",
         "-i", filename,
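The ffmpeg filter arguments themselves fall between this hunk and the next, so the exact options aren't visible in the diff. A hypothetical sketch of the same silence-trimming step using ffmpeg's silenceremove filter; the filter values are illustrative, not the committed ones:

import subprocess
import tempfile

# Write the trimmed audio to a temporary file, as trim_silence does above.
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
    temp_filename = temp_file.name

ffmpeg_command = [
    "ffmpeg", "-y",
    "-i", "input.wav",  # hypothetical input file
    # Strip leading silence, then any silent stretch longer than ~1 second.
    "-af", "silenceremove=start_periods=1:start_threshold=-50dB:"
           "stop_periods=-1:stop_duration=1:stop_threshold=-50dB",
    temp_filename,
]
subprocess.run(ffmpeg_command, capture_output=True, text=True, check=True)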
@@ -66,84 +57,59 @@ def trim_silence(filename):
         temp_filename
     ]
 
-    # Run the FFmpeg command
     result = subprocess.run(ffmpeg_command, capture_output=True, text=True, check=True)
-
-    # If FFmpeg command was successful, replace the original file
     os.replace(temp_filename, filename)
 
-# Iterate over each WAV file and transcode with whisper API
-wav_files = [f for f in os.listdir(".") if f.endswith(".wav")]
-for wav_file in wav_files:
-
-    # Trim silence on the wav file first
-    print("Trimming silence: " + wav_file)
-    trim_silence(wav_file)
-
-    # Open the WAV file for sending to whisper REST API
-    with open(wav_file, "rb") as file:
-        print("Transcribing: " + wav_file)
-        # Call whisper API to transcode file
-        output_text = whisper_api(file)
-
-        # Generate the output file name by replacing the extension with .tns
-        output_file = os.path.splitext(wav_file)[0] + ".tns"
-
-        # Write the output text to the file
-        with open(output_file, "w") as output:
-            output.write(output_text)
+def process_wav_files():
+    """Process WAV files: trim silence and transcribe."""
+    wav_files = [f for f in os.listdir(".") if f.endswith(".wav")]
+    for wav_file in wav_files:
+        print("Trimming silence: " + wav_file)
+        trim_silence(wav_file)
+
+        with open(wav_file, "rb") as file:
+            print("Transcribing: " + wav_file)
+            output_text = whisper_api(file)
+            output_file = os.path.splitext(wav_file)[0] + ".tns"
+            with open(output_file, "w") as output:
+                output.write(output_text)
 
-# Chunk the full transcript into multiple parts to fit in the context window
-# and allow for better reasoning capability
 def chunk_transcript(string, chunk_size):
+    """Chunk the transcript to fit in the context window."""
     chunks = []
-    lines = string.split("\n") # Split the string on newline characters
+    lines = string.split("\n")
     current_chunk = ""
     for line in lines:
-        current_chunk += line # Build up the string until the chunk size is reached
+        current_chunk += line
        if len(current_chunk) >= chunk_size:
             chunks.append(current_chunk)
             current_chunk = ""
-    if current_chunk: # Add the last chunk if it's not empty
+    if current_chunk:
         chunks.append(current_chunk)
     return chunks
 
-# Get the current date in yyyymmdd format
-today = datetime.datetime.now().strftime('%Y%m%d')
-
-# Modify the filename by appending the current date
-summary_filename = "summary-" + today + ".md"
-
-# Get the list of transcript files in the current directory
-transcript_files = [f for f in os.listdir(".") if f.endswith(".tns")]
-
-# Iterate over each WAV file
-for transcript in transcript_files:
-    print("Summarizing: " + transcript)
-
-    # Open the WAV file
-    with open(transcript, "r") as file:
-        transcript_data = file.read()
-
-        # chunk the transcript so we don't blow out the context window
-        chunked_data = chunk_transcript(transcript_data, CHUNK_SIZE)
-
-        # Iterate through the chunks, and summarize them
-        for i, chunk in enumerate(chunked_data):
-            with open(summary_filename, "a") as md_file:
-                # Generate call summary
-                summary_prompt = SUMMARY_PROMPT.format(chunk=chunk)
-                summary = llama_api(summary_prompt)
-
-                # Generate fact summary
-                fact_prompt = FACT_PROMPT.format(chunk=chunk)
-                facts = llama_api(fact_prompt)
-
-                # Generate call sentiment
-                sentiment_prompt = SENTIMENT_PROMPT.format(chunk=chunk)
-                sentiment = llama_api(sentiment_prompt)
-
-                # Write the notes
-                md_file.write(f"# Call Transcript - {transcript} - Part {i + 1}\n\nSummary: {summary}\n\nFacts:\n{facts}\n\nSentiment: {sentiment}\n\n---\n")
-
-print("Summarizing complete")
+def summarize_transcripts():
+    """Summarize transcript files."""
+    today = datetime.datetime.now().strftime('%Y%m%d')
+    summary_filename = "summary-" + today + ".md"
+    transcript_files = [f for f in os.listdir(".") if f.endswith(".tns")]
+
+    for transcript in transcript_files:
+        print("Summarizing: " + transcript)
+        with open(transcript, "r") as file:
+            transcript_data = file.read()
+        chunked_data = chunk_transcript(transcript_data, CHUNK_SIZE)
+
+        with open(summary_filename, "a") as md_file:
+            for i, chunk in enumerate(chunked_data):
+                summary = llama_api(SUMMARY_PROMPT.format(chunk=chunk))
+                facts = llama_api(FACT_PROMPT.format(chunk=chunk))
+                sentiment = llama_api(SENTIMENT_PROMPT.format(chunk=chunk))

+                md_file.write(f"# Call Transcript - {transcript} - Part {i + 1}\n\nSummary: {summary}\n\nFacts:\n{facts}\n\nSentiment: {sentiment}\n\n---\n")
+
+    print("Summarizing complete")
+
+if __name__ == "__main__":
+    process_wav_files()
+    summarize_transcripts()
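One behavioral detail carries over unchanged from the old version: chunk_transcript splits on "\n" and concatenates lines without re-inserting the newlines, so lines run together at chunk boundaries. A quick standalone check, with the function body copied from the new file and a made-up sample transcript:

def chunk_transcript(string, chunk_size):
    """Chunk the transcript to fit in the context window."""
    chunks = []
    lines = string.split("\n")
    current_chunk = ""
    for line in lines:
        current_chunk += line  # note: the stripped "\n" is not restored
        if len(current_chunk) >= chunk_size:
            chunks.append(current_chunk)
            current_chunk = ""
    if current_chunk:
        chunks.append(current_chunk)
    return chunks

sample = "hello there\nhow are you\nfine thanks\n"  # made-up sample transcript
print(chunk_transcript(sample, 12))
# -> ['hello therehow are you', 'fine thanks']

With the new __main__ guard, running summarize.py in a directory of .wav files performs both passes in order: transcription to .tns files, then chunked summarization into summary-YYYYMMDD.md.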