"""Meeting Recorder: a PyQt5 app that records audio, transcribes it with a
whisper.cpp server, and summarizes the transcripts with a llama.cpp server."""

import sys
import os
import pyaudio
import wave
import threading
import requests
import datetime
import tempfile
import subprocess
import json

from openai import OpenAI
from PyQt5.QtWidgets import (QApplication, QWidget, QPushButton, QVBoxLayout,
                             QHBoxLayout, QInputDialog, QLabel, QComboBox,
                             QDialog, QFormLayout, QLineEdit, QDoubleSpinBox,
                             QSpinBox)
from PyQt5.QtGui import QIcon, QPixmap


class RecordingApp(QWidget):
    def __init__(self):
        super().__init__()
        self.recording_pixmap = QPixmap("recording.png")
        self.not_recording_pixmap = QPixmap("notrecording.png")
        self.is_recording = False
        self.audio_thread = None
        self.stream = None
        self.p = None
        self.wf = None

        # Configuration with defaults from sample.env
        self.config = {
            "WHISPERCPP_URL": "http://localhost:8081/inference",
            "LLAMACPP_URL": "http://localhost:8080",
            "SYSTEM_MESSAGE": "You are a friendly chatbot that summarizes call transcripts",
            "SUMMARY_PROMPT": "Call Transcript: {chunk}\n\nInstruction: Summarize the above call transcript but DO NOT MENTION THE TRANSCRIPT",
            "FACT_PROMPT": "Call Transcript: {chunk}\n\nInstruction: Summarize all the facts in the transcript, one per line bullet point",
            "SENTIMENT_PROMPT": "Call Transcript: {chunk}\n\nInstruction: Summarize the sentiment for topics in the above call transcript but DO NOT MENTION THE TRANSCRIPT",
            "CHUNK_SIZE": 12288,
            "TEMPERATURE": 0.6,
            "TOP_P": 0.9,
            "MAX_TOKENS": 2000
        }

        # Try to load saved config
        self.load_config()

        self.initUI()

    def initUI(self):
        self.setWindowTitle('Meeting Recorder')
        self.setGeometry(500, 500, 500, 150)

        # Main vertical layout
        main_layout = QVBoxLayout()

        # Horizontal layout for the status label and device combo box
        top_layout = QHBoxLayout()
        self.status_label = QLabel(self)
        self.status_label.setPixmap(self.not_recording_pixmap)
        top_layout.addWidget(self.status_label)

        self.device_combo = QComboBox(self)
        # Store the PortAudio device index as item data so the selection maps
        # back to the right device even when output-only devices precede it
        # in the device list (the combo position alone would be wrong then).
        for index, name in self.get_input_devices():
            self.device_combo.addItem(name, index)
        top_layout.addWidget(self.device_combo)
        main_layout.addLayout(top_layout)

        # Horizontal layout for the buttons
        button_layout = QHBoxLayout()

        self.settings_button = QPushButton('Settings', self)
        self.settings_button.clicked.connect(self.show_settings)
        button_layout.addWidget(self.settings_button)

        self.record_button = QPushButton('Record', self)
        self.record_button.clicked.connect(self.toggle_recording)
        button_layout.addWidget(self.record_button)

        self.transcribe_button = QPushButton('Transcribe', self)
        self.transcribe_button.clicked.connect(self.transcribe)
        button_layout.addWidget(self.transcribe_button)

        self.clean_button = QPushButton('Clean', self)
        self.clean_button.clicked.connect(self.clean)
        button_layout.addWidget(self.clean_button)

        main_layout.addLayout(button_layout)
        self.setLayout(main_layout)

    def get_input_devices(self):
        """Return (device_index, name) pairs for every input-capable device."""
        p = pyaudio.PyAudio()
        info = p.get_host_api_info_by_index(0)
        num_devices = info.get('deviceCount')
        devices = []
        for i in range(num_devices):
            device_info = p.get_device_info_by_host_api_device_index(0, i)
            if device_info.get('maxInputChannels') > 0:
                devices.append((device_info.get('index'), device_info.get('name')))
        p.terminate()
        return devices

    def toggle_recording(self):
        if not self.is_recording:
            self.start_recording()
        else:
            self.stop_recording()
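    # Recording flow, for orientation: start_recording spawns a worker thread
    # that owns the PyAudio stream and the WAV writer, so the Qt event loop
    # stays responsive while audio is captured. stop_recording flips the
    # is_recording flag, joins the worker, and then releases the audio
    # resources the worker created.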
    def start_recording(self):
        filename, ok = QInputDialog.getText(self, 'Input Dialog', 'Enter filename:')
        if ok and filename:
            self.is_recording = True
            self.record_button.setText('Stop Recording')
            self.status_label.setPixmap(self.recording_pixmap)
            # currentData() holds the PortAudio device index stored in initUI
            selected_device_index = self.device_combo.currentData()
            self.audio_thread = threading.Thread(
                target=self.record_audio,
                args=(filename, selected_device_index))
            self.audio_thread.start()

    def stop_recording(self):
        if self.is_recording:
            self.is_recording = False
            self.record_button.setText('Record')
            self.status_label.setPixmap(self.not_recording_pixmap)
            if self.audio_thread:
                self.audio_thread.join()
            if self.stream:
                self.stream.stop_stream()
                self.stream.close()
            if self.p:
                self.p.terminate()
            if self.wf:
                self.wf.close()

    def record_audio(self, filename, device_index):
        chunk_size = 1024
        sampling_rate = 16000  # whisper.cpp expects 16 kHz mono input
        num_channels = 1

        self.p = pyaudio.PyAudio()
        file_path = f"{filename}.wav"
        self.wf = wave.open(file_path, 'wb')
        self.wf.setnchannels(num_channels)
        self.wf.setsampwidth(self.p.get_sample_size(pyaudio.paInt16))
        self.wf.setframerate(sampling_rate)

        self.stream = self.p.open(
            format=pyaudio.paInt16,
            channels=num_channels,
            rate=sampling_rate,
            input=True,
            frames_per_buffer=chunk_size,
            input_device_index=device_index
        )

        print(f"Recording to {file_path}. Press 'Stop Recording' to stop...")
        while self.is_recording:
            # Don't raise on input overflow; an occasional dropped buffer is
            # preferable to killing the recording thread.
            data = self.stream.read(chunk_size, exception_on_overflow=False)
            self.wf.writeframes(data)
        print(f"Audio saved to {file_path}")

    def transcribe(self):
        # Run the transcription and summarization in the background
        threading.Thread(target=self.run_transcription_and_summarization).start()

    def whisper_api(self, file):
        """Transcribe an audio file using the whisper.cpp server API."""
        files = {"file": file}
        api_data = {
            "temperature": "0.0",
            "response_format": "json"
        }
        response = requests.post(self.config["WHISPERCPP_URL"], data=api_data, files=files)
        return response.json()["text"]

    def llm_local(self, prompt):
        """Send a prompt to the llama.cpp server via its OpenAI-compatible API."""
        # llama.cpp ignores the API key and model name, but the OpenAI client
        # requires them. Depending on the server version, the base URL may need
        # a "/v1" suffix for the OpenAI-compatible endpoint.
        client = OpenAI(api_key="doesntmatter", base_url=self.config["LLAMACPP_URL"])
        messages = [
            {"role": "system", "content": self.config["SYSTEM_MESSAGE"]},
            {"role": "user", "content": prompt}
        ]
        response = client.chat.completions.create(
            model="whatever",
            max_tokens=self.config["MAX_TOKENS"],
            temperature=self.config["TEMPERATURE"],
            top_p=self.config["TOP_P"],
            messages=messages)
        return response.choices[0].message.content

    def trim_silence(self, filename):
        """Trim silence from an audio file using FFmpeg, replacing it in place."""
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_filename = temp_file.name
        ffmpeg_command = [
            "ffmpeg",
            "-i", filename,
            "-af", "silenceremove=stop_threshold=-40dB:stop_duration=1:stop_periods=-1",
            "-y",  # Overwrite output file if it exists
            temp_filename
        ]
        subprocess.run(ffmpeg_command, capture_output=True, text=True, check=True)
        os.replace(temp_filename, filename)

    def process_wav_files(self):
        """Process WAV files: trim silence and transcribe."""
        wav_files = [f for f in os.listdir(".") if f.endswith(".wav")]
        for wav_file in wav_files:
            # Generate the expected transcript filename
            transcript_file = os.path.splitext(wav_file)[0] + ".tns"

            # Check if transcript already exists
            if os.path.exists(transcript_file):
                print(f"Transcript already exists for {wav_file}, skipping transcription")
                continue

            print("Trimming silence: " + wav_file)
            self.trim_silence(wav_file)

            with open(wav_file, "rb") as file:
                print("Transcribing: " + wav_file)
                output_text = self.whisper_api(file)

            with open(transcript_file, "w") as output:
                output.write(output_text)

    def chunk_transcript(self, string, chunk_size):
        """Chunk the transcript to fit in the context window."""
        chunks = []
        lines = string.split("\n")
        current_chunk = ""
        for line in lines:
            # Re-append the newline that split() removed so transcript lines
            # don't run together inside a chunk.
            current_chunk += line + "\n"
            if len(current_chunk) >= chunk_size:
                chunks.append(current_chunk)
                current_chunk = ""
        if current_chunk:
            chunks.append(current_chunk)
        return chunks
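    # Note that chunk_size is measured in characters, not tokens; at the common
    # rule of thumb of roughly 4 characters per token, the default CHUNK_SIZE
    # of 12288 targets about 3K tokens per chunk. A quick illustration of the
    # splitting behavior:
    #
    #   >>> app.chunk_transcript("line one\nline two\nline three", 8)
    #   ['line one\n', 'line two\n', 'line three\n']
    #
    # A chunk is emitted as soon as the accumulated text reaches chunk_size
    # characters, so chunks may slightly exceed the limit by one line.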
    def summarize_transcripts(self):
        """Summarize transcript files."""
        today = datetime.datetime.now().strftime('%Y%m%d')
        summary_filename = "summary-" + today + ".md"
        transcript_files = [f for f in os.listdir(".") if f.endswith(".tns")]
        for transcript in transcript_files:
            print("Summarizing: " + transcript)
            with open(transcript, "r") as file:
                transcript_data = file.read()
            chunked_data = self.chunk_transcript(transcript_data, self.config["CHUNK_SIZE"])
            with open(summary_filename, "a") as md_file:
                for i, chunk in enumerate(chunked_data):
                    print("Processing part " + str(i))
                    summary = self.llm_local(self.config["SUMMARY_PROMPT"].format(chunk=chunk))
                    facts = self.llm_local(self.config["FACT_PROMPT"].format(chunk=chunk))
                    sentiment = self.llm_local(self.config["SENTIMENT_PROMPT"].format(chunk=chunk))
                    md_file.write(
                        f"# Call Transcript - {transcript} - Part {i + 1}\n\n"
                        f"Summary: {summary}\n\n"
                        f"Facts:\n{facts}\n\n"
                        f"Sentiment: {sentiment}\n\n---\n")
        print("Summarizing complete")

    def run_transcription_and_summarization(self):
        self.process_wav_files()
        self.summarize_transcripts()

    def clean(self):
        print("Cleaning files...")
        try:
            for file in os.listdir('.'):
                if file.endswith(('.wav', '.tns')):
                    os.remove(file)
                    print(f"Deleted: {file}")
            print("Cleaning complete.")
        except Exception as e:
            print(f"An error occurred while cleaning files: {e}")

    def load_config(self):
        """Load configuration from config.json if it exists."""
        try:
            if os.path.exists("config.json"):
                with open("config.json", "r") as f:
                    saved_config = json.load(f)
                    self.config.update(saved_config)
        except Exception as e:
            print(f"Error loading config: {e}")

    def save_config(self):
        """Save configuration to config.json."""
        try:
            with open("config.json", "w") as f:
                json.dump(self.config, f, indent=4)
        except Exception as e:
            print(f"Error saving config: {e}")

    def show_settings(self):
        """Show configuration dialog."""
        dialog = ConfigDialog(self.config, self)
        dialog.resize(600, 400)  # Make dialog larger
        if dialog.exec_():
            self.config.update(dialog.get_values())
            self.save_config()

    def closeEvent(self, event):
        self.stop_recording()
        event.accept()


class ConfigDialog(QDialog):
    def __init__(self, config, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Settings")
        self.setModal(True)

        layout = QFormLayout()
        layout.setSpacing(15)  # Add more spacing between rows

        self.whisper_url = QLineEdit(config["WHISPERCPP_URL"])
        self.llama_url = QLineEdit(config["LLAMACPP_URL"])
        self.system_msg = QLineEdit(config["SYSTEM_MESSAGE"])
        self.summary_prompt = QLineEdit(config["SUMMARY_PROMPT"])
        self.fact_prompt = QLineEdit(config["FACT_PROMPT"])
        self.sentiment_prompt = QLineEdit(config["SENTIMENT_PROMPT"])

        # Make all line edit fields taller
        for line_edit in [self.whisper_url, self.llama_url, self.system_msg,
                          self.summary_prompt, self.fact_prompt, self.sentiment_prompt]:
            line_edit.setMinimumHeight(30)

        self.chunk_size = QSpinBox()
        self.chunk_size.setRange(1000, 32000)
        self.chunk_size.setValue(config["CHUNK_SIZE"])

        self.temperature = QDoubleSpinBox()
        self.temperature.setRange(0.1, 1.0)
        self.temperature.setSingleStep(0.1)
        self.temperature.setValue(config["TEMPERATURE"])

        self.top_p = QDoubleSpinBox()
        self.top_p.setRange(0.1, 1.0)
        self.top_p.setSingleStep(0.1)
        self.top_p.setValue(config["TOP_P"])

        self.max_tokens = QSpinBox()
        self.max_tokens.setRange(512, 4096)
        self.max_tokens.setValue(config["MAX_TOKENS"])

        layout.addRow("Whisper URL:", self.whisper_url)
        layout.addRow("LLaMA URL:", self.llama_url)
        layout.addRow("System Message:", self.system_msg)
        layout.addRow("Summary Prompt:", self.summary_prompt)
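    # Caveat worth flagging to users: the three prompt templates are expanded
    # with str.format(chunk=...), so an edited prompt must keep the "{chunk}"
    # placeholder, and any other literal braces must be doubled ("{{", "}}"),
    # or the transcript text will be dropped or formatting will raise at
    # summary time.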
layout.addRow("Fact Prompt:", self.fact_prompt) layout.addRow("Sentiment Prompt:", self.sentiment_prompt) layout.addRow("Chunk Size:", self.chunk_size) layout.addRow("Temperature:", self.temperature) layout.addRow("Top P:", self.top_p) layout.addRow("Max Tokens:", self.max_tokens) buttons = QHBoxLayout() ok_button = QPushButton("OK") ok_button.clicked.connect(self.accept) cancel_button = QPushButton("Cancel") cancel_button.clicked.connect(self.reject) buttons.addWidget(ok_button) buttons.addWidget(cancel_button) layout.addRow(buttons) self.setLayout(layout) def get_values(self): return { "WHISPERCPP_URL": self.whisper_url.text(), "LLAMACPP_URL": self.llama_url.text(), "SYSTEM_MESSAGE": self.system_msg.text(), "SUMMARY_PROMPT": self.summary_prompt.text(), "FACT_PROMPT": self.fact_prompt.text(), "SENTIMENT_PROMPT": self.sentiment_prompt.text(), "CHUNK_SIZE": self.chunk_size.value(), "TEMPERATURE": self.temperature.value(), "TOP_P": self.top_p.value(), "MAX_TOKENS": self.max_tokens.value() } if __name__ == '__main__': app = QApplication(sys.argv) app.setWindowIcon(QIcon("headphones.png")) ex = RecordingApp() ex.show() sys.exit(app.exec_())