agent-zero/webui/test.html
frdel 22f1a2b744 speech recognition prototype
using xenova web only tts
2024-11-10 14:25:20 +01:00

82 lines
2.7 KiB
HTML

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<title>Agent Zero</title>
<script type="module">
import { pipeline, read_audio } from './transformers@3.0.2.js';
// Speech-recognition pipeline; assigned by initTranscriber() and called with
// the decoded audio when a recording stops. Undefined until that completes.
let transcriber;
// MediaRecorder for the current recording; created in startRecording() and
// stopped in stopRecording().
let mediaRecorder;
// Data chunks delivered by the recorder; reset at the start of each recording.
let audioChunks = [];
// True between startRecording() and stopRecording(); read by toggleRecording()
// to decide which of the two to call.
let isRecording = false;
/**
 * Build the speech-recognition pipeline and store it in the module-level
 * `transcriber` variable so later recordings can be transcribed.
 *
 * @returns {Promise<void>} Resolves once `transcriber` has been assigned.
 */
async function initTranscriber() {
  const task = 'automatic-speech-recognition';
  const modelId = 'Xenova/whisper-tiny.en';
  const loadedPipeline = await pipeline(task, modelId);
  transcriber = loadedPipeline;
}
/**
 * Toggle between recording and idle: stops the current recording when
 * `isRecording` is true, otherwise starts a new one.
 *
 * This is invoked from an inline `onclick` handler, which ignores the returned
 * promise, so failures are caught and logged here instead of surfacing as
 * unhandled promise rejections.
 *
 * @returns {Promise<void>} Resolves once the start/stop request has been
 *   handled; never rejects.
 */
async function toggleRecording() {
  try {
    if (isRecording) {
      stopRecording();
    } else {
      // Await so that a rejected start (e.g. microphone access denied) is
      // caught below rather than left as a floating promise.
      await startRecording();
    }
  } catch (error) {
    console.error("Failed to toggle recording:", error);
  }
}
/**
 * Start capturing microphone audio and arrange for it to be transcribed once
 * the recording stops.
 *
 * Marks the recording as active, switches the mic button label, requests
 * microphone access and starts a MediaRecorder on the resulting stream. When
 * the recorder stops, the collected chunks are decoded with `read_audio`,
 * passed to `transcriber`, and the resulting text is written to the
 * "transcript" element.
 *
 * @returns {Promise<void>} Resolves once the recorder has started, or once a
 *   request that was cancelled while waiting for permission has been cleaned up.
 * @throws Rethrows whatever `getUserMedia` rejects with (for example when the
 *   user denies access), after rolling the recording state and button back.
 */
async function startRecording() {
  // Set the flag before awaiting so a second click during the permission
  // prompt is routed to stopRecording() instead of starting a second capture.
  isRecording = true;
  audioChunks = [];
  const micButton = document.getElementById("micButton");
  micButton.innerText = "Stop Recording";

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (error) {
    // No stream was obtained: undo the optimistic state change so the page
    // is not stuck showing "Stop Recording".
    isRecording = false;
    micButton.innerText = "Start Recording";
    throw error;
  }

  // Stopping every track releases the microphone (and the browser's
  // recording indicator); stopping the recorder alone does not.
  const releaseStream = () => {
    stream.getTracks().forEach((track) => track.stop());
  };

  // The user asked to stop while the permission request was still pending.
  if (!isRecording) {
    releaseStream();
    return;
  }

  const recorder = new MediaRecorder(stream);
  mediaRecorder = recorder;
  recorder.ondataavailable = (event) => {
    audioChunks.push(event.data);
  };
  recorder.onstop = async () => {
    releaseStream();
    // Label the blob with the container the recorder actually produced;
    // browsers typically record WebM/Ogg/MP4 rather than WAV.
    const audioBlob = new Blob(audioChunks, { type: recorder.mimeType });
    const audioUrl = URL.createObjectURL(audioBlob);
    const transcriptElement = document.getElementById("transcript");
    try {
      if (!transcriber) {
        throw new Error("Speech recognition model is not loaded yet");
      }
      // Use read_audio to fetch and process the audio from the URL
      const samplingRate = 16000; // Adjust as needed for the model
      const audioData = await read_audio(audioUrl, samplingRate);
      // Transcribe the audio
      const output = await transcriber(audioData);
      transcriptElement.innerText = output.text;
    } catch (error) {
      console.error("Transcription failed:", error);
      transcriptElement.innerText = "Transcription failed.";
    } finally {
      // Release the object URL even when decoding or transcription fails.
      URL.revokeObjectURL(audioUrl);
    }
  };
  recorder.start();
}
/**
 * Stop the current recording: clears the `isRecording` flag, restores the mic
 * button label, and stops the active MediaRecorder if there is one.
 *
 * Safe to call when no recorder exists yet (stop requested before microphone
 * access was granted) or when the recorder has already stopped.
 */
function stopRecording() {
  isRecording = false;
  document.getElementById("micButton").innerText = "Start Recording";
  // mediaRecorder is only assigned after getUserMedia resolves, so it can
  // still be undefined here; an already-inactive recorder has nothing to stop.
  if (mediaRecorder && mediaRecorder.state !== "inactive") {
    mediaRecorder.stop();
  }
}
// Module scripts do not leak their bindings to the page, so publish the click
// handler used by the inline `onclick` attribute and register the model
// loader as the window's load handler.
Object.assign(window, {
  toggleRecording,
  onload: initTranscriber,
});
</script>
</head>
<body>
<h1>Agent Zero Speech Transcription</h1>
<button id="micButton" onclick="toggleRecording()">Start Recording</button>
<p id="transcript">Transcript will appear here...</p>
</body>
</html>