# (Hugging Face Spaces page header "Spaces: Running" — scraping residue,
#  kept only as a comment so this file remains valid Python.)
# Third-party
import gradio as gr
from fastapi import FastAPI

# Local: tuning constants shared with the inference backend.
from shared import DEFAULT_CHANGE_THRESHOLD, DEFAULT_MAX_SPEAKERS, ABSOLUTE_MAX_SPEAKERS

# WebSocket endpoint of the inference backend.
# Replace with your actual space URL when deployed.
API_WS = "wss://your-space.hf.space/ws_inference"
def build_ui():
    """Build the Gradio UI for real-time speaker diarization.

    Returns:
        gr.Blocks: the assembled interface. Microphone capture happens in the
        browser: the injected JavaScript records ``audio/webm`` chunks with
        MediaRecorder and streams them over a WebSocket to ``API_WS``; the
        server's HTML responses are rendered into the conversation panel.
    """
    # JavaScript for browser audio capture and WebSocket streaming.
    # Injected via gr.Blocks(head=...) (Gradio 4.x) because Blocks.load()
    # requires a Python callable as its first argument -- the previous code
    # passed this string to load(), so the script never executed.
    js = f"""
    <script>
    let ws, recorder, mediaStream;

    async function startStream() {{
        try {{
            // Ask the browser for microphone access.
            mediaStream = await navigator.mediaDevices.getUserMedia({{audio: true}});

            // Connect to the inference WebSocket.
            ws = new WebSocket('{API_WS}');

            ws.onopen = () => {{
                console.log('WebSocket connected');
                document.getElementById('status').textContent = 'Connected to server';

                // Record compressed audio and forward each chunk while the
                // socket is OPEN (readyState === 1).
                recorder = new MediaRecorder(mediaStream, {{mimeType: 'audio/webm'}});
                recorder.ondataavailable = (e) => {{
                    if (ws && ws.readyState === 1 && e.data.size > 0) {{
                        ws.send(e.data);
                    }}
                }};
                recorder.start(200); // Send chunks every 200ms
            }};

            ws.onmessage = (evt) => {{
                // The server pushes ready-to-render HTML for the conversation.
                document.querySelector('.output').innerHTML = evt.data;
            }};

            ws.onerror = (error) => {{
                console.error('WebSocket error:', error);
                document.getElementById('status').textContent = 'WebSocket error';
            }};

            ws.onclose = () => {{
                console.log('WebSocket closed');
                document.getElementById('status').textContent = 'Disconnected';
                stopStream();
            }};
        }} catch (error) {{
            console.error('Error starting stream:', error);
            document.getElementById('status').textContent = `Error: ${{error.message}}`;
        }}
    }}

    function stopStream() {{
        // Stop the MediaRecorder.
        if (recorder && recorder.state !== 'inactive') {{
            recorder.stop();
        }}
        // Release the microphone.
        if (mediaStream) {{
            mediaStream.getTracks().forEach(track => track.stop());
        }}
        // Close the WebSocket.
        if (ws) {{
            ws.close();
        }}
        document.getElementById('status').textContent = 'Stopped';
    }}

    // Hidden element the handlers above write status text into.
    document.addEventListener('DOMContentLoaded', () => {{
        const statusElem = document.createElement('div');
        statusElem.id = 'status';
        statusElem.style.display = 'none';
        document.body.appendChild(statusElem);
    }});

    // Gradio renders its DOM asynchronously after page load, so poll until
    // the buttons exist before wiring the click handlers. They are located
    // by elem_id -- the old aria-label selectors never matched anything.
    const wireButtons = setInterval(() => {{
        const startBtn = document.getElementById('start-btn');
        const stopBtn = document.getElementById('stop-btn');
        if (startBtn && stopBtn) {{
            startBtn.addEventListener('click', startStream);
            stopBtn.addEventListener('click', stopStream);
            clearInterval(wireButtons);
        }}
    }}, 500);
    </script>
    """

    with gr.Blocks(title="Real-time Speaker Diarization", theme=gr.themes.Soft(), head=js) as demo:
        gr.Markdown("# 🎤 Live Speaker Diarization")
        gr.Markdown("Real-time speech recognition with automatic speaker identification")

        with gr.Row():
            with gr.Column(scale=2):
                # Conversation display; the injected JS replaces its innerHTML
                # (selected via the 'output' class) on every WS message.
                output = gr.HTML(
                    value="<div style='padding: 20px; background: #f8f9fa; border-radius: 10px; min-height: 300px;'><i>Click 'Start' to begin listening...</i></div>",
                    label="Live Conversation",
                    elem_classes=["output"]
                )

                # Control buttons; elem_id is what the injected JS targets.
                with gr.Row():
                    start_btn = gr.Button("▶️ Start Listening", variant="primary", size="lg", elem_id="start-btn")
                    stop_btn = gr.Button("⏹️ Stop", variant="stop", size="lg", elem_id="stop-btn")
                    clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg")

                # Status display
                status_output = gr.Textbox(
                    label="System Status",
                    value="Click 'Start Listening' to begin...",
                    lines=8,
                    interactive=False
                )

            with gr.Column(scale=1):
                # Settings
                gr.Markdown("## ⚙️ Settings")

                threshold_slider = gr.Slider(
                    minimum=0.3,
                    maximum=0.9,
                    step=0.05,
                    value=DEFAULT_CHANGE_THRESHOLD,
                    label="Speaker Change Sensitivity",
                    info="Lower = more sensitive"
                )

                max_speakers_slider = gr.Slider(
                    minimum=2,
                    maximum=ABSOLUTE_MAX_SPEAKERS,
                    step=1,
                    value=DEFAULT_MAX_SPEAKERS,
                    label="Maximum Speakers"
                )

                # NOTE(review): update_btn / clear_btn / sliders have no
                # event handlers wired up in the visible code -- presumably
                # handled elsewhere or still TODO; confirm against backend.
                update_btn = gr.Button("Update Settings", variant="secondary")

                # Instructions
                gr.Markdown("""
                ## 📋 Instructions
                1. **Start Listening** - allows browser to access microphone
                2. **Speak** - system will recognize different speakers
                3. **Stop** when finished

                ## 🎨 Speaker Colors
                - 🔴 Speaker 1 (Red)
                - 🟢 Speaker 2 (Teal)
                - 🔵 Speaker 3 (Blue)
                - 🟡 Speaker 4 (Green)
                """)

    return demo
# Build the interface once at import time so it can be mounted by FastAPI.
demo = build_ui()
def mount_ui(app: FastAPI):
    """Mount the Gradio demo onto an existing FastAPI application.

    Uses ``gr.mount_gradio_app``, the supported API for serving a Blocks app
    from FastAPI. The previous ``app.mount("/", demo.app)`` was broken:
    ``Blocks.app`` is only created by ``launch()``, so it does not exist here.

    Args:
        app: the FastAPI application to attach the UI to.

    Returns:
        FastAPI: the same application (mutated in place), for chaining.
    """
    return gr.mount_gradio_app(app, demo, path="/")
# Entry point: run the UI standalone (without the FastAPI host) for testing.
if __name__ == "__main__":
    demo.launch()