import gradio as gr
from fastapi import FastAPI

from shared import DEFAULT_CHANGE_THRESHOLD, DEFAULT_MAX_SPEAKERS, ABSOLUTE_MAX_SPEAKERS

# Replace with your actual Space URL when deployed
API_WS = "wss://your-space.hf.space/ws_inference"
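# Optional sketch (assumption, not part of the original code): the URL could also be
# read from the environment so a deployed Space needs no code edit, e.g.
#   import os
#   API_WS = os.environ.get("API_WS_URL", API_WS)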


def build_ui():
    """Build Gradio UI for speaker diarization"""
    with gr.Blocks(title="Real-time Speaker Diarization", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🎤 Live Speaker Diarization")
        gr.Markdown("Real-time speech recognition with automatic speaker identification")

        with gr.Row():
            with gr.Column(scale=2):
                # Conversation display
                output = gr.HTML(
                    value="<div style='padding: 20px; background: #f8f9fa; border-radius: 10px; min-height: 300px;'><i>Click 'Start' to begin listening...</i></div>",
                    label="Live Conversation",
                    elem_classes=["output"]
                )

                # Control buttons
                with gr.Row():
                    start_btn = gr.Button("▶️ Start Listening", variant="primary", size="lg")
                    stop_btn = gr.Button("⏹️ Stop", variant="stop", size="lg")
                    clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg")

                # Status display
                status_output = gr.Textbox(
                    label="System Status",
                    value="Click 'Start Listening' to begin...",
                    lines=8,
                    interactive=False
                )

            with gr.Column(scale=1):
                # Settings
                gr.Markdown("## ⚙️ Settings")

                threshold_slider = gr.Slider(
                    minimum=0.3,
                    maximum=0.9,
                    step=0.05,
                    value=DEFAULT_CHANGE_THRESHOLD,
                    label="Speaker Change Sensitivity",
                    info="Lower = more sensitive"
                )

                max_speakers_slider = gr.Slider(
                    minimum=2,
                    maximum=ABSOLUTE_MAX_SPEAKERS,
                    step=1,
                    value=DEFAULT_MAX_SPEAKERS,
                    label="Maximum Speakers"
                )

                update_btn = gr.Button("Update Settings", variant="secondary")

                # Instructions
                gr.Markdown("""
## 📋 Instructions
1. **Start Listening** - allow the browser to access your microphone
2. **Speak** - the system identifies different speakers automatically
3. **Stop** when finished

## 🎨 Speaker Colors
- 🔴 Speaker 1 (Red)
- 🟢 Speaker 2 (Teal)
- 🔵 Speaker 3 (Blue)
- 🟡 Speaker 4 (Green)
""")

        # JavaScript for browser microphone capture and streaming over the WebSocket.
        # It is written as a plain function body so it can be attached through the
        # `js=` argument of demo.load() below (Gradio 4.x); the original passed a
        # <script> string directly to demo.load(), which expects a callable and
        # never executed the script.
        js = f"""
        () => {{
            let ws, recorder, mediaStream;

            // Hidden element the capture code uses to report its state
            const statusElem = document.createElement('div');
            statusElem.id = 'status';
            statusElem.style.display = 'none';
            document.body.appendChild(statusElem);

            async function startStream() {{
                try {{
                    // Ask for microphone access
                    mediaStream = await navigator.mediaDevices.getUserMedia({{audio: true}});

                    // Connect to the inference WebSocket
                    ws = new WebSocket('{API_WS}');

                    ws.onopen = () => {{
                        console.log('WebSocket connected');
                        document.getElementById('status').textContent = 'Connected to server';

                        // Record compressed audio and forward each chunk to the server
                        recorder = new MediaRecorder(mediaStream, {{mimeType: 'audio/webm'}});
                        recorder.ondataavailable = (e) => {{
                            if (ws && ws.readyState === 1 && e.data.size > 0) {{
                                ws.send(e.data);
                            }}
                        }};

                        // Emit a chunk every 200 ms
                        recorder.start(200);
                    }};

                    ws.onmessage = (evt) => {{
                        // The server sends the rendered conversation HTML
                        document.querySelector('.output').innerHTML = evt.data;
                    }};

                    ws.onerror = (error) => {{
                        console.error('WebSocket error:', error);
                        document.getElementById('status').textContent = 'WebSocket error';
                    }};

                    ws.onclose = () => {{
                        console.log('WebSocket closed');
                        document.getElementById('status').textContent = 'Disconnected';
                        stopStream();
                    }};
                }} catch (error) {{
                    console.error('Error starting stream:', error);
                    document.getElementById('status').textContent = `Error: ${{error.message}}`;
                }}
            }}

            function stopStream() {{
                // Stop the MediaRecorder
                if (recorder && recorder.state !== 'inactive') {{
                    recorder.stop();
                }}
                // Release the microphone
                if (mediaStream) {{
                    mediaStream.getTracks().forEach(track => track.stop());
                }}
                // Close the WebSocket
                if (ws) {{
                    ws.close();
                }}
                document.getElementById('status').textContent = 'Stopped';
            }}

            // Wire the Gradio buttons to the capture functions. Buttons are located by
            // their visible text because Gradio does not expose the label as an aria-label.
            const findButton = (label) =>
                Array.from(document.querySelectorAll('button')).find(b => b.textContent.includes(label));
            const startButton = findButton('Start Listening');
            const stopButton = findButton('Stop');
            if (startButton) startButton.onclick = startStream;
            if (stopButton) stopButton.onclick = stopStream;
        }}
        """

        # Run the capture script in the browser once the UI has loaded (js-only event)
        demo.load(fn=None, js=js)
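
        # Sketch (assumption, not in the original): server-side handlers for the Clear and
        # Update Settings buttons. Start/Stop are handled entirely in the browser by the
        # script above; actually applying new settings to the backend would need an extra
        # message over the WebSocket or a dedicated HTTP endpoint.
        def clear_conversation():
            return ("<div style='padding: 20px; background: #f8f9fa; border-radius: 10px; "
                    "min-height: 300px;'><i>Cleared. Click 'Start' to begin listening...</i></div>")

        def describe_settings(threshold, max_speakers):
            return f"Requested settings: change threshold={threshold:.2f}, max speakers={int(max_speakers)}"

        clear_btn.click(fn=clear_conversation, outputs=output)
        update_btn.click(
            fn=describe_settings,
            inputs=[threshold_slider, max_speakers_slider],
            outputs=status_output,
        )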

    return demo


# Create the Gradio interface
demo = build_ui()


def mount_ui(app: FastAPI):
    """Mount the Gradio UI onto an existing FastAPI app"""
    # gr.mount_gradio_app is the documented way to attach Blocks to a FastAPI app;
    # demo.app is not a public attribute that can be mounted directly.
    return gr.mount_gradio_app(app, demo, path="/")
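

# Usage sketch (assumption): a hypothetical server module would create the FastAPI app,
# register the /ws_inference WebSocket endpoint, and then mount this UI, e.g.
#   app = FastAPI()
#   mount_ui(app)
#   uvicorn.run(app, host="0.0.0.0", port=7860)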


# For standalone testing
if __name__ == "__main__":
    demo.launch()