# Exported from Hugging Face Space file viewer (author: Saiyaswanth007,
# commit 375c386, "Code splitting", 5.16 kB).
import gradio as gr
from fastapi import FastAPI
from shared import DEFAULT_CHANGE_THRESHOLD, DEFAULT_MAX_SPEAKERS, ABSOLUTE_MAX_SPEAKERS
# WebSocket endpoint of the inference backend.
# Replace with your actual space URL when deployed.
# NOTE(review): the embedded <script> in build_ui() hard-codes its own copy
# of this URL — keep the two in sync when changing it.
API_WS = "wss://your-space.hf.space/ws_inference"
def build_ui():
    """Build the Gradio UI for real-time speaker diarization.

    Returns:
        gr.Blocks: the assembled (un-launched) interface. Microphone
        capture and streaming happen client-side in the embedded <script>.
    """
    # The page JS streams microphone audio to the backend WebSocket.
    # The URL is substituted from the module-level API_WS constant instead
    # of being duplicated as a second hard-coded literal.
    # NOTE(review): some Gradio versions sanitize <script> tags out of
    # gr.HTML content — confirm the script actually executes when deployed.
    conversation_html = """
    <div class='output' style='padding:20px; background:#f8f9fa; border-radius:10px; min-height:300px;'>
        <i>Click 'Start Listening' to begin…</i>
    </div>

    <script>
    const API_WS = '__API_WS__';
    let ws, recorder, mediaStream;

    async function startStream() {
        try {
            mediaStream = await navigator.mediaDevices.getUserMedia({audio:true});
            ws = new WebSocket(API_WS);

            ws.onopen = () => {
                recorder = new MediaRecorder(mediaStream, {mimeType:'audio/webm'});
                recorder.ondataavailable = e => {
                    if (ws.readyState===1 && e.data.size>0) ws.send(e.data);
                };
                recorder.start(200);  // emit a chunk every 200 ms
            };

            ws.onmessage = evt => {
                document.querySelector('.output').innerHTML = evt.data;
            };

            ws.onerror = err => console.error('WebSocket error:', err);
            ws.onclose = () => stopStream();
        } catch (err) {
            console.error('Error starting stream:', err);
            alert(`Error: ${err.message}`);
        }
    }

    function stopStream() {
        // BUGFIX: the original `recorder?.state!=='inactive' && recorder.stop()`
        // threw when recorder was undefined (undefined !== 'inactive' is true,
        // so recorder.stop() dereferenced undefined). Guard explicitly.
        if (recorder && recorder.state !== 'inactive') recorder.stop();
        if (mediaStream) mediaStream.getTracks().forEach(t => t.stop());
        if (ws) ws.close();
    }

    // BUGFIX: the original attached handlers on DOMContentLoaded via
    // button[aria-label="..."] selectors. Gradio renders its components
    // after DOMContentLoaded and does not set those aria-labels, so the
    // Start/Stop clicks were never wired. Poll for the elem_id'd buttons.
    // NOTE(review): assumes elem_id lands on the clickable <button>
    // element — verify against the deployed Gradio version.
    const wireButtons = setInterval(() => {
        const start = document.getElementById('start_btn');
        const stop = document.getElementById('stop_btn');
        if (start && stop) {
            start.addEventListener('click', startStream);
            stop.addEventListener('click', stopStream);
            clearInterval(wireButtons);
        }
    }, 250);
    </script>
    """.replace("__API_WS__", API_WS)

    with gr.Blocks(title="Real-time Speaker Diarization", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🎤 Live Speaker Diarization")
        gr.Markdown("Real-time speech recognition with automatic speaker identification")

        with gr.Row():
            with gr.Column(scale=2):
                # Conversation display; updated in place by ws.onmessage above.
                output = gr.HTML(conversation_html, label="Live Conversation")

                # Control buttons. elem_id is what the embedded JS uses to
                # locate and wire the click handlers.
                # TODO: clear_btn has no handler (same as original).
                with gr.Row():
                    start_btn = gr.Button("▶️ Start Listening", variant="primary", size="lg", elem_id="start_btn")
                    stop_btn = gr.Button("⏹️ Stop", variant="stop", size="lg", elem_id="stop_btn")
                    clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg")

                # Read-only status display.
                status_output = gr.Textbox(
                    label="System Status",
                    value="Click 'Start Listening' to begin...",
                    lines=8,
                    interactive=False
                )

            with gr.Column(scale=1):
                # Settings panel.
                # TODO: update_btn / sliders are not wired to any handler
                # (same as original).
                gr.Markdown("## ⚙️ Settings")

                threshold_slider = gr.Slider(
                    minimum=0.3,
                    maximum=0.9,
                    step=0.05,
                    value=DEFAULT_CHANGE_THRESHOLD,
                    label="Speaker Change Sensitivity",
                    info="Lower = more sensitive"
                )

                max_speakers_slider = gr.Slider(
                    minimum=2,
                    maximum=ABSOLUTE_MAX_SPEAKERS,
                    step=1,
                    value=DEFAULT_MAX_SPEAKERS,
                    label="Maximum Speakers"
                )

                update_btn = gr.Button("Update Settings", variant="secondary")

                # Instructions
                gr.Markdown("""
                ## 📋 Instructions
                1. **Start Listening** - allows browser to access microphone
                2. **Speak** - system will recognize different speakers
                3. **Stop** when finished

                ## 🎨 Speaker Colors
                - 🔴 Speaker 1 (Red)
                - 🟢 Speaker 2 (Teal)
                - 🔵 Speaker 3 (Blue)
                - 🟡 Speaker 4 (Green)
                """)

    return demo
# Create Gradio interface once at import time; this module-level singleton
# is used by both mount_ui() and the __main__ guard below.
demo = build_ui()
def mount_ui(app: FastAPI) -> None:
    """Mount the Gradio demo onto an existing FastAPI application at "/".

    BUGFIX: the original `app.mount("/", demo.app)` relied on `demo.app`,
    which is only populated after `demo.launch()` has run; mounting an
    un-launched Blocks that way fails. `gr.mount_gradio_app` is the
    supported way to embed a Gradio app inside FastAPI.

    Args:
        app: the host FastAPI application to mount onto.
    """
    gr.mount_gradio_app(app, demo, path="/")
# Allow running this module directly (standalone testing without FastAPI).
if __name__ == "__main__":
    demo.launch()