Saiyaswanth007 committed
Commit 54ccef4 · Parent: e1de00e

Removed complex code

Files changed (1): ui.py (+45 −18)
ui.py CHANGED
```diff
@@ -9,12 +9,12 @@ import os
 from datetime import datetime
 import httpx
 import websockets
+from fastrtc import RTCComponent
 
-# Configuration - use environment variables for deployment
 class Config:
     def __init__(self):
-        self.hf_space_url = os.getenv("HF_SPACE_URL", "https://your-space.hf.space")
-        self.render_url = os.getenv("RENDER_URL", "https://your-app.onrender.com")
+        self.hf_space_url = os.getenv("HF_SPACE_URL", "androidguy-speaker-diarization.hf.space")
+        self.render_url = os.getenv("RENDER_URL", "render-signal-audio.onrender.com")
         self.default_threshold = float(os.getenv("DEFAULT_THRESHOLD", "0.7"))
         self.default_max_speakers = int(os.getenv("DEFAULT_MAX_SPEAKERS", "4"))
         self.max_speakers_limit = int(os.getenv("MAX_SPEAKERS_LIMIT", "8"))
```
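Note that the new defaults are bare hostnames rather than full URLs; the scheme (`https://` or `wss://`) is now prepended at each call site. A minimal sketch of repointing the UI at another deployment through the environment (the hostnames below are hypothetical placeholders, not part of this commit):

```python
import os

# Hypothetical hostnames; set these before ui.py is imported.
os.environ["HF_SPACE_URL"] = "my-diarization.hf.space"
os.environ["RENDER_URL"] = "my-signal.onrender.com"

from ui import Config  # ui.py is the module changed in this commit

config = Config()
print(f"https://{config.hf_space_url}")     # https://my-diarization.hf.space
print(f"wss://{config.render_url}/stream")  # wss://my-signal.onrender.com/stream
```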
```diff
@@ -74,8 +74,9 @@ def create_gradio_app():
                 this.mediaStream = null;
                 this.mediaRecorder = null;
                 this.isRecording = false;
-                this.baseUrl = '{config.hf_space_url}';
-                this.wsUrl = this.baseUrl.replace('https://', 'wss://').replace('http://', 'ws://') + '/ws';
+                this.baseUrl = 'https://{config.hf_space_url}';
+                this.wsUrl = 'wss://{config.hf_space_url}/ws';
+                this.renderUrl = 'wss://{config.render_url}/stream';
             }}
 
             async startRecording() {{
```
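The old JavaScript derived its WebSocket URL by string-replacing the scheme on a full URL; with bare hostnames, each endpoint is now spelled out explicitly. What the f-string interpolation hands to the browser, assuming the new default hostnames:

```python
# What the template interpolation produces with the new defaults.
hf_space_url = "androidguy-speaker-diarization.hf.space"
render_url = "render-signal-audio.onrender.com"

base_url = f"https://{hf_space_url}"      # REST base URL
ws_url = f"wss://{hf_space_url}/ws"       # direct WebSocket endpoint
render_ws = f"wss://{render_url}/stream"  # WebRTC signalling on Render

assert ws_url == "wss://androidguy-speaker-diarization.hf.space/ws"
```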
```diff
@@ -301,28 +302,53 @@ def create_gradio_app():
             </div>
             """)
 
+            # WebRTC component (hidden, but functional)
+            webrtc = RTCComponent(
+                url=f"wss://{config.render_url}/stream",
+                streaming=False,
+                modality="audio",
+                mode="send-receive",
+                visible=False  # Hidden but functional
+            )
+
             # Control buttons
             with gr.Row():
-                gr.Button(
+                start_btn = gr.Button(
                     "▶️ Start Listening",
                     variant="primary",
                     size="lg",
                     elem_id="start-btn"
-                ).click(fn=None, js="startListening()")
+                )
 
-                gr.Button(
+                stop_btn = gr.Button(
                     "⏹️ Stop",
                     variant="stop",
                     size="lg",
                     elem_id="stop-btn"
-                ).click(fn=None, js="stopListening()")
+                )
 
-                gr.Button(
+                clear_btn = gr.Button(
                     "🗑️ Clear",
                     variant="secondary",
                     size="lg",
                     elem_id="clear-btn"
-                ).click(fn=None, js="clearConversation()")
+                )
+
+            # WebRTC control functions
+            def start_webrtc():
+                return {
+                    webrtc: gr.update(streaming=True)
+                }
+
+            def stop_webrtc():
+                return {
+                    webrtc: gr.update(streaming=False)
+                }
+
+            # Connect buttons to both WebRTC and JavaScript functions
+            start_btn.click(fn=start_webrtc, outputs=[webrtc], js="startListening()")
+            stop_btn.click(fn=stop_webrtc, outputs=[webrtc], js="stopListening()")
+            clear_btn.click(fn=None, js="clearConversation()")
 
         with gr.Column(scale=1):
             gr.Markdown("## ⚙️ Settings")
```
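The buttons now do double duty: the Python callback flips the hidden component's `streaming` flag by returning `gr.update(...)`, while `js=` still fires the browser-side handler in the same click. A stripped-down sketch of that toggle pattern, with `gr.Audio` standing in for `RTCComponent` (whose exact fastrtc signature isn't shown in this diff) and `console.log` standing in for the page's `startListening()`/`stopListening()` functions:

```python
import gradio as gr

with gr.Blocks() as demo:
    # Stand-in for the hidden RTCComponent; gr.Audio also exposes a
    # streaming flag (whether an update to it takes effect can vary by
    # Gradio version, but the wiring pattern is the same).
    rtc = gr.Audio(streaming=False, visible=False)

    start_btn = gr.Button("▶️ Start Listening", variant="primary")
    stop_btn = gr.Button("⏹️ Stop", variant="stop")

    # Each click updates the component server-side and runs browser-side JS.
    start_btn.click(fn=lambda: gr.update(streaming=True), outputs=[rtc],
                    js="() => console.log('start')")
    stop_btn.click(fn=lambda: gr.update(streaming=False), outputs=[rtc],
                   js="() => console.log('stop')")

demo.launch()
```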
```diff
@@ -418,21 +444,22 @@ def create_fastapi_app():
 
     async def process_audio_chunk(audio_data: bytes) -> dict:
         """
-        Process audio chunk and return diarization result by sending it to the Speaker Diarization backend
+        Process audio chunk by forwarding to the backend.
+        This function is only used for the direct WebSocket API, not for the WebRTC component.
+
+        Note: In production, you should primarily use the WebRTC component which has its own
+        audio processing flow through the Render backend.
         """
         try:
-            # Convert WebM audio to appropriate format if needed
-            # This step may require additional processing depending on your backend requirements
-
             # Connect to the Speaker Diarization backend via WebSocket
-            websocket_url = f"wss://{config.hf_space_url.replace('https://', '').replace('http://', '')}/ws_inference"
-            logger.info(f"Connecting to diarization backend at {websocket_url}")
+            websocket_url = f"wss://{config.hf_space_url}/ws_inference"
+            logger.info(f"Forwarding audio to diarization backend at {websocket_url}")
 
             async with websockets.connect(websocket_url) as websocket:
                 # Send audio data
                 await websocket.send(audio_data)
 
-                # Receive the response (may need to handle multiple messages)
+                # Receive the response
                 response = await websocket.recv()
 
                 # Parse the response
```
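For completeness, a minimal standalone client exercising the same `/ws_inference` endpoint that `process_audio_chunk` forwards to. The endpoint path and hostname come from the diff; that the backend replies with a single JSON message per chunk is an assumption:

```python
import asyncio
import json

import websockets  # same dependency ui.py already imports

async def diarize(audio_bytes: bytes,
                  host: str = "androidguy-speaker-diarization.hf.space") -> dict:
    """Send one audio chunk to the diarization backend and parse the reply."""
    async with websockets.connect(f"wss://{host}/ws_inference") as ws:
        await ws.send(audio_bytes)
        reply = await ws.recv()
        return json.loads(reply)  # assumes a single JSON response per chunk

# Example: asyncio.run(diarize(open("chunk.webm", "rb").read()))
```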
 