Spaces:

siyah1
/

previsit

Sleeping

App Files Community

siyah1 committed 26 days ago

Commit

35bedd7

verified ·

1 Parent(s): dbc2611

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -13

app.py CHANGED Viewed

@@ -140,10 +140,25 @@ class GeminiHandler(StreamHandler):
     def _initialize_websocket(self):
         assert self.config, "Config not set"
         try:
-            self.ws = websockets.sync.client.connect(self.config.ws_url, timeout=30)
             initial_request = {
                 "setup": {
                     "model": self.config.model,
                     "systemInstruction": {
                         "parts": [
                             {
@@ -171,10 +186,12 @@ Start by introducing yourself and asking how you can help them prepare for their
                     }
                 }
             }
             self.ws.send(json.dumps(initial_request))
             setup_response = json.loads(self.ws.recv())
             print(f"Setup response: {setup_response}")
             self.conversation_tracker.start_session()
         except websockets.exceptions.WebSocketException as e:
             print(f"WebSocket connection failed: {str(e)}")
             self.ws = None
@@ -192,19 +209,21 @@ Start by introducing yourself and asking how you can help them prepare for their
     def receive(self, frame: tuple[int, np.ndarray]) -> None:
         if not self.channel:
             return
         if not self.config:
             # Get API key from environment variable
             api_key = os.getenv('GEMINI_API_KEY')
             if not api_key:
                 print("Error: GEMINI_API_KEY environment variable not set")
-                if self.channel:
-                    self.channel.send("API key not configured. Please set GEMINI_API_KEY environment variable.")
                 return
             self.config = GeminiConfig(api_key)
         try:
             if not self.ws:
                 self._initialize_websocket()
             if not self.ws:
@@ -212,15 +231,26 @@ Start by introducing yourself and asking how you can help them prepare for their
                 return
             _, array = frame
             array = array.squeeze()
             audio_message = self.audio_processor.encode_audio(
-                array, self.output_sample_rate
             )
             self.ws.send(json.dumps(audio_message))
         except Exception as e:
             print(f"Error in receive: {str(e)}")
             if self.ws:
-                self.ws.close()
             self.ws = None
     def _process_server_content(self, content):
@@ -251,22 +281,46 @@ Start by introducing yourself and asking how you can help them prepare for their
     def generator(self):
         while True:
             if not self.ws or not self.config:
-                print("WebSocket not connected")
                 yield None
                 continue
             try:
-                message = self.ws.recv(timeout=5)
                 msg = json.loads(message)
                 if "serverContent" in msg:
                     content = msg["serverContent"].get("modelTurn", {})
-                    yield from self._process_server_content(content)
             except TimeoutError:
-                print("Timeout waiting for server response")
                 yield None
             except Exception as e:
                 print(f"Error in generator: {str(e)}")
                 yield None
     def emit(self) -> tuple[int, np.ndarray] | None:
@@ -331,6 +385,13 @@ class PreconsultationApp:
                     except Exception as e:
                         print(f"Warning: Could not get Twilio TURN credentials: {e}")
                         print("Using basic WebRTC configuration")
                     webrtc = WebRTC(
                         label="Voice Consultation",
@@ -338,23 +399,31 @@ class PreconsultationApp:
                         mode="send-receive",
                         rtc_configuration=rtc_config,
                     )
                 with gr.Column(scale=1):
                     gr.HTML("""
                         <div style='background-color: #f0f9ff; padding: 15px; border-radius: 8px; margin-bottom: 15px'>
                             <h3 style='margin-top: 0'>How it works:</h3>
                             <ol style='margin-bottom: 0'>
                                 <li>Click "Start" to begin the voice consultation</li>
-                                <li>Speak naturally with the AI agent</li>
                                 <li>Share your health concerns and questions</li>
-                                <li>End the session when ready</li>
                                 <li>Get a summary report for your healthcare provider</li>
                             </ol>
                         </div>
                         <div style='background-color: #fef3c7; padding: 10px; border-radius: 6px; margin-bottom: 15px'>
                             <p style='margin: 0; font-size: 12px; color: #92400e'>
-                                <strong>Note:</strong> If you see "Too many users", please wait a moment and try again.
-                                Multiple users can use the system simultaneously.
                             </p>
                         </div>
                     """)

     def _initialize_websocket(self):
         assert self.config, "Config not set"
         try:
+            print(f"Connecting to WebSocket: {self.config.ws_url}")
+            self.ws = websockets.sync.client.connect(
+                self.config.ws_url,
+                timeout=10,
+                additional_headers={"User-Agent": "PreconsultationAgent/1.0"}
+            )
             initial_request = {
                 "setup": {
                     "model": self.config.model,
+                    "generationConfig": {
+                        "responseModalities": ["AUDIO"],
+                        "speechConfig": {
+                            "voiceConfig": {
+                                "prebuiltVoiceConfig": {
+                                    "voiceName": "Puck"
+                                }
+                            }
+                        }
+                    },
                     "systemInstruction": {
                         "parts": [
                             {
                     }
                 }
             }
+            print("Sending setup request...")
             self.ws.send(json.dumps(initial_request))
             setup_response = json.loads(self.ws.recv())
             print(f"Setup response: {setup_response}")
             self.conversation_tracker.start_session()
+            print("WebSocket connection established successfully")
         except websockets.exceptions.WebSocketException as e:
             print(f"WebSocket connection failed: {str(e)}")
             self.ws = None
     def receive(self, frame: tuple[int, np.ndarray]) -> None:
         if not self.channel:
+            print("No channel available")
             return
         if not self.config:
             # Get API key from environment variable
             api_key = os.getenv('GEMINI_API_KEY')
             if not api_key:
                 print("Error: GEMINI_API_KEY environment variable not set")
                 return
+            print(f"Initializing with API key: {api_key[:10]}...")
             self.config = GeminiConfig(api_key)
         try:
             if not self.ws:
+                print("Initializing WebSocket connection...")
                 self._initialize_websocket()
             if not self.ws:
                 return
             _, array = frame
+            if array is None or array.size == 0:
+                return
             array = array.squeeze()
+            if array.size == 0:
+                return
             audio_message = self.audio_processor.encode_audio(
+                array, self.input_sample_rate
             )
             self.ws.send(json.dumps(audio_message))
+            print(f"Sent audio chunk of size: {array.size}")
         except Exception as e:
             print(f"Error in receive: {str(e)}")
             if self.ws:
+                try:
+                    self.ws.close()
+                except:
+                    pass
             self.ws = None
     def _process_server_content(self, content):
     def generator(self):
         while True:
             if not self.ws or not self.config:
+                print("WebSocket not connected, yielding None")
                 yield None
                 continue
             try:
+                # Check if WebSocket is still open
+                if self.ws.closed:
+                    print("WebSocket closed, reconnecting...")
+                    self._initialize_websocket()
+                    if not self.ws:
+                        yield None
+                        continue
+                message = self.ws.recv(timeout=2)
+                if not message:
+                    yield None
+                    continue
                 msg = json.loads(message)
+                print(f"Received message type: {list(msg.keys())}")
                 if "serverContent" in msg:
                     content = msg["serverContent"].get("modelTurn", {})
+                    if content:
+                        yield from self._process_server_content(content)
+                    else:
+                        yield None
+                else:
+                    yield None
             except TimeoutError:
+                # This is normal, just continue
+                yield None
+            except websockets.exceptions.ConnectionClosed:
+                print("WebSocket connection closed")
+                self.ws = None
                 yield None
             except Exception as e:
                 print(f"Error in generator: {str(e)}")
+                self.ws = None
                 yield None
     def emit(self) -> tuple[int, np.ndarray] | None:
                     except Exception as e:
                         print(f"Warning: Could not get Twilio TURN credentials: {e}")
                         print("Using basic WebRTC configuration")
+                        # Provide a basic STUN server configuration
+                        rtc_config = {
+                            "iceServers": [
+                                {"urls": ["stun:stun.l.google.com:19302"]},
+                                {"urls": ["stun:stun1.l.google.com:19302"]},
+                            ]
+                        }
                     webrtc = WebRTC(
                         label="Voice Consultation",
                         mode="send-receive",
                         rtc_configuration=rtc_config,
                     )
+                    # Status indicator
+                    status_display = gr.HTML("""
+                        <div id="status" style='padding: 10px; margin-top: 10px; border-radius: 6px; background-color: #f3f4f6; text-align: center'>
+                            <span style='color: #6b7280'>Ready to start consultation</span>
+                        </div>
+                    """)
                 with gr.Column(scale=1):
                     gr.HTML("""
                         <div style='background-color: #f0f9ff; padding: 15px; border-radius: 8px; margin-bottom: 15px'>
                             <h3 style='margin-top: 0'>How it works:</h3>
                             <ol style='margin-bottom: 0'>
+                                <li><strong>Allow microphone access</strong> when prompted by your browser</li>
                                 <li>Click "Start" to begin the voice consultation</li>
+                                <li>Speak clearly and naturally with the AI agent</li>
                                 <li>Share your health concerns and questions</li>
+                                <li>Click "End Session" when finished</li>
                                 <li>Get a summary report for your healthcare provider</li>
                             </ol>
                         </div>
                         <div style='background-color: #fef3c7; padding: 10px; border-radius: 6px; margin-bottom: 15px'>
                             <p style='margin: 0; font-size: 12px; color: #92400e'>
+                                <strong>Troubleshooting:</strong> If stuck on "waiting", ensure microphone access is granted
+                                and try refreshing the page. Check console logs for technical details.
                             </p>
                         </div>
                     """)