siyah1 committed on
Commit
35bedd7
·
verified ·
1 Parent(s): dbc2611

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -13
app.py CHANGED
@@ -140,10 +140,25 @@ class GeminiHandler(StreamHandler):
140
  def _initialize_websocket(self):
141
  assert self.config, "Config not set"
142
  try:
143
- self.ws = websockets.sync.client.connect(self.config.ws_url, timeout=30)
 
 
 
 
 
144
  initial_request = {
145
  "setup": {
146
  "model": self.config.model,
 
 
 
 
 
 
 
 
 
 
147
  "systemInstruction": {
148
  "parts": [
149
  {
@@ -171,10 +186,12 @@ Start by introducing yourself and asking how you can help them prepare for their
171
  }
172
  }
173
  }
 
174
  self.ws.send(json.dumps(initial_request))
175
  setup_response = json.loads(self.ws.recv())
176
  print(f"Setup response: {setup_response}")
177
  self.conversation_tracker.start_session()
 
178
  except websockets.exceptions.WebSocketException as e:
179
  print(f"WebSocket connection failed: {str(e)}")
180
  self.ws = None
@@ -192,19 +209,21 @@ Start by introducing yourself and asking how you can help them prepare for their
192
 
193
  def receive(self, frame: tuple[int, np.ndarray]) -> None:
194
  if not self.channel:
 
195
  return
 
196
  if not self.config:
197
  # Get API key from environment variable
198
  api_key = os.getenv('GEMINI_API_KEY')
199
  if not api_key:
200
  print("Error: GEMINI_API_KEY environment variable not set")
201
- if self.channel:
202
- self.channel.send("API key not configured. Please set GEMINI_API_KEY environment variable.")
203
  return
 
204
  self.config = GeminiConfig(api_key)
205
 
206
  try:
207
  if not self.ws:
 
208
  self._initialize_websocket()
209
 
210
  if not self.ws:
@@ -212,15 +231,26 @@ Start by introducing yourself and asking how you can help them prepare for their
212
  return
213
 
214
  _, array = frame
 
 
 
215
  array = array.squeeze()
 
 
 
216
  audio_message = self.audio_processor.encode_audio(
217
- array, self.output_sample_rate
218
  )
219
  self.ws.send(json.dumps(audio_message))
 
 
220
  except Exception as e:
221
  print(f"Error in receive: {str(e)}")
222
  if self.ws:
223
- self.ws.close()
 
 
 
224
  self.ws = None
225
 
226
  def _process_server_content(self, content):
@@ -251,22 +281,46 @@ Start by introducing yourself and asking how you can help them prepare for their
251
  def generator(self):
252
  while True:
253
  if not self.ws or not self.config:
254
- print("WebSocket not connected")
255
  yield None
256
  continue
257
 
258
  try:
259
- message = self.ws.recv(timeout=5)
 
 
 
 
 
 
 
 
 
 
 
 
260
  msg = json.loads(message)
 
261
 
262
  if "serverContent" in msg:
263
  content = msg["serverContent"].get("modelTurn", {})
264
- yield from self._process_server_content(content)
 
 
 
 
 
 
265
  except TimeoutError:
266
- print("Timeout waiting for server response")
 
 
 
 
267
  yield None
268
  except Exception as e:
269
  print(f"Error in generator: {str(e)}")
 
270
  yield None
271
 
272
  def emit(self) -> tuple[int, np.ndarray] | None:
@@ -331,6 +385,13 @@ class PreconsultationApp:
331
  except Exception as e:
332
  print(f"Warning: Could not get Twilio TURN credentials: {e}")
333
  print("Using basic WebRTC configuration")
 
 
 
 
 
 
 
334
 
335
  webrtc = WebRTC(
336
  label="Voice Consultation",
@@ -338,23 +399,31 @@ class PreconsultationApp:
338
  mode="send-receive",
339
  rtc_configuration=rtc_config,
340
  )
 
 
 
 
 
 
 
341
 
342
  with gr.Column(scale=1):
343
  gr.HTML("""
344
  <div style='background-color: #f0f9ff; padding: 15px; border-radius: 8px; margin-bottom: 15px'>
345
  <h3 style='margin-top: 0'>How it works:</h3>
346
  <ol style='margin-bottom: 0'>
 
347
  <li>Click "Start" to begin the voice consultation</li>
348
- <li>Speak naturally with the AI agent</li>
349
  <li>Share your health concerns and questions</li>
350
- <li>End the session when ready</li>
351
  <li>Get a summary report for your healthcare provider</li>
352
  </ol>
353
  </div>
354
  <div style='background-color: #fef3c7; padding: 10px; border-radius: 6px; margin-bottom: 15px'>
355
  <p style='margin: 0; font-size: 12px; color: #92400e'>
356
- <strong>Note:</strong> If you see "Too many users", please wait a moment and try again.
357
- Multiple users can use the system simultaneously.
358
  </p>
359
  </div>
360
  """)
 
140
  def _initialize_websocket(self):
141
  assert self.config, "Config not set"
142
  try:
143
+ print(f"Connecting to WebSocket: {self.config.ws_url}")
144
+ self.ws = websockets.sync.client.connect(
145
+ self.config.ws_url,
146
+ timeout=10,
147
+ additional_headers={"User-Agent": "PreconsultationAgent/1.0"}
148
+ )
149
  initial_request = {
150
  "setup": {
151
  "model": self.config.model,
152
+ "generationConfig": {
153
+ "responseModalities": ["AUDIO"],
154
+ "speechConfig": {
155
+ "voiceConfig": {
156
+ "prebuiltVoiceConfig": {
157
+ "voiceName": "Puck"
158
+ }
159
+ }
160
+ }
161
+ },
162
  "systemInstruction": {
163
  "parts": [
164
  {
 
186
  }
187
  }
188
  }
189
+ print("Sending setup request...")
190
  self.ws.send(json.dumps(initial_request))
191
  setup_response = json.loads(self.ws.recv())
192
  print(f"Setup response: {setup_response}")
193
  self.conversation_tracker.start_session()
194
+ print("WebSocket connection established successfully")
195
  except websockets.exceptions.WebSocketException as e:
196
  print(f"WebSocket connection failed: {str(e)}")
197
  self.ws = None
 
209
 
210
  def receive(self, frame: tuple[int, np.ndarray]) -> None:
211
  if not self.channel:
212
+ print("No channel available")
213
  return
214
+
215
  if not self.config:
216
  # Get API key from environment variable
217
  api_key = os.getenv('GEMINI_API_KEY')
218
  if not api_key:
219
  print("Error: GEMINI_API_KEY environment variable not set")
 
 
220
  return
221
+ print(f"Initializing with API key: {api_key[:10]}...")
222
  self.config = GeminiConfig(api_key)
223
 
224
  try:
225
  if not self.ws:
226
+ print("Initializing WebSocket connection...")
227
  self._initialize_websocket()
228
 
229
  if not self.ws:
 
231
  return
232
 
233
  _, array = frame
234
+ if array is None or array.size == 0:
235
+ return
236
+
237
  array = array.squeeze()
238
+ if array.size == 0:
239
+ return
240
+
241
  audio_message = self.audio_processor.encode_audio(
242
+ array, self.input_sample_rate
243
  )
244
  self.ws.send(json.dumps(audio_message))
245
+ print(f"Sent audio chunk of size: {array.size}")
246
+
247
  except Exception as e:
248
  print(f"Error in receive: {str(e)}")
249
  if self.ws:
250
+ try:
251
+ self.ws.close()
252
+ except:
253
+ pass
254
  self.ws = None
255
 
256
  def _process_server_content(self, content):
 
281
  def generator(self):
282
  while True:
283
  if not self.ws or not self.config:
284
+ print("WebSocket not connected, yielding None")
285
  yield None
286
  continue
287
 
288
  try:
289
+ # Check if WebSocket is still open
290
+ if self.ws.closed:
291
+ print("WebSocket closed, reconnecting...")
292
+ self._initialize_websocket()
293
+ if not self.ws:
294
+ yield None
295
+ continue
296
+
297
+ message = self.ws.recv(timeout=2)
298
+ if not message:
299
+ yield None
300
+ continue
301
+
302
  msg = json.loads(message)
303
+ print(f"Received message type: {list(msg.keys())}")
304
 
305
  if "serverContent" in msg:
306
  content = msg["serverContent"].get("modelTurn", {})
307
+ if content:
308
+ yield from self._process_server_content(content)
309
+ else:
310
+ yield None
311
+ else:
312
+ yield None
313
+
314
  except TimeoutError:
315
+ # This is normal, just continue
316
+ yield None
317
+ except websockets.exceptions.ConnectionClosed:
318
+ print("WebSocket connection closed")
319
+ self.ws = None
320
  yield None
321
  except Exception as e:
322
  print(f"Error in generator: {str(e)}")
323
+ self.ws = None
324
  yield None
325
 
326
  def emit(self) -> tuple[int, np.ndarray] | None:
 
385
  except Exception as e:
386
  print(f"Warning: Could not get Twilio TURN credentials: {e}")
387
  print("Using basic WebRTC configuration")
388
+ # Provide a basic STUN server configuration
389
+ rtc_config = {
390
+ "iceServers": [
391
+ {"urls": ["stun:stun.l.google.com:19302"]},
392
+ {"urls": ["stun:stun1.l.google.com:19302"]},
393
+ ]
394
+ }
395
 
396
  webrtc = WebRTC(
397
  label="Voice Consultation",
 
399
  mode="send-receive",
400
  rtc_configuration=rtc_config,
401
  )
402
+
403
+ # Status indicator
404
+ status_display = gr.HTML("""
405
+ <div id="status" style='padding: 10px; margin-top: 10px; border-radius: 6px; background-color: #f3f4f6; text-align: center'>
406
+ <span style='color: #6b7280'>Ready to start consultation</span>
407
+ </div>
408
+ """)
409
 
410
  with gr.Column(scale=1):
411
  gr.HTML("""
412
  <div style='background-color: #f0f9ff; padding: 15px; border-radius: 8px; margin-bottom: 15px'>
413
  <h3 style='margin-top: 0'>How it works:</h3>
414
  <ol style='margin-bottom: 0'>
415
+ <li><strong>Allow microphone access</strong> when prompted by your browser</li>
416
  <li>Click "Start" to begin the voice consultation</li>
417
+ <li>Speak clearly and naturally with the AI agent</li>
418
  <li>Share your health concerns and questions</li>
419
+ <li>Click "End Session" when finished</li>
420
  <li>Get a summary report for your healthcare provider</li>
421
  </ol>
422
  </div>
423
  <div style='background-color: #fef3c7; padding: 10px; border-radius: 6px; margin-bottom: 15px'>
424
  <p style='margin: 0; font-size: 12px; color: #92400e'>
425
+ <strong>Troubleshooting:</strong> If stuck on "waiting", ensure microphone access is granted
426
+ and try refreshing the page. Check console logs for technical details.
427
  </p>
428
  </div>
429
  """)