Update app.py
Browse files
app.py
CHANGED
@@ -140,10 +140,25 @@ class GeminiHandler(StreamHandler):
|
|
140 |
def _initialize_websocket(self):
|
141 |
assert self.config, "Config not set"
|
142 |
try:
|
143 |
-
|
|
|
|
|
|
|
|
|
|
|
144 |
initial_request = {
|
145 |
"setup": {
|
146 |
"model": self.config.model,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
"systemInstruction": {
|
148 |
"parts": [
|
149 |
{
|
@@ -171,10 +186,12 @@ Start by introducing yourself and asking how you can help them prepare for their
|
|
171 |
}
|
172 |
}
|
173 |
}
|
|
|
174 |
self.ws.send(json.dumps(initial_request))
|
175 |
setup_response = json.loads(self.ws.recv())
|
176 |
print(f"Setup response: {setup_response}")
|
177 |
self.conversation_tracker.start_session()
|
|
|
178 |
except websockets.exceptions.WebSocketException as e:
|
179 |
print(f"WebSocket connection failed: {str(e)}")
|
180 |
self.ws = None
|
@@ -192,19 +209,21 @@ Start by introducing yourself and asking how you can help them prepare for their
|
|
192 |
|
193 |
def receive(self, frame: tuple[int, np.ndarray]) -> None:
|
194 |
if not self.channel:
|
|
|
195 |
return
|
|
|
196 |
if not self.config:
|
197 |
# Get API key from environment variable
|
198 |
api_key = os.getenv('GEMINI_API_KEY')
|
199 |
if not api_key:
|
200 |
print("Error: GEMINI_API_KEY environment variable not set")
|
201 |
-
if self.channel:
|
202 |
-
self.channel.send("API key not configured. Please set GEMINI_API_KEY environment variable.")
|
203 |
return
|
|
|
204 |
self.config = GeminiConfig(api_key)
|
205 |
|
206 |
try:
|
207 |
if not self.ws:
|
|
|
208 |
self._initialize_websocket()
|
209 |
|
210 |
if not self.ws:
|
@@ -212,15 +231,26 @@ Start by introducing yourself and asking how you can help them prepare for their
|
|
212 |
return
|
213 |
|
214 |
_, array = frame
|
|
|
|
|
|
|
215 |
array = array.squeeze()
|
|
|
|
|
|
|
216 |
audio_message = self.audio_processor.encode_audio(
|
217 |
-
array, self.
|
218 |
)
|
219 |
self.ws.send(json.dumps(audio_message))
|
|
|
|
|
220 |
except Exception as e:
|
221 |
print(f"Error in receive: {str(e)}")
|
222 |
if self.ws:
|
223 |
-
|
|
|
|
|
|
|
224 |
self.ws = None
|
225 |
|
226 |
def _process_server_content(self, content):
|
@@ -251,22 +281,46 @@ Start by introducing yourself and asking how you can help them prepare for their
|
|
251 |
def generator(self):
|
252 |
while True:
|
253 |
if not self.ws or not self.config:
|
254 |
-
print("WebSocket not connected")
|
255 |
yield None
|
256 |
continue
|
257 |
|
258 |
try:
|
259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
msg = json.loads(message)
|
|
|
261 |
|
262 |
if "serverContent" in msg:
|
263 |
content = msg["serverContent"].get("modelTurn", {})
|
264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
except TimeoutError:
|
266 |
-
|
|
|
|
|
|
|
|
|
267 |
yield None
|
268 |
except Exception as e:
|
269 |
print(f"Error in generator: {str(e)}")
|
|
|
270 |
yield None
|
271 |
|
272 |
def emit(self) -> tuple[int, np.ndarray] | None:
|
@@ -331,6 +385,13 @@ class PreconsultationApp:
|
|
331 |
except Exception as e:
|
332 |
print(f"Warning: Could not get Twilio TURN credentials: {e}")
|
333 |
print("Using basic WebRTC configuration")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
334 |
|
335 |
webrtc = WebRTC(
|
336 |
label="Voice Consultation",
|
@@ -338,23 +399,31 @@ class PreconsultationApp:
|
|
338 |
mode="send-receive",
|
339 |
rtc_configuration=rtc_config,
|
340 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
341 |
|
342 |
with gr.Column(scale=1):
|
343 |
gr.HTML("""
|
344 |
<div style='background-color: #f0f9ff; padding: 15px; border-radius: 8px; margin-bottom: 15px'>
|
345 |
<h3 style='margin-top: 0'>How it works:</h3>
|
346 |
<ol style='margin-bottom: 0'>
|
|
|
347 |
<li>Click "Start" to begin the voice consultation</li>
|
348 |
-
<li>Speak naturally with the AI agent</li>
|
349 |
<li>Share your health concerns and questions</li>
|
350 |
-
<li>End
|
351 |
<li>Get a summary report for your healthcare provider</li>
|
352 |
</ol>
|
353 |
</div>
|
354 |
<div style='background-color: #fef3c7; padding: 10px; border-radius: 6px; margin-bottom: 15px'>
|
355 |
<p style='margin: 0; font-size: 12px; color: #92400e'>
|
356 |
-
<strong>
|
357 |
-
|
358 |
</p>
|
359 |
</div>
|
360 |
""")
|
|
|
140 |
def _initialize_websocket(self):
|
141 |
assert self.config, "Config not set"
|
142 |
try:
|
143 |
+
print(f"Connecting to WebSocket: {self.config.ws_url}")
|
144 |
+
self.ws = websockets.sync.client.connect(
|
145 |
+
self.config.ws_url,
|
146 |
+
timeout=10,
|
147 |
+
additional_headers={"User-Agent": "PreconsultationAgent/1.0"}
|
148 |
+
)
|
149 |
initial_request = {
|
150 |
"setup": {
|
151 |
"model": self.config.model,
|
152 |
+
"generationConfig": {
|
153 |
+
"responseModalities": ["AUDIO"],
|
154 |
+
"speechConfig": {
|
155 |
+
"voiceConfig": {
|
156 |
+
"prebuiltVoiceConfig": {
|
157 |
+
"voiceName": "Puck"
|
158 |
+
}
|
159 |
+
}
|
160 |
+
}
|
161 |
+
},
|
162 |
"systemInstruction": {
|
163 |
"parts": [
|
164 |
{
|
|
|
186 |
}
|
187 |
}
|
188 |
}
|
189 |
+
print("Sending setup request...")
|
190 |
self.ws.send(json.dumps(initial_request))
|
191 |
setup_response = json.loads(self.ws.recv())
|
192 |
print(f"Setup response: {setup_response}")
|
193 |
self.conversation_tracker.start_session()
|
194 |
+
print("WebSocket connection established successfully")
|
195 |
except websockets.exceptions.WebSocketException as e:
|
196 |
print(f"WebSocket connection failed: {str(e)}")
|
197 |
self.ws = None
|
|
|
209 |
|
210 |
def receive(self, frame: tuple[int, np.ndarray]) -> None:
|
211 |
if not self.channel:
|
212 |
+
print("No channel available")
|
213 |
return
|
214 |
+
|
215 |
if not self.config:
|
216 |
# Get API key from environment variable
|
217 |
api_key = os.getenv('GEMINI_API_KEY')
|
218 |
if not api_key:
|
219 |
print("Error: GEMINI_API_KEY environment variable not set")
|
|
|
|
|
220 |
return
|
221 |
+
print(f"Initializing with API key: {api_key[:10]}...")
|
222 |
self.config = GeminiConfig(api_key)
|
223 |
|
224 |
try:
|
225 |
if not self.ws:
|
226 |
+
print("Initializing WebSocket connection...")
|
227 |
self._initialize_websocket()
|
228 |
|
229 |
if not self.ws:
|
|
|
231 |
return
|
232 |
|
233 |
_, array = frame
|
234 |
+
if array is None or array.size == 0:
|
235 |
+
return
|
236 |
+
|
237 |
array = array.squeeze()
|
238 |
+
if array.size == 0:
|
239 |
+
return
|
240 |
+
|
241 |
audio_message = self.audio_processor.encode_audio(
|
242 |
+
array, self.input_sample_rate
|
243 |
)
|
244 |
self.ws.send(json.dumps(audio_message))
|
245 |
+
print(f"Sent audio chunk of size: {array.size}")
|
246 |
+
|
247 |
except Exception as e:
|
248 |
print(f"Error in receive: {str(e)}")
|
249 |
if self.ws:
|
250 |
+
try:
|
251 |
+
self.ws.close()
|
252 |
+
except:
|
253 |
+
pass
|
254 |
self.ws = None
|
255 |
|
256 |
def _process_server_content(self, content):
|
|
|
281 |
def generator(self):
    """Yield output derived from messages received on the Gemini WebSocket.

    Runs forever. On each pass it yields either the items produced by
    ``self._process_server_content`` (for a message whose ``serverContent``
    carries a non-empty ``modelTurn``) or ``None`` when there is nothing to
    emit: no connection/config yet, receive timeout, empty message, a
    message without usable content, or an error.

    On a closed connection or an unexpected error, ``self.ws`` is reset to
    ``None`` so a later pass (or another method that checks ``self.ws``)
    can establish a fresh connection.
    """
    while True:
        if not self.ws or not self.config:
            print("WebSocket not connected, yielding None")
            yield None
            continue

        try:
            # Check if WebSocket is still open.  Connections returned by
            # websockets.sync.client.connect do not appear to expose a
            # ``closed`` attribute, so reading it directly would raise
            # AttributeError and tear down a healthy connection below.
            # Probe ``state`` first and fall back to ``closed`` if present.
            state = getattr(self.ws, "state", None)
            if state is not None:
                is_closed = getattr(state, "name", None) == "CLOSED"
            else:
                is_closed = bool(getattr(self.ws, "closed", False))

            if is_closed:
                print("WebSocket closed, reconnecting...")
                self._initialize_websocket()
                if not self.ws:
                    yield None
                    continue

            message = self.ws.recv(timeout=2)
            if not message:
                yield None
                continue

            msg = json.loads(message)
            print(f"Received message type: {list(msg.keys())}")

            if "serverContent" in msg:
                content = msg["serverContent"].get("modelTurn", {})
                if content:
                    yield from self._process_server_content(content)
                else:
                    yield None
            else:
                yield None

        except TimeoutError:
            # This is normal, just continue
            yield None
        except websockets.exceptions.ConnectionClosed:
            print("WebSocket connection closed")
            self.ws = None
            yield None
        except Exception as e:
            print(f"Error in generator: {str(e)}")
            # Detach the connection first, then close it best-effort so the
            # underlying socket is not leaked when we drop the reference.
            stale_ws, self.ws = self.ws, None
            if stale_ws is not None:
                try:
                    stale_ws.close()
                except Exception as close_error:
                    print(f"Error closing WebSocket: {str(close_error)}")
            yield None
|
325 |
|
326 |
def emit(self) -> tuple[int, np.ndarray] | None:
|
|
|
385 |
except Exception as e:
|
386 |
print(f"Warning: Could not get Twilio TURN credentials: {e}")
|
387 |
print("Using basic WebRTC configuration")
|
388 |
+
# Provide a basic STUN server configuration
|
389 |
+
rtc_config = {
|
390 |
+
"iceServers": [
|
391 |
+
{"urls": ["stun:stun.l.google.com:19302"]},
|
392 |
+
{"urls": ["stun:stun1.l.google.com:19302"]},
|
393 |
+
]
|
394 |
+
}
|
395 |
|
396 |
webrtc = WebRTC(
|
397 |
label="Voice Consultation",
|
|
|
399 |
mode="send-receive",
|
400 |
rtc_configuration=rtc_config,
|
401 |
)
|
402 |
+
|
403 |
+
# Status indicator
|
404 |
+
status_display = gr.HTML("""
|
405 |
+
<div id="status" style='padding: 10px; margin-top: 10px; border-radius: 6px; background-color: #f3f4f6; text-align: center'>
|
406 |
+
<span style='color: #6b7280'>Ready to start consultation</span>
|
407 |
+
</div>
|
408 |
+
""")
|
409 |
|
410 |
with gr.Column(scale=1):
|
411 |
gr.HTML("""
|
412 |
<div style='background-color: #f0f9ff; padding: 15px; border-radius: 8px; margin-bottom: 15px'>
|
413 |
<h3 style='margin-top: 0'>How it works:</h3>
|
414 |
<ol style='margin-bottom: 0'>
|
415 |
+
<li><strong>Allow microphone access</strong> when prompted by your browser</li>
|
416 |
<li>Click "Start" to begin the voice consultation</li>
|
417 |
+
<li>Speak clearly and naturally with the AI agent</li>
|
418 |
<li>Share your health concerns and questions</li>
|
419 |
+
<li>Click "End Session" when finished</li>
|
420 |
<li>Get a summary report for your healthcare provider</li>
|
421 |
</ol>
|
422 |
</div>
|
423 |
<div style='background-color: #fef3c7; padding: 10px; border-radius: 6px; margin-bottom: 15px'>
|
424 |
<p style='margin: 0; font-size: 12px; color: #92400e'>
|
425 |
+
<strong>Troubleshooting:</strong> If stuck on "waiting", ensure microphone access is granted
|
426 |
+
and try refreshing the page. Check console logs for technical details.
|
427 |
</p>
|
428 |
</div>
|
429 |
""")
|