arthrod committed on
Commit
71aeae5
·
verified ·
1 Parent(s): 7a42892

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -23
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  """Real-Time Screen Assistant - Premium Edition with Complete Frontend Integration
2
 
3
  This is the PREMIUM, BEST WORKING version with comprehensive real-time handlers:
@@ -17,6 +19,22 @@ Features:
17
  - 300s timeout for real-time behavior
18
  """
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  import asyncio
21
  import os
22
  import time
@@ -38,6 +56,9 @@ import tempfile
38
  import os
39
  import json
40
 
 
 
 
41
  class ScreenRecorderData(GradioModel):
42
  video: Optional[FileData] = None
43
  duration: Optional[float] = None
@@ -302,8 +323,7 @@ class ScreenRecorder(Component):
302
  def get_status(self) -> str:
303
  """Get the current status of the recorder."""
304
  return self._status
305
- # Environment variable for API key
306
- API_KEY = os.getenv("GEMINI_API_KEY", "")
307
 
308
  class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
309
  """Premium Real-time screen assistant with complete frontend integration.
@@ -628,19 +648,39 @@ def initialize_real_time_assistant():
628
  handler = RealTimeScreenAssistant()
629
  app_state["handler"] = handler
630
 
631
- # PREMIUM: Enhanced stream configuration
632
- stream = Stream(
633
- handler=ReplyOnPause(handler), # Voice activity detection
634
- modality="audio-video",
635
- mode="send-receive",
636
- rtc_configuration=get_cloudflare_turn_credentials_async,
637
- time_limit=300, # 5 minutes - real-time optimized
638
- ui_args={
639
- "title": "Premium Real-Time Assistant",
640
- "subtitle": "Audio-Video Streaming with Gemini 2.0",
641
- "hide_title": False
642
- }
643
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
644
 
645
  app_state["stream"] = stream
646
  return stream
@@ -672,11 +712,28 @@ async def handle_connect_async():
672
 
673
  def handle_connect():
674
  """Sync wrapper for connection"""
 
 
675
  app_state["connected"] = True # Optimistic update for UI
676
  app_state["last_status"] = "Initiating connection..."
677
 
678
- # Start async connection
679
- asyncio.create_task(handle_connect_async())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
  return "🔄 Initiating connection to GenAI Live API..."
681
 
682
  async def handle_disconnect_async():
@@ -694,10 +751,25 @@ async def handle_disconnect_async():
694
 
695
  def handle_disconnect():
696
  """Sync wrapper for disconnect"""
 
 
697
  app_state["connected"] = False # Immediate update for UI
698
 
699
- # Start async disconnect
700
- asyncio.create_task(handle_disconnect_async())
 
 
 
 
 
 
 
 
 
 
 
 
 
701
  return "🔄 Disconnecting from AI assistant..."
702
 
703
  def get_connection_status():
@@ -834,7 +906,7 @@ def create_interface():
834
  """Handle screen recording data and send to AI"""
835
  if not recording_data or not app_state["handler"] or not app_state["connected"]:
836
  return "⚠️ Not connected to AI or no recording data"
837
-
838
  try:
839
  # If we have video data, process it for the AI
840
  if recording_data and recording_data.get('video'):
@@ -843,14 +915,14 @@ def create_interface():
843
  duration = recording_data.get('duration', 0)
844
  size = recording_data.get('size', 0)
845
  print(f"📹 Screen recording received: {duration}s, {size} bytes")
846
-
847
  # Update stats
848
  app_state["stats"]["frames_sent"] += 1
849
-
850
  return f"✅ Screen recording processed: {duration:.1f}s"
851
  else:
852
  return "⚠️ No video data in recording"
853
-
854
  except Exception as e:
855
  print(f"❌ Error processing screen recording: {e}")
856
  return f"❌ Error: {e}"
 
1
+ # Environment variable for API key
2
+ API_KEY = os.getenv("GEMINI_API_KEY", "")
3
  """Real-Time Screen Assistant - Premium Edition with Complete Frontend Integration
4
 
5
  This is the PREMIUM, BEST WORKING version with comprehensive real-time handlers:
 
19
  - 300s timeout for real-time behavior
20
  """
21
 
22
+ import asyncio
23
+ import os
24
+ import time
25
+ import sys
26
+ from collections import deque
27
+
28
+ import cv2
29
+ import gradio as gr
30
+ import numpy as np
31
+ import numpy.typing as npt
32
+ from fastrtc import AsyncAudioVideoStreamHandler, ReplyOnPause, Stream, get_cloudflare_turn_credentials_async
33
+ from google import genai
34
+ from google.genai import types
35
+ from gradio.events import Dependency
36
+
37
+
38
  import asyncio
39
  import os
40
  import time
 
56
  import os
57
  import json
58
 
59
+ # Environment variable for API key
60
+ API_KEY = os.getenv("GEMINI_API_KEY", "")
61
+
62
  class ScreenRecorderData(GradioModel):
63
  video: Optional[FileData] = None
64
  duration: Optional[float] = None
 
323
  def get_status(self) -> str:
324
  """Get the current status of the recorder."""
325
  return self._status
326
+
 
327
 
328
  class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
329
  """Premium Real-time screen assistant with complete frontend integration.
 
648
  handler = RealTimeScreenAssistant()
649
  app_state["handler"] = handler
650
 
651
+ # PREMIUM: Enhanced stream configuration with fallback
652
+ try:
653
+ # Try with ReplyOnPause (requires fastrtc[vad])
654
+ stream = Stream(
655
+ handler=ReplyOnPause(handler), # Voice activity detection
656
+ modality="audio-video",
657
+ mode="send-receive",
658
+ rtc_configuration=get_cloudflare_turn_credentials_async,
659
+ time_limit=300, # 5 minutes - real-time optimized
660
+ ui_args={
661
+ "title": "Premium Real-Time Assistant",
662
+ "subtitle": "Audio-Video Streaming with Gemini 2.0",
663
+ "hide_title": False
664
+ }
665
+ )
666
+ print("✅ Stream created with ReplyOnPause (VAD enabled)")
667
+ except Exception as vad_error:
668
+ print(f"⚠️ ReplyOnPause failed: {vad_error}")
669
+ print("🔄 Falling back to basic handler...")
670
+ # Fallback to basic handler without VAD
671
+ stream = Stream(
672
+ handler=handler, # Direct handler without VAD
673
+ modality="audio-video",
674
+ mode="send-receive",
675
+ rtc_configuration=get_cloudflare_turn_credentials_async,
676
+ time_limit=300,
677
+ ui_args={
678
+ "title": "Real-Time Assistant (Basic)",
679
+ "subtitle": "Audio-Video Streaming with Gemini 2.0",
680
+ "hide_title": False
681
+ }
682
+ )
683
+ print("✅ Stream created with basic handler")
684
 
685
  app_state["stream"] = stream
686
  return stream
 
712
 
713
  def handle_connect():
714
  """Sync wrapper for connection"""
715
+ import threading
716
+
717
  app_state["connected"] = True # Optimistic update for UI
718
  app_state["last_status"] = "Initiating connection..."
719
 
720
+ # Start async connection in a separate thread to avoid event loop issues
721
+ def run_async_connection():
722
+ try:
723
+ import asyncio
724
+ loop = asyncio.new_event_loop()
725
+ asyncio.set_event_loop(loop)
726
+ result = loop.run_until_complete(handle_connect_async())
727
+ print(f"Connection result: {result}")
728
+ loop.close()
729
+ except Exception as e:
730
+ print(f"Connection error: {e}")
731
+ app_state["connected"] = False
732
+ app_state["last_status"] = f"Connection failed: {e}"
733
+
734
+ thread = threading.Thread(target=run_async_connection, daemon=True)
735
+ thread.start()
736
+
737
  return "🔄 Initiating connection to GenAI Live API..."
738
 
739
  async def handle_disconnect_async():
 
751
 
752
  def handle_disconnect():
753
  """Sync wrapper for disconnect"""
754
+ import threading
755
+
756
  app_state["connected"] = False # Immediate update for UI
757
 
758
+ # Start async disconnect in a separate thread to avoid event loop issues
759
+ def run_async_disconnect():
760
+ try:
761
+ import asyncio
762
+ loop = asyncio.new_event_loop()
763
+ asyncio.set_event_loop(loop)
764
+ result = loop.run_until_complete(handle_disconnect_async())
765
+ print(f"Disconnect result: {result}")
766
+ loop.close()
767
+ except Exception as e:
768
+ print(f"Disconnect error: {e}")
769
+
770
+ thread = threading.Thread(target=run_async_disconnect, daemon=True)
771
+ thread.start()
772
+
773
  return "🔄 Disconnecting from AI assistant..."
774
 
775
  def get_connection_status():
 
906
  """Handle screen recording data and send to AI"""
907
  if not recording_data or not app_state["handler"] or not app_state["connected"]:
908
  return "⚠️ Not connected to AI or no recording data"
909
+
910
  try:
911
  # If we have video data, process it for the AI
912
  if recording_data and recording_data.get('video'):
 
915
  duration = recording_data.get('duration', 0)
916
  size = recording_data.get('size', 0)
917
  print(f"📹 Screen recording received: {duration}s, {size} bytes")
918
+
919
  # Update stats
920
  app_state["stats"]["frames_sent"] += 1
921
+
922
  return f"✅ Screen recording processed: {duration:.1f}s"
923
  else:
924
  return "⚠️ No video data in recording"
925
+
926
  except Exception as e:
927
  print(f"❌ Error processing screen recording: {e}")
928
  return f"❌ Error: {e}"