Update app.py
app.py CHANGED
@@ -1,19 +1,19 @@
-"""
-
-
+"""Real-Time Screen Assistant - Refactored for Google GenAI Live API + FastRTC
+xxx
 This application transforms the original screenshot analyzer into a real-time
-screen sharing assistant with voice interaction, following the refactoring
+screen sharing assistant with voice interaction, following the refactoring
 instructions for live streaming capabilities.
 """
 
-import os
 import asyncio
+import os
 import time
-
-import numpy.typing as npt
+
 import cv2
 import gradio as gr
-
+import numpy as np
+import numpy.typing as npt
+from fastrtc import AsyncAudioVideoStreamHandler, ReplyOnPause, Stream, get_cloudflare_turn_credentials_async
 from google import genai
 from google.genai import types
 
@@ -21,9 +21,8 @@ from google.genai import types
 API_KEY = os.getenv("GEMINI_API_KEY", "")
 
 class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
-    """
-
-
+    """Real-time screen assistant implementing the refactoring instructions.
+
     Features:
     - Google GenAI Live API integration
     - Real-time audio/video streaming via FastRTC
@@ -31,11 +30,11 @@ class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
     - Intelligent frame sampling for screen sharing
     - Cloudflare TURN server support for HF Spaces
     """
-
+
     def __init__(self):
         super().__init__(
-            expected_layout="mono",
-            output_sample_rate=24000,
+            expected_layout="mono",
+            output_sample_rate=24000,
             input_sample_rate=16000
         )
         self.session = None
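For orientation on the constructor above: judging from the type hints later in this file, FastRTC-style handlers exchange audio as (sample_rate, int16 numpy array) tuples. A minimal sketch of that frame shape in plain numpy; the names here are illustrative, not part of the app:

    import numpy as np

    rate = 16000                                   # microphone input rate set above
    samples = np.zeros((1, rate // 50), np.int16)  # 20 ms of mono silence, shape (channels, n)
    frame = (rate, samples)                        # the (rate, array) tuple a handler receives
    pcm_bytes = samples.tobytes()                  # little-endian 16-bit PCM for the wire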
@@ -43,7 +42,7 @@ class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
         self.audio_queue = asyncio.Queue()
         self.connected = False
         self.frame_interval = 1.0  # 1 FPS as per instructions
-
+
     async def start_up(self):
         """Initialize Google GenAI Live session as per Task 8-10"""
         try:
@@ -52,16 +51,16 @@ class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
             if not current_api_key:
                 print("❌ No GEMINI_API_KEY found in environment")
                 return
-
+
             # Initialize client with v1alpha API (Task 8)
             client = genai.Client(
                 api_key=current_api_key,
                 http_options={"api_version": "v1alpha"}
             )
-
+
             # Configure live session (Task 9) - minimal working config
             from google.genai.types import LiveConnectConfig
-
+
             # Start with minimal config to avoid WebSocket errors
             config = LiveConnectConfig(
                 system_instruction=(
@@ -70,24 +69,24 @@ class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
                     "you see and hear. Be proactive in offering assistance."
                 )
             )
-
+
             # Connect to Live API (Task 10) - using async context manager
             self.session_context = client.aio.live.connect(
-                model="gemini-2.0-flash-live-001",
+                model="gemini-2.0-flash-live-001",
                 config=config
             )
             self.session = await self.session_context.__aenter__()
-
+
             self.connected = True
             print("✅ Connected to Google GenAI Live API")
-
+
             # Start response handler (Task 13)
             self.response_task = asyncio.create_task(self._handle_responses())
-
+
         except Exception as e:
             print(f"❌ Failed to connect to GenAI: {e}")
             self.connected = False
-
+
     async def _handle_responses(self):
         """Handle AI responses as per Task 12-13"""
         try:
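The connect step above enters an async context manager by hand and keeps the manager object around so it can be exited at shutdown. A small self-contained sketch of that pattern, with a toy stand-in class rather than the real Live API session:

    import asyncio

    class ToySession:
        # Stand-in for the Live API session object; illustrative only.
        async def __aenter__(self):
            print("session opened")
            return self

        async def __aexit__(self, exc_type, exc, tb):
            print("session closed")

    async def main():
        ctx = ToySession()                # keep the context manager itself...
        session = await ctx.__aenter__()  # ...so __aexit__ can be called later
        try:
            pass                          # use the session here
        finally:
            await ctx.__aexit__(None, None, None)

    asyncio.run(main())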
@@ -97,21 +96,21 @@ class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
                 try:
                     # Get the next message from the session
                     response_stream = self.session.receive()
-
+
                     # Check if this is an async iterator or needs to be awaited
                     if hasattr(response_stream, '__aiter__'):
                         # It's an async iterator
                         async for msg in response_stream:
                             if not self.connected:
                                 break
-
+
                             if msg.data:  # Audio response
                                 # Convert to numpy for FastRTC (Task 13)
                                 audio_array = np.frombuffer(msg.data, dtype=np.int16)
                                 if len(audio_array) > 0:
                                     audio_array = audio_array.reshape(1, -1)
                                     await self.audio_queue.put(audio_array)
-
+
                             if msg.text:  # Text response
                                 print(f"🤖 AI: {msg.text}")
                     else:
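The msg.data branch converts raw PCM bytes into the (channels, samples) array that the audio queue carries to playback. A runnable sketch of just that conversion, with synthetic bytes standing in for a Live API payload:

    import numpy as np

    data = np.arange(4, dtype=np.int16).tobytes()      # synthetic stand-in for msg.data
    audio_array = np.frombuffer(data, dtype=np.int16)  # flat int16 samples
    if len(audio_array) > 0:
        audio_array = audio_array.reshape(1, -1)       # (channels, samples) for playback
    print(audio_array.shape)                           # -> (1, 4)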
@@ -123,10 +122,10 @@ class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
                             if len(audio_array) > 0:
                                 audio_array = audio_array.reshape(1, -1)
                                 await self.audio_queue.put(audio_array)
-
+
                             if msg.text:  # Text response
                                 print(f"🤖 AI: {msg.text}")
-
+
                 except Exception as inner_e:
                     if "connection" in str(inner_e).lower() or "closed" in str(inner_e).lower():
                         print("🔴 Connection closed, stopping response handler")
@@ -134,53 +133,53 @@ class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
                     else:
                         print(f"⚠️ Response handling error: {inner_e}")
                         await asyncio.sleep(0.1)  # Brief pause before retry
-
+
         except Exception as e:
             print(f"❌ Error handling AI responses: {e}")
-
+
     async def receive(self, frame: tuple[int, npt.NDArray[np.int16]]):
         """Handle microphone audio (Task 11)"""
         if not self.connected or not self.session:
             return
-
+
         try:
             _, audio_np = frame
             audio_bytes = audio_np.tobytes()
-
+
             # Send audio to GenAI Live API using new non-deprecated method
             await self.session.send_realtime_input(
                 input=types.Blob(
-                    data=audio_bytes,
+                    data=audio_bytes,
                     mime_type="audio/pcm;rate=16000"
                 )
             )
         except Exception as e:
             print(f"❌ Error sending audio: {e}")
-
+
     async def video_receive(self, frame: npt.NDArray[np.float32]):
         """Handle screen video frames (Task 11-12)"""
         if not self.connected or not self.session:
             return
-
+
         try:
             # Throttle to 1 FPS as per instructions
             current_time = time.time()
             if current_time - self.last_frame_time < self.frame_interval:
                 return
-
+
             self.last_frame_time = current_time
-
+
             # Convert float32 frame to uint8 for JPEG encoding
             if frame.dtype == np.float32:
                 # Assuming frame is in range [0, 1], convert to [0, 255]
                 frame_uint8 = (frame * 255).astype(np.uint8)
             else:
                 frame_uint8 = frame.astype(np.uint8)
-
+
             # Check for empty frame before encoding
             if frame_uint8.size == 0 or frame_uint8.shape[0] == 0 or frame_uint8.shape[1] == 0:
                 return
-
+
             # Encode as JPEG (Task 12)
             try:
                 success, jpg_bytes = cv2.imencode('.jpg', frame_uint8, [cv2.IMWRITE_JPEG_QUALITY, 80])
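video_receive combines three steps: a time-based 1 FPS gate, float-to-uint8 conversion, and JPEG encoding. A standalone sketch of the same pipeline, assuming float32 frames in [0, 1] as the code above does:

    import time

    import cv2
    import numpy as np

    last_frame_time = 0.0
    frame_interval = 1.0  # seconds between frames that are kept (1 FPS)

    def maybe_encode(frame):
        global last_frame_time
        now = time.time()
        if now - last_frame_time < frame_interval:
            return None                          # too soon: drop this frame
        last_frame_time = now
        if frame.dtype == np.float32:            # assume [0, 1] floats, as above
            frame = (frame * 255).astype(np.uint8)
        ok, jpg = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 80])
        return jpg.tobytes() if ok else None

    frame = np.random.rand(48, 64, 3).astype(np.float32)
    print(len(maybe_encode(frame) or b""))       # size of the JPEG payload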
@@ -189,18 +188,18 @@ class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
             except cv2.error:
                 # Handle OpenCV encoding errors gracefully
                 return
-
+
             # Send to GenAI using new non-deprecated method
             await self.session.send_realtime_input(
                 input=types.Blob(
-                    data=jpg_bytes.tobytes(),
+                    data=jpg_bytes.tobytes(),
                     mime_type="image/jpeg"
                 )
             )
-
+
         except Exception as e:
             print(f"❌ Error sending video frame: {e}")
-
+
     async def emit(self):
         """Emit audio back to user (Task 13)"""
         try:
@@ -208,23 +207,23 @@ class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
             return (24000, audio_chunk)
         except asyncio.QueueEmpty:
             return None
-
+
     def copy(self):
         """Copy method required by FastRTC AsyncAudioVideoStreamHandler"""
         # Return a new instance with same configuration
         new_instance = RealTimeScreenAssistant()
         new_instance.frame_interval = self.frame_interval
         return new_instance
-
+
     async def video_emit(self):
         """Video emit method required by FastRTC AsyncAudioVideoStreamHandler"""
         # For this use case, we don't emit video back to user
         return None
-
+
     async def shutdown(self):
         """Clean shutdown (Task 17)"""
         self.connected = False
-
+
         # Cancel response handler task if it exists
         if hasattr(self, 'response_task') and not self.response_task.done():
            self.response_task.cancel()
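emit drains the queue without blocking: get_nowait raises asyncio.QueueEmpty when nothing is buffered, which the handler maps to returning None. The same pattern in isolation:

    import asyncio

    async def main():
        queue: asyncio.Queue = asyncio.Queue()
        await queue.put("audio-chunk")
        try:
            item = queue.get_nowait()   # returns immediately, never awaits
        except asyncio.QueueEmpty:
            item = None                 # nothing buffered yet -> emit nothing
        print(item)

    asyncio.run(main())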
@@ -232,7 +231,7 @@ class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
                 await self.response_task
             except asyncio.CancelledError:
                 pass
-
+
         if self.session:
             try:
                 # Properly close the session using context manager
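The shutdown sequence cancels the background task and then awaits it so the resulting CancelledError is consumed rather than left to warn about an un-retrieved task. A minimal runnable version of that pattern:

    import asyncio

    async def worker():
        await asyncio.sleep(3600)       # stand-in for the response loop

    async def main():
        task = asyncio.create_task(worker())
        await asyncio.sleep(0)          # let the task start
        if not task.done():
            task.cancel()
        try:
            await task                  # CancelledError surfaces here
        except asyncio.CancelledError:
            pass                        # expected during shutdown

    asyncio.run(main())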
@@ -243,7 +242,7 @@ class RealTimeScreenAssistant(AsyncAudioVideoStreamHandler):
                 print("🔴 Disconnected from GenAI Live API")
             except Exception as e:
                 print(f"❌ Error during shutdown: {e}")
-
+
         self.session = None
         if hasattr(self, 'session_context'):
             self.session_context = None
@@ -257,19 +256,19 @@ def initialize_real_time_assistant():
         # Create handler
         handler = RealTimeScreenAssistant()
         app_state["handler"] = handler
-
+
         # Create stream with Cloudflare TURN (Task 22-23)
         stream = Stream(
             handler=ReplyOnPause(handler),  # Voice activity detection (Task 3)
-            modality="audio-video",
+            modality="audio-video",
             mode="send-receive",
             rtc_configuration=get_cloudflare_turn_credentials_async,
             time_limit=300  # 5 minute limit for Spaces
         )
-
+
         app_state["stream"] = stream
         return stream
-
+
     except Exception as e:
         print(f"❌ Error creating stream: {e}")
         return None
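Here rtc_configuration is handed a Cloudflare credentials helper, but the underlying shape is a standard WebRTC RTCConfiguration mapping. A sketch of what such a dict looks like; the servers and credentials below are placeholders, not real values:

    # Placeholder servers and credentials; in this app the real values
    # come from the Cloudflare helper at connection time.
    rtc_configuration = {
        "iceServers": [
            {"urls": "stun:stun.example.com:3478"},
            {
                "urls": "turn:turn.example.com:3478",
                "username": "user",
                "credential": "secret",
            },
        ]
    }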
@@ -280,10 +279,10 @@ def handle_connect():
     current_api_key = os.getenv("GEMINI_API_KEY", "")
     if not current_api_key:
         return "❌ Please set GEMINI_API_KEY environment variable"
-
+
     if app_state["connected"]:
         return "✅ Already connected - session is active"
-
+
     app_state["connected"] = True
     return "✅ Connecting... Please allow microphone and camera permissions"
 
@@ -293,7 +292,7 @@ def handle_disconnect():
         asyncio.create_task(app_state["handler"].shutdown())
         app_state["connected"] = False
         return "🔴 Disconnected from AI assistant"
-
+
     return "Already disconnected"
 
 # Screen sharing JavaScript - Fixed syntax for HF Spaces
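One caveat on the hunk above: asyncio.create_task only works while an event loop is running, so calling handle_disconnect from a plain thread would raise RuntimeError. A hedged sketch of one defensive way to schedule the coroutine either way; this is illustrative, not what the app currently does:

    import asyncio

    async def shutdown():
        print("cleaned up")

    def schedule(coro):
        try:
            loop = asyncio.get_running_loop()  # inside a loop: schedule it
            loop.create_task(coro)
        except RuntimeError:
            asyncio.run(coro)                  # no running loop: run to completion

    schedule(shutdown())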
@@ -303,7 +302,7 @@ screen_share_js = '''
        if (!navigator.mediaDevices || !navigator.mediaDevices.getDisplayMedia) {
            return "❌ Screen sharing not supported in this browser";
        }
-
+
        const stream = await navigator.mediaDevices.getDisplayMedia({
            video: {
                width: { ideal: 1920 },
@@ -311,36 +310,36 @@ screen_share_js = '''
            },
            audio: false
        });
-
+
        // Find the video element from FastRTC
        const videos = document.querySelectorAll('video');
        let targetVideo = null;
-
+
        for (let video of videos) {
            if (video.srcObject && video.srcObject.getVideoTracks().length > 0) {
                targetVideo = video;
                break;
            }
        }
-
+
        if (targetVideo && targetVideo.srcObject) {
            // Replace the camera track with screen track
            const screenTrack = stream.getVideoTracks()[0];
            const sender = targetVideo.srcObject.getVideoTracks()[0];
-
+
            // Remove old track and add screen track
            targetVideo.srcObject.removeTrack(sender);
            targetVideo.srcObject.addTrack(screenTrack);
-
+
            screenTrack.onended = () => {
                console.log("Screen sharing ended");
            };
-
+
            return "🖥️ Screen sharing started successfully!";
        } else {
            return "❌ Could not find video stream to replace";
        }
-
+
    } catch (error) {
        console.error("Screen sharing error:", error);
        if (error.name === "NotAllowedError") {
@@ -355,46 +354,45 @@ screen_share_js = '''
 
 def create_interface():
     """Create main interface (Task 26-30)"""
-
     # Initialize stream
     stream = initialize_real_time_assistant()
-
+
     with gr.Blocks(
-        title="Real-Time Screen Assistant",
+        title="Real-Time Screen Assistant",
         theme=gr.themes.Soft()
     ) as demo:
-
+
         gr.Markdown("# 🖥️ Real-Time Screen Assistant")
         gr.Markdown("""
         **🎯 LIVE AI that sees your screen and provides real-time guidance!**
-
+
         **How it works:**
         1. **Connect** - Links to Google's GenAI Live API for real-time AI processing
-        2. **Share Screen** - AI can see exactly what you're doing on your screen
+        2. **Share Screen** - AI can see exactly what you're doing on your screen
         3. **Voice Chat** - Talk naturally, AI responds with voice and sees everything
         4. **Get Help** - Real-time assistance with software, coding, troubleshooting
-
+
         **Tech Stack:**
         - 🧠 Google GenAI Live API (multimodal real-time AI)
         - 📹 FastRTC (low-latency screen/audio streaming)
-        - 🎙️ Voice activity detection
+        - 🎙️ Voice activity detection
         - 🌐 Cloudflare TURN servers (HF Spaces optimized)
         """)
-
+
         # Status display
         status_display = gr.Textbox(
            label="Status",
            value="Ready to connect - Click Connect to start real-time session",
            interactive=False
        )
-
+
        # Control buttons (Task 3, 16-17)
        with gr.Row():
            connect_btn = gr.Button("🔌 Connect", variant="primary")
            mic_btn = gr.Button("🎙️ Test Microphone", variant="secondary")
-            screen_btn = gr.Button("🖥️ Show Your Screen", variant="secondary")
+            screen_btn = gr.Button("🖥️ Show Your Screen", variant="secondary")
            disconnect_btn = gr.Button("🔴 Disconnect", variant="stop")
-
+
        # Stream interface - FastRTC UI for microphone and video
        gr.Markdown("### 📡 Live Audio/Video Stream")
        if stream:
@@ -407,7 +405,7 @@ def create_interface():
            """)
        else:
            gr.HTML("<div>⚠️ Stream initialization failed - Check console for errors</div>")
-
+
        # Microphone activation JavaScript
        microphone_js = '''
        (async function() {
@@ -433,19 +431,19 @@ def create_interface():
            }
        })()
        '''
-
+
        # Instructions (Task 1-3)
        with gr.Accordion("📋 Instructions", open=True):
            gr.Markdown("""
            **How to use the real-time assistant:**
-
+
            1. **Connect**: Click Connect to start the AI session
            2. **Permissions**: Allow microphone and camera access
            3. **Show Screen**: Click "Show Your Screen" to share your screen
            4. **Voice Interaction**: Simply speak - the AI will respond
            5. **Real-time Guidance**: AI sees your screen and provides live help
            6. **Disconnect**: Click Disconnect when finished
-
+
            **Features implemented from refactoring instructions:**
            - ✅ FastRTC WebRTC streaming (Task 2)
            - ✅ Google GenAI Live API integration (Task 7-15)
@@ -455,7 +453,7 @@ def create_interface():
            - ✅ Real-time advice generation (Task 18-21)
            - ✅ Cloudflare TURN for HF Spaces (Task 22-23)
            """)
-
+
        # Privacy notice (Task 24-25)
        with gr.Accordion("🔒 Privacy & Security", open=False):
            gr.Markdown("""
@@ -464,37 +462,37 @@ def create_interface():
            - Data is transmitted securely via encrypted WebRTC connections
            - No permanent storage - all processing is real-time
            - You control what is shared and can disconnect anytime
-
+
            **Technical Details:**
            - Uses Google Gemini Live API for real-time multimodal processing
            - FastRTC provides low-latency WebRTC streaming
            - Cloudflare TURN servers ensure reliable connectivity on HF Spaces
            - Voice activity detection prevents interruptions
            """)
-
+
        # Wire up controls
        connect_btn.click(
            fn=handle_connect,
            outputs=[status_display]
        )
-
+
        mic_btn.click(
            fn=lambda: "🎙️ Testing microphone...",
            outputs=[status_display],
            js=microphone_js
        )
-
+
        screen_btn.click(
            fn=lambda: "🖥️ Requesting screen share...",
            outputs=[status_display],
            js=screen_share_js
        )
-
+
        disconnect_btn.click(
            fn=handle_disconnect,
            outputs=[status_display]
        )
-
+
        return demo
 
 # Main execution
@@ -503,20 +501,20 @@ if __name__ == "__main__":
     print("=" * 50)
     print("Refactored according to instructions for:")
     print("- Google GenAI Live API integration")
-    print("- FastRTC real-time streaming")
+    print("- FastRTC real-time streaming")
     print("- Voice activity detection")
     print("- Screen sharing capabilities")
     print("- Cloudflare TURN for HF Spaces")
-
+
     if not API_KEY:
         print("\n⚠️ No GEMINI_API_KEY environment variable found")
         print("Please set your Google AI API key:")
         print("export GEMINI_API_KEY='your-api-key-here'")
     else:
         print(f"\n✅ API key configured (length: {len(API_KEY)})")
-
+
     print("\n🚀 Starting real-time assistant...")
-
+
     try:
         demo = create_interface()
         demo.launch(
@@ -527,4 +525,4 @@ if __name__ == "__main__":
         )
     except Exception as e:
         print(f"❌ Failed to launch: {e}")
-        print("Ensure all dependencies are installed: pip install -r requirements.txt")
+        print("Ensure all dependencies are installed: pip install -r requirements.txt")