Testys committed on
Commit
fcccf01
·
verified ·
1 Parent(s): 89d5f02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -3
app.py CHANGED
@@ -7,6 +7,8 @@ import os
7
  import yaml
8
  from dotenv import load_dotenv
9
  from threading import Thread
 
 
10
 
11
  # --- TTS & AI Imports ---
12
  from parler_tts import ParlerTTSForConditionalGeneration
@@ -32,6 +34,22 @@ detector = get_detector(config)
32
  alerter = get_alerter(config, secrets["gemini_api_key"])
33
  print("Initialization complete. Launching UI...")
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  # --- Parler-TTS Model Setup (Requires GPU) ---
36
  print("Loading Parler-TTS model. This may take a moment...")
37
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -48,6 +66,7 @@ feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
48
  print("Parler-TTS model loaded.")
49
 
50
  # --- Audio Streaming Generator Function ---
 
51
  def stream_alert_audio(text_prompt):
52
  """
53
  A generator function that yields audio chunks for a given text prompt.
@@ -87,6 +106,7 @@ def stream_alert_audio(text_prompt):
87
  alerter.reset_alert()
88
 
89
  # --- Main Webcam Processing Function ---
 
90
  def process_live_frame(frame):
91
  """
92
  Processes each webcam frame, performs drowsiness detection, and
@@ -130,7 +150,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as app:
130
 
131
  with gr.Row():
132
  with gr.Column(scale=2):
133
- webcam_input = gr.Image(sources=["webcam"], streaming=True, label="Live Camera Feed")
134
  with gr.Column(scale=1):
135
  processed_output = gr.Image(label="Processed Feed")
136
  status_output = gr.Textbox(label="Live Status", lines=3, interactive=False)
@@ -139,14 +159,15 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as app:
139
  audio_alert_output = gr.Audio(
140
  label="Alert System",
141
  autoplay=True,
142
- visible=False, # Hide the player controls
143
  streaming=True
144
  )
145
 
146
  webcam_input.stream(
147
  fn=process_live_frame,
148
  inputs=[webcam_input],
149
- outputs=[processed_output, status_output, audio_alert_output]
 
150
  )
151
 
152
 
 
7
  import yaml
8
  from dotenv import load_dotenv
9
  from threading import Thread
10
+ from gradio_webrtc import WebRTC
11
+ from twilio.rest import Client
12
 
13
  # --- TTS & AI Imports ---
14
  from parler_tts import ParlerTTSForConditionalGeneration
 
34
  alerter = get_alerter(config, secrets["gemini_api_key"])
35
  print("Initialization complete. Launching UI...")
36
 
37
+ account_sid = os.environ.get("TURN_USERNAME")  # handed to the Twilio Client as the account SID
38
+ auth_token = os.environ.get("TURN_CREDENTIAL")  # handed to the Twilio Client as the auth token
39
+
40
+ if account_sid and auth_token:  # only contact Twilio when both variables are set
41
+     client = Client(account_sid, auth_token)
42
+
43
+     token = client.tokens.create()  # the token carries the ICE server list used below
44
+
45
+     rtc_configuration = {
46
+         "iceServers": token.ice_servers,
47
+         "iceTransportPolicy": "relay",  # restrict ICE to relayed (TURN) candidates
48
+     }
49
+ else:
50
+     rtc_configuration = None  # no credentials: the WebRTC component is given None
51
+
52
+
53
  # --- Parler-TTS Model Setup (Requires GPU) ---
54
  print("Loading Parler-TTS model. This may take a moment...")
55
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
66
  print("Parler-TTS model loaded.")
67
 
68
  # --- Audio Streaming Generator Function ---
69
+ @spaces.GPU
70
  def stream_alert_audio(text_prompt):
71
  """
72
  A generator function that yields audio chunks for a given text prompt.
 
106
  alerter.reset_alert()
107
 
108
  # --- Main Webcam Processing Function ---
109
+ @spaces.GPU
110
  def process_live_frame(frame):
111
  """
112
  Processes each webcam frame, performs drowsiness detection, and
 
150
 
151
  with gr.Row():
152
  with gr.Column(scale=2):
153
+ webcam_input = WebRTC(label="Stream", rtc_configuration=rtc_configuration)
154
  with gr.Column(scale=1):
155
  processed_output = gr.Image(label="Processed Feed")
156
  status_output = gr.Textbox(label="Live Status", lines=3, interactive=False)
 
159
  audio_alert_output = gr.Audio(
160
  label="Alert System",
161
  autoplay=True,
162
+ visible=True, # Show the player controls
163
  streaming=True
164
  )
165
 
166
  webcam_input.stream(
167
  fn=process_live_frame,
168
  inputs=[webcam_input],
169
+ outputs=[status_output, audio_alert_output],
170
+ time_limit=10
171
  )
172
 
173