app update
app.py CHANGED
@@ -1,7 +1,5 @@
 import gradio as gr
-import cv2
 import numpy as np
-import librosa
 import pandas as pd
 import plotly.graph_objects as go
 import plotly.express as px
@@ -13,6 +11,22 @@ from collections import deque
 import warnings
 warnings.filterwarnings("ignore")
 
+# Try to import OpenCV with fallback
+try:
+    import cv2
+    CV2_AVAILABLE = True
+except ImportError:
+    CV2_AVAILABLE = False
+    print("OpenCV not available - using PIL for image processing")
+
+# Try to import librosa with fallback
+try:
+    import librosa
+    LIBROSA_AVAILABLE = True
+except ImportError:
+    LIBROSA_AVAILABLE = False
+    print("Librosa not available - using basic audio processing")
+
 # Try to import transformers and torch, with fallbacks
 try:
     from transformers import pipeline
@@ -22,6 +36,13 @@ except ImportError:
     HF_AVAILABLE = False
     print("Transformers not available - using mock emotion detection")
 
+# Additional imports for image processing if OpenCV fails
+try:
+    from PIL import Image, ImageDraw, ImageFont
+    PIL_AVAILABLE = True
+except ImportError:
+    PIL_AVAILABLE = False
+
 class EmotionRecognitionSystem:
     def __init__(self):
         self.emotion_history = deque(maxlen=100)  # Store last 100 emotion readings
@@ -72,8 +93,22 @@ class EmotionRecognitionSystem:
             return dict(zip(emotions, scores))
 
         try:
-            # Convert frame to RGB if it's BGR
-            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            # Handle different image formats
+            if isinstance(frame, np.ndarray):
+                if CV2_AVAILABLE:
+                    # Convert frame to RGB if it's BGR
+                    if len(frame.shape) == 3 and frame.shape[2] == 3:
+                        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                    else:
+                        rgb_frame = frame
+                else:
+                    # Use numpy operations for color conversion
+                    if len(frame.shape) == 3 and frame.shape[2] == 3:
+                        rgb_frame = frame[:, :, ::-1]  # BGR to RGB
+                    else:
+                        rgb_frame = frame
+            else:
+                rgb_frame = frame
 
             # Use face emotion model
             results = self.face_emotion_pipeline(rgb_frame)
@@ -113,6 +148,16 @@ class EmotionRecognitionSystem:
 
     def extract_audio_features(self, audio_data, sample_rate):
         """Extract audio features for emotion analysis"""
+        if not LIBROSA_AVAILABLE:
+            # Return mock features if librosa is not available
+            return {
+                'mfcc_mean': np.random.random(),
+                'mfcc_std': np.random.random(),
+                'spectral_centroid_mean': np.random.random(),
+                'zcr_mean': np.random.random(),
+                'spectral_rolloff_mean': np.random.random()
+            }
+
         try:
             # Extract basic audio features
             mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
@@ -224,8 +269,14 @@ def process_video_audio(video_frame, audio_data):
     sample_rate = 16000
     if audio_data is not None:
         audio_array, sr = audio_data
-        if sr != sample_rate:
+        if LIBROSA_AVAILABLE and sr != sample_rate:
             audio_array = librosa.resample(audio_array, orig_sr=sr, target_sr=sample_rate)
+        elif not LIBROSA_AVAILABLE:
+            # Simple resampling if librosa not available
+            if sr != sample_rate:
+                # Basic downsampling
+                step = sr // sample_rate
+                audio_array = audio_array[::step] if step > 1 else audio_array
     else:
         audio_array = None
 
@@ -244,22 +295,56 @@ def process_video_audio(video_frame, audio_data):
 
 def create_emotion_overlay(frame, emotion_record):
     """Add emotion information overlay to video frame"""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        if CV2_AVAILABLE:
+            annotated_frame = frame.copy()
+
+            # Get top emotion
+            clinical_emotions = emotion_record['clinical_emotions']
+            top_emotion = max(clinical_emotions.items(), key=lambda x: x[1])
+
+            # Add text overlay
+            cv2.putText(annotated_frame, f"Primary: {top_emotion[0]} ({top_emotion[1]:.2f})",
+                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
+            # Add alert indicator
+            if emotion_record['alerts']:
+                cv2.putText(annotated_frame, "ALERT!", (10, 60),
+                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
+
+            return annotated_frame
+
+        elif PIL_AVAILABLE:
+            # Use PIL for image annotation
+            pil_image = Image.fromarray(frame)
+            draw = ImageDraw.Draw(pil_image)
+
+            # Get top emotion
+            clinical_emotions = emotion_record['clinical_emotions']
+            top_emotion = max(clinical_emotions.items(), key=lambda x: x[1])
+
+            # Add text overlay
+            try:
+                font = ImageFont.load_default()
+            except:
+                font = None
+
+            text = f"Primary: {top_emotion[0]} ({top_emotion[1]:.2f})"
+            draw.text((10, 10), text, fill=(0, 255, 0), font=font)
+
+            # Add alert indicator
+            if emotion_record['alerts']:
+                draw.text((10, 40), "ALERT!", fill=(255, 0, 0), font=font)
+
+            return np.array(pil_image)
+
+        else:
+            # Return original frame if no image processing available
+            return frame
+
+    except Exception as e:
+        print(f"Error creating emotion overlay: {e}")
+        return frame
 
 def format_clinical_emotions(clinical_emotions):
     """Format clinical emotions for display"""
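For reference, two minimal standalone sketches of the fallback patterns this change applies. Neither snippet is part of app.py, and the resample_to_16k helper name is an illustrative assumption.

Sketch 1 - guard an optional dependency behind a flag, then branch on it, including the integer-step decimation used when librosa is missing. Plain decimation applies no anti-aliasing filter and is only accurate when the source rate is an integer multiple of the target (e.g. 48000 // 16000 = 3), so librosa.resample is preferred whenever it is available:

    # Illustrative sketch, not part of app.py
    import numpy as np

    try:
        import librosa
        LIBROSA_AVAILABLE = True
    except ImportError:
        LIBROSA_AVAILABLE = False

    def resample_to_16k(audio_array, sr, target_sr=16000):
        """Resample audio, falling back to crude decimation without librosa."""
        if sr == target_sr:
            return audio_array
        if LIBROSA_AVAILABLE:
            return librosa.resample(audio_array, orig_sr=sr, target_sr=target_sr)
        # Keep every (sr // target_sr)-th sample: no anti-aliasing, integer ratios only
        step = sr // target_sr
        return audio_array[::step] if step > 1 else audio_array

    print(resample_to_16k(np.zeros(48000, dtype=np.float32), 48000).shape)  # (16000,)

Sketch 2 - the numpy color-conversion fallback works because reversing the last axis of a BGR array yields the same channel order that cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) produces (as a view rather than a copy):

    import numpy as np

    bgr = np.random.randint(0, 256, size=(2, 2, 3), dtype=np.uint8)
    rgb = bgr[:, :, ::-1]                            # same reordering as COLOR_BGR2RGB
    assert np.array_equal(rgb[..., 0], bgr[..., 2])  # R channel taken from the B slot
    assert np.array_equal(rgb[..., 2], bgr[..., 0])  # B channel taken from the R slot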