Spaces:
Sleeping
Sleeping
app upgrade
Browse files
upgrade some features
app.py
CHANGED
@@ -1,104 +1,133 @@
|
|
1 |
import os
|
2 |
import cv2
|
3 |
-
import torch
|
4 |
-
import tempfile
|
5 |
import io
|
|
|
6 |
import numpy as np
|
7 |
import pandas as pd
|
8 |
import matplotlib.pyplot as plt
|
9 |
from PIL import Image
|
|
|
|
|
10 |
from torchvision import transforms
|
11 |
from facenet_pytorch import MTCNN
|
12 |
import gradio as gr
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
19 |
torch.nn.Flatten(),
|
20 |
-
torch.nn.Linear(
|
21 |
torch.nn.ReLU(),
|
22 |
-
torch.nn.Linear(
|
23 |
)
|
24 |
|
25 |
def forward(self, x):
|
26 |
-
return self.
|
27 |
|
28 |
class EmotionDetector:
|
29 |
-
def __init__(self):
|
30 |
-
self.device =
|
|
|
|
|
31 |
self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
|
32 |
-
|
33 |
-
# Load models
|
34 |
self.face_detector = MTCNN(keep_all=False, device=self.device)
|
35 |
-
self.emotion_model = SimpleEmotionClassifier(num_classes=len(self.emotions)).to(self.device)
|
36 |
-
self.emotion_model.eval()
|
37 |
-
|
38 |
-
# Dummy weights — you can replace this with trained weights
|
39 |
-
# torch.load('emotion_model.pth') to load a trained one
|
40 |
-
|
41 |
self.transform = transforms.Compose([
|
42 |
-
transforms.Resize((48, 48))
|
43 |
-
|
44 |
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
fps = cap.get(cv2.CAP_PROP_FPS)
|
49 |
-
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
50 |
-
frame_indices = range(0, frame_count, sample_rate)
|
51 |
|
52 |
-
|
|
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
-
if face is not None:
|
64 |
-
face = self.transform(face).unsqueeze(0).to(self.device)
|
65 |
with torch.no_grad():
|
66 |
-
|
67 |
-
probs =
|
68 |
|
69 |
-
emotion_data = {
|
70 |
-
emotion_data['timestamp'] =
|
71 |
emotions_over_time.append(emotion_data)
|
72 |
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
77 |
|
78 |
-
|
|
|
|
|
|
|
79 |
|
80 |
-
|
81 |
-
|
82 |
-
for emotion in self.emotions:
|
83 |
-
plt.plot(df['timestamp'], df[emotion], label=emotion)
|
84 |
-
plt.xlabel("Time (s)")
|
85 |
-
plt.ylabel("Emotion Confidence (%)")
|
86 |
-
plt.legend()
|
87 |
-
plt.tight_layout()
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
94 |
|
95 |
-
|
96 |
-
avg_emotions = df[self.emotions].mean().sort_values(ascending=False)
|
97 |
-
result_text = "**Average Emotions:**\n"
|
98 |
-
for emo, val in avg_emotions.items():
|
99 |
-
result_text += f"• {emo.title()}: {val:.1f}%\n"
|
100 |
|
101 |
-
|
|
|
102 |
|
103 |
def create_interface():
|
104 |
detector = EmotionDetector()
|
@@ -112,14 +141,14 @@ def create_interface():
|
|
112 |
fn=process,
|
113 |
inputs=[
|
114 |
gr.Video(label="Upload Video"),
|
115 |
-
gr.Slider(1, 60, step=1, value=30, label="Sample Rate (Frames)")
|
116 |
],
|
117 |
outputs=[
|
118 |
gr.Image(type="pil", label="Emotion Chart"),
|
119 |
-
gr.Textbox(label="Summary")
|
120 |
],
|
121 |
-
title="
|
122 |
-
description="
|
123 |
)
|
124 |
|
125 |
if __name__ == "__main__":
|
|
|
1 |
import os
|
2 |
import cv2
|
|
|
|
|
3 |
import io
|
4 |
+
import tempfile
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
7 |
import matplotlib.pyplot as plt
|
8 |
from PIL import Image
|
9 |
+
import torch
|
10 |
+
import torch.nn.functional as F
|
11 |
from torchvision import transforms
|
12 |
from facenet_pytorch import MTCNN
|
13 |
import gradio as gr
|
14 |
|
15 |
+
class EmotionModel(torch.nn.Module):
    """Small two-stage convolutional classifier for single-channel face crops.

    The network is two ``Conv2d -> ReLU -> MaxPool2d(2)`` stages followed by
    a two-layer fully connected head. Each pooling stage halves the spatial
    size, so the flattened feature length is ``64 * (input_size // 4) ** 2``.

    Args:
        num_classes: Number of output logits. Defaults to 7, matching the
            seven emotion labels used by the detector in this file.
        input_size: Height and width (in pixels) of the square input the
            first linear layer is sized for. Defaults to 48.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1 or ``input_size`` is
            too small to survive the two pooling stages.
    """

    def __init__(self, num_classes=7, input_size=48):
        super().__init__()
        if num_classes < 1:
            raise ValueError("num_classes must be at least 1")
        if input_size < 4:
            raise ValueError("input_size must be at least 4")

        # Two MaxPool2d(2) layers each floor-halve the spatial dimensions.
        pooled = (input_size // 2) // 2

        # The attribute name and layer order are kept so that state-dict keys
        # ("net.0.weight", ...) stay the same as before.
        self.net = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
            torch.nn.Conv2d(32, 64, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
            torch.nn.Flatten(),
            torch.nn.Linear(64 * pooled * pooled, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return raw (pre-softmax) logits for a batch shaped (N, 1, H, W)."""
        return self.net(x)
|
33 |
|
34 |
class EmotionDetector:
    """Sample frames from a video, score the detected face, and chart the result.

    For every sampled frame the MTCNN detector is asked for a face crop. The
    crop is resized to 48x48, averaged over its first axis to a single
    channel, and passed through ``EmotionModel``. Softmax probabilities are
    reported as percentages.
    """

    def __init__(self, device='cpu'):
        """Create the classifier, the face detector and the preprocessing step.

        Args:
            device: Torch device the classifier and the MTCNN detector are
                placed on.
        """
        self.device = device
        self.model = EmotionModel().to(self.device)
        self.model.eval()
        self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
        self.face_detector = MTCNN(keep_all=False, device=self.device)
        # No ToTensor() here: the resize is applied directly to the detector's
        # output (assumed to already be a tensor — TODO confirm against
        # facenet-pytorch).
        self.transform = transforms.Compose([
            transforms.Resize((48, 48)),
        ])
        self.softmax = torch.nn.Softmax(dim=1)

        # NOTE: no weights are loaded here, so the model runs with whatever
        # parameters EmotionModel() was initialised with. To use trained
        # weights, load them explicitly, e.g.:
        # self.model.load_state_dict(torch.load("emotion_model.pt", map_location=self.device))

    def detect_emotions_video(self, video_path, sample_rate=30, max_size_mb=50):
        """Analyse a video and return an emotion chart plus a text summary.

        Args:
            video_path: Path of the video file, or ``None``.
            sample_rate: Analyse every ``sample_rate``-th frame. Coerced to an
                integer of at least 1, because ``range()`` rejects floats and
                a zero step.
            max_size_mb: Files larger than this many MiB are rejected.

        Returns:
            ``(chart_image, summary_text)`` on success, where ``chart_image``
            is a PIL image. On any failure the first element is ``None`` and
            the second is a human-readable message. Exceptions are reported
            as ``"Error: ..."`` text rather than propagated, because this
            method's result is handed straight to the UI.
        """
        try:
            if video_path is None:
                return None, "No video provided"

            if os.path.getsize(video_path) / (1024 * 1024) > max_size_mb:
                return None, f"File too large (>{max_size_mb} MB)."

            step = max(1, int(sample_rate))

            cap = cv2.VideoCapture(video_path)
            try:
                # Keep fps as a float: truncating e.g. 29.97 to 29 skews every
                # timestamp, and rates below 1 would become a zero divisor.
                fps = cap.get(cv2.CAP_PROP_FPS)
                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

                # "not fps > 0" also rejects NaN.
                if not cap.isOpened() or frame_count <= 0 or not fps > 0:
                    return None, "Invalid video file"

                emotions_over_time = self._score_frames(cap, frame_count, step, fps)
            finally:
                # Release the capture on every path, including early returns
                # and exceptions raised while scoring frames.
                cap.release()

            if not emotions_over_time:
                return None, "No emotions detected."

            df = pd.DataFrame(emotions_over_time)
            chart_image = self._render_chart(df)
            return chart_image, self._summarize(df)

        except Exception as e:
            return None, f"Error: {str(e)}"

    def _score_frames(self, cap, frame_count, step, fps):
        """Return one {emotion: percent, 'timestamp': seconds} dict per scored frame."""
        records = []
        for frame_idx in range(0, frame_count, step):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if not ret:
                continue

            # OpenCV decodes to BGR; convert to RGB before building the PIL image.
            img_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            face_tensor = self.face_detector(img_pil)
            if face_tensor is None:
                # No face found in this frame.
                continue

            face_tensor = self.transform(face_tensor)  # resize to 48x48
            # Average over the first axis to get one channel
            # (assumes a channel-first crop — TODO confirm).
            face_tensor = face_tensor.mean(dim=0, keepdim=True)
            face_tensor = face_tensor.unsqueeze(0).to(self.device)  # add batch axis

            with torch.no_grad():
                output = self.model(face_tensor)
                probs = self.softmax(output).cpu().numpy()[0]

            record = {name: float(p) * 100 for name, p in zip(self.emotions, probs)}
            record['timestamp'] = frame_idx / fps
            records.append(record)
        return records

    def _render_chart(self, df):
        """Plot every emotion column against time and return the PNG as a PIL image."""
        fig = plt.figure(figsize=(12, 8))
        try:
            for emotion in self.emotions:
                if emotion in df.columns:
                    plt.plot(df['timestamp'], df[emotion], label=emotion.title(), linewidth=2)

            plt.xlabel('Time (seconds)')
            plt.ylabel('Confidence (%)')
            plt.title('Emotions Over Time')
            plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
            plt.grid(True)
            plt.tight_layout()

            img_buf = io.BytesIO()
            plt.savefig(img_buf, format='png', dpi=150, bbox_inches='tight')
        finally:
            # Close the figure even if plotting or saving fails, so figures
            # do not accumulate across requests.
            plt.close(fig)

        img_buf.seek(0)
        return Image.open(img_buf)

    def _summarize(self, df):
        """Build the text summary: frame count, last timestamp, mean per emotion."""
        avg_emotions = df[self.emotions].mean().sort_values(ascending=False)

        lines = [
            "**Video Analysis Complete**\n",
            f"**Frames Analyzed:** {len(df)}\n",
            f"**Duration:** {df['timestamp'].max():.1f} seconds\n\n",
            "**Average Emotions:**\n",
        ]
        lines.extend(
            f"• {emotion.title()}: {confidence:.1f}%\n"
            for emotion, confidence in avg_emotions.items()
        )
        return "".join(lines)
|
131 |
|
132 |
def create_interface():
|
133 |
detector = EmotionDetector()
|
|
|
141 |
fn=process,
|
142 |
inputs=[
|
143 |
gr.Video(label="Upload Video"),
|
144 |
+
gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Sample Rate (Frames)")
|
145 |
],
|
146 |
outputs=[
|
147 |
gr.Image(type="pil", label="Emotion Chart"),
|
148 |
+
gr.Textbox(label="Analysis Summary")
|
149 |
],
|
150 |
+
title="PyTorch-Only Emotion Detection",
|
151 |
+
description="Analyze emotions from a video using PyTorch and facenet-pytorch."
|
152 |
)
|
153 |
|
154 |
if __name__ == "__main__":
|