Spaces:
Sleeping
Sleeping
final version
Browse files
app.py
CHANGED
@@ -1,70 +1,155 @@
|
|
1 |
-
import
|
2 |
import cv2
|
|
|
|
|
|
|
3 |
import pandas as pd
|
4 |
import matplotlib.pyplot as plt
|
5 |
-
from
|
6 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
frame_rate = cap.get(cv2.CAP_PROP_FPS)
|
11 |
-
frame_interval = int(frame_rate * 2) # Analyze every 2 seconds
|
12 |
-
|
13 |
-
emotion_data = []
|
14 |
-
frame_count = 0
|
15 |
-
|
16 |
-
while True:
|
17 |
-
ret, frame = cap.read()
|
18 |
-
if not ret:
|
19 |
-
break
|
20 |
-
if frame_count % frame_interval == 0:
|
21 |
-
try:
|
22 |
-
analysis = DeepFace.analyze(
|
23 |
-
frame,
|
24 |
-
actions=["emotion"],
|
25 |
-
detector_backend="opencv", # TensorFlow-free
|
26 |
-
enforce_detection=False
|
27 |
-
)
|
28 |
-
emotion_data.append(analysis[0]["emotion"])
|
29 |
-
except Exception as e:
|
30 |
-
print("Error analyzing frame:", e)
|
31 |
-
frame_count += 1
|
32 |
-
|
33 |
-
cap.release()
|
34 |
-
|
35 |
-
if not emotion_data:
|
36 |
-
return "No faces detected."
|
37 |
-
|
38 |
-
df = pd.DataFrame(emotion_data)
|
39 |
-
avg_emotions = df.mean().sort_values(ascending=False)
|
40 |
-
|
41 |
-
# Plot
|
42 |
-
fig, ax = plt.subplots(figsize=(6, 3))
|
43 |
-
avg_emotions.plot(kind='bar', ax=ax, color='skyblue')
|
44 |
-
ax.set_title("Average Emotions Across Video")
|
45 |
-
ax.set_ylabel("Confidence (%)")
|
46 |
-
ax.set_ylim(0, 100)
|
47 |
-
plt.tight_layout()
|
48 |
-
|
49 |
-
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
|
50 |
-
plt.savefig(tmpfile.name)
|
51 |
-
plot_path = tmpfile.name
|
52 |
-
|
53 |
-
summary = f"""
|
54 |
-
**Video Analysis Complete**
|
55 |
-
**Frames Analyzed:** {len(df)}
|
56 |
-
**Duration:** {round(frame_count / frame_rate, 1)} seconds
|
57 |
-
|
58 |
-
**Average Emotions:**\n""" + "\n".join([f"• {emotion}: {round(score, 1)}%" for emotion, score in avg_emotions.items()])
|
59 |
-
|
60 |
-
return summary, plot_path
|
61 |
-
|
62 |
-
|
63 |
-
iface = gr.Interface(
|
64 |
-
fn=analyze_emotions,
|
65 |
-
inputs=gr.Video(label="Upload a Video"),
|
66 |
-
outputs=[gr.Markdown(), gr.Image(type="filepath", label="Emotion Summary Chart")],
|
67 |
-
title="Emotion Analysis from Video (No TensorFlow)"
|
68 |
-
)
|
69 |
-
|
70 |
-
iface.launch()
|
|
|
1 |
+
import os
|
2 |
import cv2
|
3 |
+
import io
|
4 |
+
import tempfile
|
5 |
+
import numpy as np
|
6 |
import pandas as pd
|
7 |
import matplotlib.pyplot as plt
|
8 |
+
from PIL import Image
|
9 |
+
import torch
|
10 |
+
import torch.nn.functional as F
|
11 |
+
from torchvision import transforms
|
12 |
+
from facenet_pytorch import MTCNN
|
13 |
+
import gradio as gr
|
14 |
+
|
15 |
+
class EmotionModel(torch.nn.Module):
    """Compact CNN that turns a single-channel face crop into 7 emotion logits.

    Two conv/ReLU/max-pool stages halve the spatial size twice, so the
    ``64 * 12 * 12`` flatten size corresponds to 48x48 inputs.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        # Convolutional feature extractor: 1 -> 32 -> 64 channels.
        feature_layers = [
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]

        # Fully connected head ending in one logit per emotion class (7).
        classifier_layers = [
            nn.Flatten(),
            nn.Linear(in_features=64 * 12 * 12, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=7),
        ]

        # Kept as one flat Sequential so parameter names stay ``net.<index>.*``.
        self.net = nn.Sequential(*feature_layers, *classifier_layers)

    def forward(self, x):
        """Return raw (un-normalised) class scores for the batch ``x``."""
        logits = self.net(x)
        return logits
|
33 |
+
|
34 |
+
class EmotionDetector:
    """Sample frames from a video, find a face in each and score its emotions.

    NOTE: unless ``weights_path`` is supplied the classifier keeps its random
    initialisation, so the reported percentages are not meaningful.
    """

    def __init__(self, device='cpu', weights_path=None):
        """Set up the classifier, face detector and preprocessing.

        Args:
            device: Torch device string used for the model and the detector.
            weights_path: Optional path to a ``state_dict`` checkpoint for
                ``EmotionModel``. When ``None`` no weights are loaded.
        """
        self.device = device
        self.model = EmotionModel().to(self.device)
        if weights_path is not None:
            # SECURITY: torch.load unpickles data; only load trusted checkpoints.
            state_dict = torch.load(weights_path, map_location=self.device)
            self.model.load_state_dict(state_dict)
        self.model.eval()
        self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
        self.face_detector = MTCNN(keep_all=False, device=self.device)
        # No ToTensor() here: the detector output is already a tensor.
        self.transform = transforms.Compose([
            transforms.Resize((48, 48)),
        ])
        self.softmax = torch.nn.Softmax(dim=1)

    def detect_emotions_video(self, video_path, sample_rate=30, max_size_mb=50):
        """Analyse every ``sample_rate``-th frame of a video.

        Args:
            video_path: Path to the video file, or ``None``.
            sample_rate: Step, in frames, between analysed frames (>= 1).
            max_size_mb: Files larger than this many MiB are rejected.

        Returns:
            ``(chart_image, summary_text)`` on success, otherwise
            ``(None, message)`` describing why nothing was produced. Errors
            are reported through the message rather than raised.
        """
        try:
            if video_path is None:
                return None, "No video provided"

            # UI sliders may deliver floats; range() needs a positive int.
            sample_rate = int(sample_rate)
            if sample_rate < 1:
                return None, "Sample rate must be at least 1 frame."

            if os.path.getsize(video_path) / (1024 * 1024) > max_size_mb:
                return None, f"File too large (>{max_size_mb} MB)."

            cap = cv2.VideoCapture(video_path)
            try:
                # Keep fps as a float: truncating 29.97 to 29 skews timestamps.
                fps = cap.get(cv2.CAP_PROP_FPS)
                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

                # ``not fps > 0`` also rejects NaN; a zero fps would otherwise
                # cause a division by zero when computing timestamps.
                if not cap.isOpened() or frame_count <= 0 or not fps > 0:
                    return None, "Invalid video file"

                emotions_over_time = self._sample_frames(
                    cap, frame_count, sample_rate, fps
                )
            finally:
                # Release the capture on every path, including early returns.
                cap.release()

            if not emotions_over_time:
                return None, "No emotions detected."

            df = pd.DataFrame(emotions_over_time)
            chart_image = self._plot_timeline(df)
            # Report the length of the video, not the last detection time.
            result_text = self._build_summary(df, frame_count / fps)
            return chart_image, result_text

        except Exception as e:
            # UI boundary: surface the failure as text instead of crashing.
            return None, f"Error: {str(e)}"

    def _sample_frames(self, cap, frame_count, sample_rate, fps):
        """Score every ``sample_rate``-th frame and return one dict per detected face."""
        rows = []
        for frame_idx in range(0, frame_count, sample_rate):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if not ret:
                continue

            probs = self._score_frame(frame)
            if probs is None:
                continue

            row = {name: float(p) * 100 for name, p in zip(self.emotions, probs)}
            row['timestamp'] = frame_idx / fps
            rows.append(row)
        return rows

    def _score_frame(self, frame):
        """Return class probabilities for the face in a BGR frame, or ``None`` if no face."""
        img_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # presumably a (3, H, W) float tensor of the cropped face - TODO confirm
        face_tensor = self.face_detector(img_pil)
        if face_tensor is None:
            return None

        face_tensor = self.transform(face_tensor)  # resize to 48x48
        face_tensor = face_tensor.mean(dim=0, keepdim=True)  # average channels -> 1 channel
        face_tensor = face_tensor.unsqueeze(0).to(self.device)  # add batch dimension

        with torch.no_grad():
            output = self.model(face_tensor)
        return self.softmax(output).cpu().numpy()[0]

    def _plot_timeline(self, df):
        """Render per-emotion confidence over time and return it as a PIL image."""
        # Use explicit figure/axes objects rather than pyplot's global state.
        fig, ax = plt.subplots(figsize=(12, 8))
        img_buf = io.BytesIO()
        try:
            for emotion in self.emotions:
                if emotion in df.columns:
                    ax.plot(df['timestamp'], df[emotion], label=emotion.title(), linewidth=2)

            ax.set_xlabel('Time (seconds)')
            ax.set_ylabel('Confidence (%)')
            ax.set_title('Emotions Over Time')
            ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
            ax.grid(True)
            fig.tight_layout()
            fig.savefig(img_buf, format='png', dpi=150, bbox_inches='tight')
        finally:
            # Always free the figure, even if plotting or saving fails.
            plt.close(fig)

        img_buf.seek(0)
        return Image.open(img_buf)

    def _build_summary(self, df, duration):
        """Format the text summary: frame count, duration and mean score per emotion."""
        avg_emotions = df[self.emotions].mean().sort_values(ascending=False)
        lines = [
            "**Video Analysis Complete**",
            f"**Frames Analyzed:** {len(df)}",
            f"**Duration:** {duration:.1f} seconds",
            "",
            "**Average Emotions:**",
        ]
        lines.extend(
            f"• {emotion.title()}: {confidence:.1f}%"
            for emotion, confidence in avg_emotions.items()
        )
        return "\n".join(lines) + "\n"
|
131 |
+
|
132 |
+
def create_interface():
    """Build the Gradio interface around a single shared ``EmotionDetector``.

    Returns:
        A ``gr.Interface`` taking a video and a frame sampling step, and
        producing an emotion chart image plus a text summary.
    """
    detector = EmotionDetector()

    def process(video, sample_rate):
        """Run the detector on the uploaded video; returns ``(image, text)``."""
        if video is None:
            return None, "Please upload a video."
        # Slider values can arrive as floats, but the frame step must be an int.
        return detector.detect_emotions_video(video, int(sample_rate))

    return gr.Interface(
        fn=process,
        inputs=[
            gr.Video(label="Upload Video"),
            gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Sample Rate (Frames)"),
        ],
        outputs=[
            gr.Image(type="pil", label="Emotion Chart"),
            gr.Textbox(label="Analysis Summary"),
        ],
        title="PyTorch-Only Emotion Detection",
        description="Analyze emotions from a video using PyTorch and facenet-pytorch.",
    )
|
153 |
|
154 |
+
# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    app = create_interface()
    app.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|