import os
import cv2
import io
import tempfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn.functional as F
from torchvision import transforms
from facenet_pytorch import MTCNN
import gradio as gr
import seaborn as sns
class EmotionModel(torch.nn.Module):
    """Small CNN that maps a 1x48x48 grayscale face crop to 7 emotion logits."""

    def __init__(self):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),   # 48x48 -> 24x24
            torch.nn.Conv2d(32, 64, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),   # 24x24 -> 12x12
            torch.nn.Flatten(),
            torch.nn.Linear(64 * 12 * 12, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 7)  # 7 emotion classes
        )

    def forward(self, x):
        return self.net(x)
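

# Illustrative helper, not called by the app: a quick shape check that documents
# why the first Linear layer expects 64 * 12 * 12 inputs. A 48x48 face halves to
# 24x24 and then 12x12 across the two max-pool stages, with 64 channels.
def _shape_sanity_check():
    model = EmotionModel()
    dummy = torch.randn(1, 1, 48, 48)  # one grayscale 48x48 face
    out = model(dummy)
    assert out.shape == (1, 7)         # seven logits, one per emotion class
    return out.shape
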
class EmotionDetector:
    def __init__(self, device='cpu'):
        self.device = device
        # Note: no checkpoint is loaded here, so the CNN runs with randomly
        # initialized weights (see the optional load_pretrained sketch below).
        self.model = EmotionModel().to(self.device)
        self.model.eval()
        self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
        self.face_detector = MTCNN(keep_all=False, device=self.device)
        self.transform = transforms.Compose([
            transforms.Resize((48, 48))
        ])
        self.softmax = torch.nn.Softmax(dim=1)
    def detect_emotions_video(self, video_path, sample_rate=30, max_size_mb=50):
        try:
            if video_path is None:
                return None, "No video provided"
            if os.path.getsize(video_path) / (1024 * 1024) > max_size_mb:
                return None, f"File too large (>{max_size_mb} MB)."

            cap = cv2.VideoCapture(video_path)
            fps = cap.get(cv2.CAP_PROP_FPS) or 30  # fall back to 30 if the container reports no FPS
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if frame_count == 0:
                return None, "Invalid video file"

            # Analyze every `sample_rate`-th frame.
            frame_indices = range(0, frame_count, int(sample_rate))
            emotions_over_time = []
            for frame_idx in frame_indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ret, frame = cap.read()
                if not ret:
                    continue

                # Detect and crop the most prominent face in the frame.
                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img_pil = Image.fromarray(img_rgb)
                face_tensor = self.face_detector(img_pil)
                if face_tensor is None:
                    continue

                # Resize to 48x48, collapse RGB to grayscale, add a batch dimension.
                face_tensor = self.transform(face_tensor)
                face_tensor = face_tensor.mean(dim=0, keepdim=True)
                face_tensor = face_tensor.unsqueeze(0).to(self.device)

                with torch.no_grad():
                    output = self.model(face_tensor)
                    probs = self.softmax(output).cpu().numpy()[0]

                emotion_data = {self.emotions[i]: float(probs[i]) * 100 for i in range(len(self.emotions))}
                emotion_data['timestamp'] = frame_idx / fps
                emotions_over_time.append(emotion_data)

            cap.release()
            if not emotions_over_time:
                return None, "No emotions detected."

            df = pd.DataFrame(emotions_over_time)
            df['dominant_emotion'] = df[self.emotions].idxmax(axis=1)

            # --- Chart plotting ---
            fig, axs = plt.subplots(2, 1, figsize=(12, 10), constrained_layout=True)

            # 1. Stacked area chart of per-emotion confidence over time
            df_sorted = df.sort_values("timestamp")
            axs[0].stackplot(df_sorted["timestamp"],
                             [df_sorted[e] for e in self.emotions],
                             labels=[e.title() for e in self.emotions])
            axs[0].set_title("Emotions Over Time")
            axs[0].set_xlabel("Time (seconds)")
            axs[0].set_ylabel("Confidence (%)")
            axs[0].legend(loc="upper right")
            axs[0].grid(True)

            # 2. Dominant emotion timeline (bar chart)
            color_palette = sns.color_palette("husl", len(self.emotions))
            emotion_color_map = {e: color_palette[i] for i, e in enumerate(self.emotions)}
            colors = df['dominant_emotion'].map(emotion_color_map).tolist()
            axs[1].bar(df['timestamp'], 1, color=colors, width=sample_rate / fps)
            axs[1].set_title("Dominant Emotion Timeline")
            axs[1].set_xlabel("Time (seconds)")
            axs[1].set_yticks([])
            axs[1].legend(handles=[plt.Rectangle((0, 0), 1, 1, color=emotion_color_map[e]) for e in self.emotions],
                          labels=[e.title() for e in self.emotions], loc="upper right", title="Emotion")

            # Render the chart to a PIL image
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
            buf.seek(0)
            plt.close(fig)
            chart_image = Image.open(buf)

            # Text summary
            avg_emotions = df[self.emotions].mean().sort_values(ascending=False)
            result_text = "**Video Analysis Complete**\n"
            result_text += f"**Frames Analyzed:** {len(emotions_over_time)}\n"
            result_text += f"**Duration:** {df['timestamp'].max():.1f} seconds\n\n"
            result_text += "**Average Emotions:**\n"
            for emotion, confidence in avg_emotions.items():
                result_text += f"• {emotion.title()}: {confidence:.1f}%\n"

            return chart_image, result_text
        except Exception as e:
            return None, f"Error: {str(e)}"
# Gradio interface
def create_interface():
    detector = EmotionDetector()

    def process(video_path, sample_rate):
        if video_path is None or not os.path.exists(video_path):
            return None, "Invalid video path or no video uploaded."
        return detector.detect_emotions_video(video_path, sample_rate)

    return gr.Interface(
        fn=process,
        inputs=[
            gr.Video(label="Upload Video"),
            gr.Slider(minimum=1, maximum=60, step=1, value=30,
                      label="Sample Rate (analyze every Nth frame)")
        ],
        outputs=[
            gr.Image(type="pil", label="Emotion Chart"),
            gr.Textbox(label="Analysis Summary")
        ],
        title="AI Emotion Detection",
        description="Upload a video to analyze emotions over time."
    )


if __name__ == "__main__":
    create_interface().launch()
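
# Optional usage sketch (no UI): the detector can also be driven directly, e.g.
# for batch processing. "clip.mp4" is an assumed local file, not part of the app.
#     detector = EmotionDetector()
#     chart, summary = detector.detect_emotions_video("clip.mp4", sample_rate=15)
#     print(summary)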