import os
import cv2
import io
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torch
from torchvision import transforms
from facenet_pytorch import MTCNN
import gradio as gr
import seaborn as sns


class EmotionModel(torch.nn.Module):
    """Small CNN classifier over 48x48 grayscale face crops."""

    def __init__(self):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),                  # 48x48 -> 24x24
            torch.nn.Conv2d(32, 64, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),                  # 24x24 -> 12x12
            torch.nn.Flatten(),
            torch.nn.Linear(64 * 12 * 12, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 7)                 # 7 emotion classes
        )

    def forward(self, x):
        return self.net(x)
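
# Quick shape sanity check (a minimal sketch, not part of the app's flow):
# a 48x48 input shrinks through the two 2x2 max-pools as 48 -> 24 -> 12,
# which is where the 64 * 12 * 12 flattened size above comes from.
def _sanity_check_model():
    dummy = torch.zeros(1, 1, 48, 48)  # (batch, channels, height, width)
    assert EmotionModel()(dummy).shape == (1, 7)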

class EmotionDetector:
    def __init__(self, device='cpu'):
        self.device = device
        # NOTE: the weights here are randomly initialized; load a trained
        # checkpoint for meaningful predictions (see the sketch at the end
        # of this file).
        self.model = EmotionModel().to(self.device)
        self.model.eval()
        self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
        self.face_detector = MTCNN(keep_all=False, device=self.device)
        self.transform = transforms.Compose([
            transforms.Resize((48, 48))
        ])
        self.softmax = torch.nn.Softmax(dim=1)

    def detect_emotions_video(self, video_path, sample_rate=30, max_size_mb=50):
        try:
            if video_path is None:
                return None, "No video provided"
            if os.path.getsize(video_path) / (1024 * 1024) > max_size_mb:
                return None, f"File too large (>{max_size_mb} MB)."

            cap = cv2.VideoCapture(video_path)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if not cap.isOpened() or frame_count <= 0:
                cap.release()
                return None, "Invalid video file"
            fps = cap.get(cv2.CAP_PROP_FPS)
            if fps <= 0:
                fps = 30  # some containers report no frame rate; assume a common default

            emotions_over_time = []
            for frame_idx in range(0, frame_count, int(sample_rate)):
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ret, frame = cap.read()
                if not ret:
                    continue
                # OpenCV decodes BGR; MTCNN expects an RGB PIL image.
                img_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                face_tensor = self.face_detector(img_pil)  # standardized face crop, or None
                if face_tensor is None:
                    continue
                face_tensor = self.transform(face_tensor)               # resize to 48x48
                face_tensor = face_tensor.mean(dim=0, keepdim=True)     # RGB -> grayscale via channel mean
                face_tensor = face_tensor.unsqueeze(0).to(self.device)  # add batch dimension
                with torch.no_grad():
                    output = self.model(face_tensor)
                    probs = self.softmax(output).cpu().numpy()[0]
                emotion_data = {self.emotions[i]: float(probs[i]) * 100 for i in range(len(self.emotions))}
                emotion_data['timestamp'] = frame_idx / fps
                emotions_over_time.append(emotion_data)
            cap.release()

            if not emotions_over_time:
                return None, "No faces detected in the sampled frames."
            df = pd.DataFrame(emotions_over_time)
            df['dominant_emotion'] = df[self.emotions].idxmax(axis=1)

            # --- Chart plotting ---
            fig, axs = plt.subplots(2, 1, figsize=(12, 10), constrained_layout=True)

            # 1. Stacked area chart of per-emotion confidence
            df_sorted = df.sort_values("timestamp")
            axs[0].stackplot(df_sorted["timestamp"],
                             [df_sorted[e] for e in self.emotions],
                             labels=[e.title() for e in self.emotions])
            axs[0].set_title("Emotions Over Time")
            axs[0].set_xlabel("Time (seconds)")
            axs[0].set_ylabel("Confidence (%)")
            axs[0].legend(loc="upper right")
            axs[0].grid(True)

            # 2. Dominant emotion timeline (bar chart)
            color_palette = sns.color_palette("husl", len(self.emotions))
            emotion_color_map = {e: color_palette[i] for i, e in enumerate(self.emotions)}
            colors = df['dominant_emotion'].map(emotion_color_map)
            axs[1].bar(df['timestamp'], 1, color=colors, width=sample_rate / fps)
            axs[1].set_title("Dominant Emotion Timeline")
            axs[1].set_xlabel("Time (seconds)")
            axs[1].set_yticks([])
            axs[1].legend(handles=[plt.Rectangle((0, 0), 1, 1, color=emotion_color_map[e])
                                   for e in self.emotions],
                          labels=[e.title() for e in self.emotions],
                          loc="upper right", title="Emotion")

            # Render the figure to a PIL image for Gradio
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
            buf.seek(0)
            plt.close(fig)
            chart_image = Image.open(buf)

            avg_emotions = df[self.emotions].mean().sort_values(ascending=False)
            result_text = "**Video Analysis Complete**\n"
            result_text += f"**Frames Analyzed:** {len(emotions_over_time)}\n"
            result_text += f"**Duration:** {df['timestamp'].max():.1f} seconds\n\n"
            result_text += "**Average Emotions:**\n"
            for emotion, confidence in avg_emotions.items():
                result_text += f"• {emotion.title()}: {confidence:.1f}%\n"
            return chart_image, result_text
        except Exception as e:
            return None, f"Error: {str(e)}"


# Gradio interface
def create_interface():
    detector = EmotionDetector()

    def process(video_path, sample_rate):
        if video_path is None or not os.path.exists(video_path):
            return None, "Invalid video path or no video uploaded."
        # Sliders can deliver floats; the frame step must be an int.
        return detector.detect_emotions_video(video_path, int(sample_rate))

    return gr.Interface(
        fn=process,
        inputs=[
            gr.Video(label="Upload Video"),
            gr.Slider(minimum=1, maximum=60, step=1, value=30,
                      label="Sample Rate (analyze every Nth frame)")
        ],
        outputs=[
            gr.Image(type="pil", label="Emotion Chart"),
            gr.Textbox(label="Analysis Summary")
        ],
        title="AI Emotion Detection",
        description="Upload a video to analyze emotions over time."
    )


if __name__ == "__main__":
    create_interface().launch()
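
# --- Loading trained weights (a minimal sketch, not wired into the app) ---
# EmotionDetector builds EmotionModel with random weights, so its predictions
# are placeholders until a checkpoint is loaded. Assuming a state_dict saved
# via torch.save(model.state_dict(), "emotion_model.pt") (the filename is
# hypothetical, this script does not ship one):
#
#     detector = EmotionDetector()
#     state = torch.load("emotion_model.pt", map_location=detector.device)
#     detector.model.load_state_dict(state)
#     detector.model.eval()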