import io
import os

import cv2
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
from facenet_pytorch import MTCNN
from PIL import Image
from torchvision import transforms


class EmotionModel(torch.nn.Module):
    """Small CNN classifier: 1x48x48 grayscale face crop -> 7 emotion logits."""

    def __init__(self):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),               # 48x48 -> 24x24
            torch.nn.Conv2d(32, 64, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),               # 24x24 -> 12x12
            torch.nn.Flatten(),
            torch.nn.Linear(64 * 12 * 12, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 7),
        )

    def forward(self, x):
        return self.net(x)
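

# Shape sanity check (illustrative only, not executed here): a single 48x48
# grayscale crop batched as (1, 1, 48, 48) yields 7 logits, one per emotion class.
#
#   logits = EmotionModel()(torch.zeros(1, 1, 48, 48))
#   assert logits.shape == (1, 7)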


class EmotionDetector:
    def __init__(self, device='cpu'):
        self.device = device
        self.model = EmotionModel().to(self.device)
        # NOTE: no checkpoint is loaded, so the classifier runs with randomly
        # initialized weights; load trained FER weights (e.g. via
        # self.model.load_state_dict(...)) to get meaningful predictions.
        self.model.eval()
        self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
        # MTCNN with keep_all=False returns only the most confident face per frame.
        self.face_detector = MTCNN(keep_all=False, device=self.device)
        self.transform = transforms.Compose([
            transforms.Resize((48, 48))
        ])
        self.softmax = torch.nn.Softmax(dim=1)

    def detect_emotions_video(self, video_path, sample_rate=30, max_size_mb=50):
        """Sample every `sample_rate`-th frame, classify the detected face, and
        return a chart image plus a text summary."""
        try:
            if video_path is None:
                return None, "No video provided"

            if os.path.getsize(video_path) / (1024 * 1024) > max_size_mb:
                return None, f"File too large (>{max_size_mb} MB)."

            sample_rate = max(1, int(sample_rate))  # Gradio sliders may pass floats

            cap = cv2.VideoCapture(video_path)
            fps = cap.get(cv2.CAP_PROP_FPS) or 30  # fall back if FPS metadata is missing
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if frame_count == 0:
                cap.release()
                return None, "Invalid video file"

            frame_indices = range(0, frame_count, sample_rate)
            emotions_over_time = []

            for frame_idx in frame_indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ret, frame = cap.read()
                if not ret:
                    continue

                # OpenCV decodes frames as BGR; MTCNN expects RGB.
                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img_pil = Image.fromarray(img_rgb)

                face_tensor = self.face_detector(img_pil)
                if face_tensor is None:  # no face found in this frame
                    continue

                # Resize the MTCNN crop to 48x48, average the channels to grayscale,
                # and add a batch dimension: (1, 1, 48, 48).
                face_tensor = self.transform(face_tensor)
                face_tensor = face_tensor.mean(dim=0, keepdim=True)
                face_tensor = face_tensor.unsqueeze(0).to(self.device)

                with torch.no_grad():
                    output = self.model(face_tensor)
                    probs = self.softmax(output).cpu().numpy()[0]

                emotion_data = {self.emotions[i]: float(probs[i]) * 100 for i in range(len(self.emotions))}
                emotion_data['timestamp'] = frame_idx / fps
                emotions_over_time.append(emotion_data)

            cap.release()

            if not emotions_over_time:
                return None, "No faces detected in the sampled frames."

            df = pd.DataFrame(emotions_over_time)
            df['dominant_emotion'] = df[self.emotions].idxmax(axis=1)

            fig, axs = plt.subplots(2, 1, figsize=(12, 10), constrained_layout=True)

            # Top panel: stacked per-emotion confidence over time.
            df_sorted = df.sort_values("timestamp")
            axs[0].stackplot(df_sorted["timestamp"],
                             [df_sorted[e] for e in self.emotions],
                             labels=[e.title() for e in self.emotions])
            axs[0].set_title("Emotions Over Time")
            axs[0].set_xlabel("Time (seconds)")
            axs[0].set_ylabel("Confidence (%)")
            axs[0].legend(loc="upper right")
            axs[0].grid(True)

            # Bottom panel: dominant emotion per sampled frame as a colored timeline.
            color_palette = sns.color_palette("husl", len(self.emotions))
            emotion_color_map = {e: color_palette[i] for i, e in enumerate(self.emotions)}

            colors = df['dominant_emotion'].map(emotion_color_map)
            axs[1].bar(df['timestamp'], 1, color=colors, width=sample_rate / fps)
            axs[1].set_title("Dominant Emotion Timeline")
            axs[1].set_xlabel("Time (seconds)")
            axs[1].set_yticks([])
            axs[1].legend(handles=[plt.Rectangle((0, 0), 1, 1, color=emotion_color_map[e]) for e in self.emotions],
                          labels=[e.title() for e in self.emotions], loc="upper right", title="Emotion")

            # Render the figure to an in-memory PNG and hand it to Gradio as a PIL image.
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
            buf.seek(0)
            plt.close(fig)

            chart_image = Image.open(buf)
            avg_emotions = df[self.emotions].mean().sort_values(ascending=False)

            result_text = "**Video Analysis Complete**\n"
            result_text += f"**Frames Analyzed:** {len(emotions_over_time)}\n"
            result_text += f"**Duration:** {df['timestamp'].max():.1f} seconds\n\n"
            result_text += "**Average Emotions:**\n"
            for emotion, confidence in avg_emotions.items():
                result_text += f"• {emotion.title()}: {confidence:.1f}%\n"

            return chart_image, result_text

        except Exception as e:
            return None, f"Error: {str(e)}"
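

# Programmatic use (illustrative sketch; 'clip.mp4' is a placeholder path):
#
#   detector = EmotionDetector(device='cuda' if torch.cuda.is_available() else 'cpu')
#   chart, summary = detector.detect_emotions_video('clip.mp4', sample_rate=15)
#   if chart is not None:
#       chart.save('emotion_chart.png')
#   print(summary)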


def create_interface():
    detector = EmotionDetector()

    def process(video_path, sample_rate):
        if video_path is None or not os.path.exists(video_path):
            return None, "Invalid video path or no video uploaded."
        return detector.detect_emotions_video(video_path, sample_rate)

    return gr.Interface(
        fn=process,
        inputs=[
            gr.Video(label="Upload Video"),
            gr.Slider(minimum=1, maximum=60, step=1, value=30,
                      label="Sample Rate (analyze every Nth frame)"),
        ],
        outputs=[
            gr.Image(type="pil", label="Emotion Chart"),
            gr.Textbox(label="Analysis Summary"),
        ],
        title="AI Emotion Detection",
        description="Upload a video to analyze emotions over time.",
    )


if __name__ == "__main__":
    create_interface().launch()
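
# Deployment note (assumption, not part of the original script): launch() also
# accepts server_name, server_port, and share=True for a temporary public link, e.g.
#
#   create_interface().launch(server_name="0.0.0.0", server_port=7860)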