import os
import io

import cv2
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

import torch
from torchvision import transforms
from facenet_pytorch import MTCNN
import gradio as gr

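# Compact CNN classifier: expects a 1x48x48 grayscale face crop and returns logits
# over the 7 emotion classes listed in EmotionDetector.emotions.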
class EmotionModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
            torch.nn.Conv2d(32, 64, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
            torch.nn.Flatten(),
            torch.nn.Linear(64 * 12 * 12, 128),  # 48x48 input -> 12x12 after two 2x2 pools
            torch.nn.ReLU(),
            torch.nn.Linear(128, 7)  # 7 emotion classes
        )

    def forward(self, x):
        return self.net(x)

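# Wraps the end-to-end pipeline: MTCNN face detection -> 48x48 resize -> grayscale
# -> CNN -> softmax probabilities per emotion. Note that no trained weights are
# loaded in this file, so predictions come from a randomly initialised network;
# to use a trained model, a checkpoint would have to be loaded first, e.g.
# (hypothetical filename):
#     detector = EmotionDetector()
#     state = torch.load("emotion_weights.pth", map_location=detector.device)
#     detector.model.load_state_dict(state)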
class EmotionDetector:
    def __init__(self, device='cpu'):
        self.device = device
        self.model = EmotionModel().to(self.device)
        self.model.eval()
        self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
        self.face_detector = MTCNN(keep_all=False, device=self.device)
        self.transform = transforms.Compose([
            transforms.Resize((48, 48))
        ])
        self.softmax = torch.nn.Softmax(dim=1)

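    # Analyse a video by sampling every `sample_rate`-th frame, detecting the most
    # prominent face with MTCNN, classifying its emotion, and plotting the results.
    # Returns (chart as a PIL image, Markdown summary) or (None, error message).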
    def detect_emotions_video(self, video_path, sample_rate=30, max_size_mb=50):
        try:
            if video_path is None:
                return None, "No video provided"
            if os.path.getsize(video_path) / (1024 * 1024) > max_size_mb:
                return None, f"File too large (>{max_size_mb} MB)."

            cap = cv2.VideoCapture(video_path)
            fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30  # fall back if FPS metadata is missing
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if frame_count == 0:
                return None, "Invalid video file"

            frame_indices = range(0, frame_count, int(sample_rate))  # every Nth frame
            emotions_over_time = []
            for frame_idx in frame_indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ret, frame = cap.read()
                if not ret:
                    continue
                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img_pil = Image.fromarray(img_rgb)
                face_tensor = self.face_detector(img_pil)
                if face_tensor is None:
                    continue
                face_tensor = self.transform(face_tensor)  # resize to 48x48
                face_tensor = face_tensor.mean(dim=0, keepdim=True)  # RGB -> grayscale
                face_tensor = face_tensor.unsqueeze(0).to(self.device)  # add batch dimension
                with torch.no_grad():
                    output = self.model(face_tensor)
                    probs = self.softmax(output).cpu().numpy()[0]
                emotion_data = {self.emotions[i]: float(probs[i]) * 100 for i in range(len(self.emotions))}
                emotion_data['timestamp'] = frame_idx / fps
                emotions_over_time.append(emotion_data)
            cap.release()

            if not emotions_over_time:
                return None, "No emotions detected."

            df = pd.DataFrame(emotions_over_time)
            df['dominant_emotion'] = df[self.emotions].idxmax(axis=1)

            # --- Chart plotting ---
            fig, axs = plt.subplots(2, 1, figsize=(12, 10), constrained_layout=True)

            # 1. Stacked area chart of per-emotion confidence
            df_sorted = df.sort_values("timestamp")
            axs[0].stackplot(df_sorted["timestamp"],
                             [df_sorted[e] for e in self.emotions],
                             labels=[e.title() for e in self.emotions])
            axs[0].set_title("Emotions Over Time")
            axs[0].set_xlabel("Time (seconds)")
            axs[0].set_ylabel("Confidence (%)")
            axs[0].legend(loc="upper right")
            axs[0].grid(True)

            # 2. Dominant emotion timeline (bar chart)
            color_palette = sns.color_palette("husl", len(self.emotions))
            emotion_color_map = {e: color_palette[i] for i, e in enumerate(self.emotions)}
            colors = df['dominant_emotion'].map(emotion_color_map)
            axs[1].bar(df['timestamp'], 1, color=colors, width=sample_rate / fps)
            axs[1].set_title("Dominant Emotion Timeline")
            axs[1].set_xlabel("Time (seconds)")
            axs[1].set_yticks([])
            axs[1].legend(handles=[plt.Rectangle((0, 0), 1, 1, color=emotion_color_map[e]) for e in self.emotions],
                          labels=[e.title() for e in self.emotions], loc="upper right", title="Emotion")

            # Save the chart to an in-memory PNG
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
            buf.seek(0)
            plt.close(fig)
            chart_image = Image.open(buf)

            avg_emotions = df[self.emotions].mean().sort_values(ascending=False)
            result_text = "**Video Analysis Complete**\n"
            result_text += f"**Frames Analyzed:** {len(emotions_over_time)}\n"
            result_text += f"**Duration:** {df['timestamp'].max():.1f} seconds\n\n"
            result_text += "**Average Emotions:**\n"
            for emotion, confidence in avg_emotions.items():
                result_text += f"• {emotion.title()}: {confidence:.1f}%\n"
            return chart_image, result_text
        except Exception as e:
            return None, f"Error: {str(e)}"

# Gradio interface
def create_interface():
    detector = EmotionDetector()

    def process(video_path, sample_rate):
        if video_path is None or not os.path.exists(video_path):
            return None, "Invalid video path or no video uploaded."
        return detector.detect_emotions_video(video_path, sample_rate)

    return gr.Interface(
        fn=process,
        inputs=[
            gr.Video(label="Upload Video"),
            gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Sample Rate (Frames)")
        ],
        outputs=[
            gr.Image(type="pil", label="Emotion Chart"),
            gr.Textbox(label="Analysis Summary")
        ],
        title="AI Emotion Detection",
        description="Upload a video to analyze emotions over time."
    )


if __name__ == "__main__":
    create_interface().launch()
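# Running this script starts a local Gradio server; launch() prints the local URL
# to open in a browser.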