import os
import cv2
import io
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torch
from torchvision import transforms
from facenet_pytorch import MTCNN
import gradio as gr
import seaborn as sns

class EmotionModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
            torch.nn.Conv2d(32, 64, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
            torch.nn.Flatten(),
            torch.nn.Linear(64 * 12 * 12, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 7)  # 7 emotion classes
        )

    def forward(self, x):
        return self.net(x)
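
# Quick shape check (a sketch, not part of the app): with a 1x48x48 grayscale
# input, the two 2x2 max-pools reduce 48 -> 24 -> 12, so the flattened feature
# vector is 64 * 12 * 12 = 9216 and the final layer maps it to 7 emotion logits:
#
#     logits = EmotionModel()(torch.zeros(1, 1, 48, 48))
#     assert logits.shape == (1, 7)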

class EmotionDetector:
    def __init__(self, device='cpu'):
        self.device = device
        self.model = EmotionModel().to(self.device)
        self.model.eval()
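        # Note: the model weights are randomly initialized here; for meaningful
        # predictions a trained checkpoint would need to be loaded first, e.g.
        # (hypothetical file name):
        #     self.model.load_state_dict(torch.load("emotion_model.pt", map_location=self.device))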
        self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
        self.face_detector = MTCNN(keep_all=False, device=self.device)
        self.transform = transforms.Compose([
            transforms.Resize((48, 48))
        ])
        self.softmax = torch.nn.Softmax(dim=1)

    def detect_emotions_video(self, video_path, sample_rate=30, max_size_mb=50):
        try:
            if video_path is None:
                return None, "No video provided"

            if os.path.getsize(video_path) / (1024 * 1024) > max_size_mb:
                return None, f"File too large (>{max_size_mb} MB)."

            cap = cv2.VideoCapture(video_path)
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps or fps <= 0:
                fps = 30  # fall back when FPS metadata is missing to avoid division by zero
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if frame_count == 0:
                return None, "Invalid video file"

            frame_indices = range(0, frame_count, sample_rate)
            emotions_over_time = []

            for frame_idx in frame_indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ret, frame = cap.read()
                if not ret:
                    continue

                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img_pil = Image.fromarray(img_rgb)

                face_tensor = self.face_detector(img_pil)
                if face_tensor is None:
                    continue

                face_tensor = self.transform(face_tensor)  # resize the MTCNN face crop to 48x48
                face_tensor = face_tensor.mean(dim=0, keepdim=True)  # average RGB channels to approximate grayscale
                face_tensor = face_tensor.unsqueeze(0).to(self.device)  # add batch dimension

                with torch.no_grad():
                    output = self.model(face_tensor)
                    probs = self.softmax(output).cpu().numpy()[0]

                emotion_data = {self.emotions[i]: float(probs[i]) * 100 for i in range(len(self.emotions))}
                emotion_data['timestamp'] = frame_idx / fps
                emotions_over_time.append(emotion_data)

            cap.release()

            if not emotions_over_time:
                return None, "No emotions detected."

            df = pd.DataFrame(emotions_over_time)
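            # idxmax over the emotion columns gives the highest-confidence label for each sampled frame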
            df['dominant_emotion'] = df[self.emotions].idxmax(axis=1)

            # --- Chart Plotting ---
            fig, axs = plt.subplots(2, 1, figsize=(12, 10), constrained_layout=True)

            # 1. Stacked Area Chart
            df_sorted = df.sort_values("timestamp")
            axs[0].stackplot(df_sorted["timestamp"], [df_sorted[e] for e in self.emotions], labels=[e.title() for e in self.emotions])
            axs[0].set_title("Emotions Over Time")
            axs[0].set_xlabel("Time (seconds)")
            axs[0].set_ylabel("Confidence (%)")
            axs[0].legend(loc="upper right")
            axs[0].grid(True)

            # 2. Dominant Emotion Timeline (Bar Chart)
            color_palette = sns.color_palette("husl", len(self.emotions))
            emotion_color_map = {e: color_palette[i] for i, e in enumerate(self.emotions)}

            colors = df['dominant_emotion'].map(emotion_color_map)
            axs[1].bar(df['timestamp'], 1, color=colors, width=sample_rate / fps)
            axs[1].set_title("Dominant Emotion Timeline")
            axs[1].set_xlabel("Time (seconds)")
            axs[1].set_yticks([])
            axs[1].legend(handles=[plt.Rectangle((0, 0), 1, 1, color=emotion_color_map[e]) for e in self.emotions],
                          labels=[e.title() for e in self.emotions], loc="upper right", title="Emotion")

            # Save chart
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
            buf.seek(0)
            plt.close(fig)  # close this figure explicitly so repeated runs don't leak figures

            chart_image = Image.open(buf)
            avg_emotions = df[self.emotions].mean().sort_values(ascending=False)

            result_text = f"**Video Analysis Complete**\n"
            result_text += f"**Frames Analyzed:** {len(emotions_over_time)}\n"
            result_text += f"**Duration:** {df['timestamp'].max():.1f} seconds\n\n"
            result_text += "**Average Emotions:**\n"
            for emotion, confidence in avg_emotions.items():
                result_text += f"• {emotion.title()}: {confidence:.1f}%\n"

            return chart_image, result_text

        except Exception as e:
            return None, f"Error: {str(e)}"

# Gradio interface
def create_interface():
    detector = EmotionDetector()

    def process(video_path, sample_rate):
        if video_path is None or not os.path.exists(video_path):
            return None, "Invalid video path or no video uploaded."

        # Cast to int in case the slider delivers a float; range() needs an integer step
        return detector.detect_emotions_video(video_path, int(sample_rate))

    return gr.Interface(
        fn=process,
        inputs=[
            gr.Video(label="Upload Video"),
            gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Sample Every N Frames")
        ],
        outputs=[
            gr.Image(type="pil", label="Emotion Chart"),
            gr.Textbox(label="Analysis Summary")
        ],
        title="AI Emotion Detection",
        description="Upload a video to analyze emotions over time."
    )

if __name__ == "__main__":
    create_interface().launch()
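
# Usage sketch: assuming this module is saved as app.py, running `python app.py`
# starts the Gradio UI, which serves at http://127.0.0.1:7860 by default.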