yunusajib committed
Commit d4c1346 · verified · 1 Parent(s): 0262090

app upgrade


upgrade some features

Files changed (1)
  app.py +97 -68
app.py CHANGED
@@ -1,104 +1,133 @@
 import os
 import cv2
-import torch
-import tempfile
 import io
+import tempfile
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 from PIL import Image
+import torch
+import torch.nn.functional as F
 from torchvision import transforms
 from facenet_pytorch import MTCNN
 import gradio as gr

-# Dummy emotion model (replace with trained PyTorch model)
-class SimpleEmotionClassifier(torch.nn.Module):
-    def __init__(self, num_classes=7):
-        super(SimpleEmotionClassifier, self).__init__()
-        self.model = torch.nn.Sequential(
+class EmotionModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.net = torch.nn.Sequential(
+            torch.nn.Conv2d(1, 32, 3, padding=1),
+            torch.nn.ReLU(),
+            torch.nn.MaxPool2d(2),
+            torch.nn.Conv2d(32, 64, 3, padding=1),
+            torch.nn.ReLU(),
+            torch.nn.MaxPool2d(2),
             torch.nn.Flatten(),
-            torch.nn.Linear(3 * 48 * 48, 256),
+            torch.nn.Linear(64 * 12 * 12, 128),
             torch.nn.ReLU(),
-            torch.nn.Linear(256, num_classes)
+            torch.nn.Linear(128, 7)  # 7 emotion classes
         )

     def forward(self, x):
-        return self.model(x)
+        return self.net(x)

 class EmotionDetector:
-    def __init__(self):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    def __init__(self, device='cpu'):
+        self.device = device
+        self.model = EmotionModel().to(self.device)
+        self.model.eval()
         self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
-
-        # Load models
         self.face_detector = MTCNN(keep_all=False, device=self.device)
-        self.emotion_model = SimpleEmotionClassifier(num_classes=len(self.emotions)).to(self.device)
-        self.emotion_model.eval()
-
-        # Dummy weights — you can replace this with trained weights
-        # torch.load('emotion_model.pth') to load a trained one
-
         self.transform = transforms.Compose([
-            transforms.Resize((48, 48)),
-            transforms.ToTensor(),
+            transforms.Resize((48, 48))
+            # No ToTensor() here – face already comes as tensor
         ])
+        self.softmax = torch.nn.Softmax(dim=1)
+
+        # OPTIONAL: load pre-trained weights if available
+        # self.model.load_state_dict(torch.load("emotion_model.pt", map_location=self.device))

-    def detect_emotions_video(self, video_path, sample_rate=30):
-        cap = cv2.VideoCapture(video_path)
-        fps = cap.get(cv2.CAP_PROP_FPS)
-        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        frame_indices = range(0, frame_count, sample_rate)
-
-        emotions_over_time = []
-
-        for idx in frame_indices:
-            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
-            ret, frame = cap.read()
-            if not ret:
-                continue
-
-            img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            face = self.face_detector(Image.fromarray(img_rgb))
-
-            if face is not None:
-                face = self.transform(face).unsqueeze(0).to(self.device)
+    def detect_emotions_video(self, video_path, sample_rate=30, max_size_mb=50):
+        try:
+            if video_path is None:
+                return None, "No video provided"
+
+            if os.path.getsize(video_path) / (1024 * 1024) > max_size_mb:
+                return None, f"File too large (>{max_size_mb} MB)."
+
+            cap = cv2.VideoCapture(video_path)
+            fps = int(cap.get(cv2.CAP_PROP_FPS))
+            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+            if frame_count == 0:
+                return None, "Invalid video file"
+
+            frame_indices = range(0, frame_count, sample_rate)
+            emotions_over_time = []
+
+            for frame_idx in frame_indices:
+                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
+                ret, frame = cap.read()
+                if not ret:
+                    continue
+
+                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                img_pil = Image.fromarray(img_rgb)
+
+                face_tensor = self.face_detector(img_pil)
+                if face_tensor is None:
+                    continue
+
+                face_tensor = self.transform(face_tensor)  # Resize
+                face_tensor = face_tensor.mean(dim=0, keepdim=True)  # convert to grayscale
+                face_tensor = face_tensor.unsqueeze(0).to(self.device)  # Add batch + channel
                 with torch.no_grad():
-                    logits = self.emotion_model(face)
-                    probs = torch.nn.functional.softmax(logits, dim=1)[0].cpu().numpy()
+                    output = self.model(face_tensor)
+                    probs = self.softmax(output).cpu().numpy()[0]

-                emotion_data = {emotion: float(probs[i]) * 100 for i, emotion in enumerate(self.emotions)}
-                emotion_data['timestamp'] = idx / fps
+                emotion_data = {self.emotions[i]: float(probs[i]) * 100 for i in range(len(self.emotions))}
+                emotion_data['timestamp'] = frame_idx / fps
                 emotions_over_time.append(emotion_data)

-        cap.release()
-
-        if not emotions_over_time:
-            return None, "No faces/emotions detected."
-
-        df = pd.DataFrame(emotions_over_time)
-
-        # Plot
-        plt.figure(figsize=(12, 6))
-        for emotion in self.emotions:
-            plt.plot(df['timestamp'], df[emotion], label=emotion)
-        plt.xlabel("Time (s)")
-        plt.ylabel("Emotion Confidence (%)")
-        plt.legend()
-        plt.tight_layout()
-
-        buf = io.BytesIO()
-        plt.savefig(buf, format='png')
-        buf.seek(0)
-        chart_img = Image.open(buf)
-        plt.close('all')
-
-        # Text Summary
-        avg_emotions = df[self.emotions].mean().sort_values(ascending=False)
-        result_text = "**Average Emotions:**\n"
-        for emo, val in avg_emotions.items():
-            result_text += f"• {emo.title()}: {val:.1f}%\n"
-
-        return chart_img, result_text
+            cap.release()
+
+            if not emotions_over_time:
+                return None, "No emotions detected."
+
+            df = pd.DataFrame(emotions_over_time)
+
+            plt.figure(figsize=(12, 8))
+            for emotion in self.emotions:
+                if emotion in df.columns:
+                    plt.plot(df['timestamp'], df[emotion], label=emotion.title(), linewidth=2)
+
+            plt.xlabel('Time (seconds)')
+            plt.ylabel('Confidence (%)')
+            plt.title('Emotions Over Time')
+            plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
+            plt.grid(True)
+            plt.tight_layout()
+
+            img_buf = io.BytesIO()
+            plt.savefig(img_buf, format='png', dpi=150, bbox_inches='tight')
+            img_buf.seek(0)
+            plt.close()
+
+            chart_image = Image.open(img_buf)
+            avg_emotions = df[self.emotions].mean().sort_values(ascending=False)
+
+            result_text = f"**Video Analysis Complete**\n"
+            result_text += f"**Frames Analyzed:** {len(emotions_over_time)}\n"
+            result_text += f"**Duration:** {df['timestamp'].max():.1f} seconds\n\n"
+            result_text += "**Average Emotions:**\n"
+            for emotion, confidence in avg_emotions.items():
+                result_text += f"• {emotion.title()}: {confidence:.1f}%\n"
+
+            return chart_image, result_text
+
+        except Exception as e:
+            return None, f"Error: {str(e)}"

 def create_interface():
     detector = EmotionDetector()
@@ -112,14 +141,14 @@ def create_interface():
         fn=process,
         inputs=[
             gr.Video(label="Upload Video"),
-            gr.Slider(1, 60, step=1, value=30, label="Sample Rate (Frames)")
+            gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Sample Rate (Frames)")
         ],
         outputs=[
             gr.Image(type="pil", label="Emotion Chart"),
-            gr.Textbox(label="Summary")
+            gr.Textbox(label="Analysis Summary")
         ],
-        title="AI Video Emotion Detector",
-        description="Detects facial emotions in a video you provided"
+        title="PyTorch-Only Emotion Detection",
+        description="Analyze emotions from a video using PyTorch and facenet-pytorch."
     )

 if __name__ == "__main__":
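
A quick way to sanity-check the new model's dimensions: the two MaxPool2d(2) layers reduce the 48×48 input to 24×24 and then 12×12, which is why the first Linear layer expects 64 * 12 * 12 features, and the preprocessing in detect_emotions_video (resize to 48×48, channel-mean to grayscale, add a batch dimension) produces exactly that 1×1×48×48 input. Below is a minimal sketch of that check, assuming the updated app.py is importable as `app` from the working directory with its dependencies installed; the dummy tensor stands in for a preprocessed face, and the outputs are only meaningful once the optional trained state dict mentioned in the diff is loaded.

import torch
from app import EmotionModel  # assumes the new app.py is on the Python path

# Dummy preprocessed face: batch of 1, single grayscale channel, 48x48 pixels,
# matching the Resize + channel-mean + unsqueeze steps in detect_emotions_video.
dummy_face = torch.rand(1, 1, 48, 48)

model = EmotionModel()
model.eval()
# Optionally load trained weights here, as the diff's commented-out line suggests:
# model.load_state_dict(torch.load("emotion_model.pt", map_location="cpu"))

with torch.no_grad():
    logits = model(dummy_face)

print(logits.shape)  # torch.Size([1, 7]) – one score per emotion class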