vitorcalvi committed
Commit da18c29 · 1 Parent(s): b20a621

12 Oct Gitex 2024

.DS_Store ADDED
Binary file (8.2 kB).
 
__pycache__/ui_components.cpython-310.pyc ADDED
Binary file (3.82 kB).
 
app/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/__init__.cpython-310.pyc and b/app/__pycache__/__init__.cpython-310.pyc differ
 
app/__pycache__/app_utils.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/app_utils.cpython-310.pyc and b/app/__pycache__/app_utils.cpython-310.pyc differ
 
app/__pycache__/config.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/config.cpython-310.pyc and b/app/__pycache__/config.cpython-310.pyc differ
 
app/__pycache__/face_utils.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/face_utils.cpython-310.pyc and b/app/__pycache__/face_utils.cpython-310.pyc differ
 
app/__pycache__/model.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/model.cpython-310.pyc and b/app/__pycache__/model.cpython-310.pyc differ
 
app/__pycache__/model_architectures.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/model_architectures.cpython-310.pyc and b/app/__pycache__/model_architectures.cpython-310.pyc differ
 
app/__pycache__/plot.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/plot.cpython-310.pyc and b/app/__pycache__/plot.cpython-310.pyc differ
 
app/app_utils.py CHANGED
@@ -1,5 +1,3 @@
-
-
 import torch
 import numpy as np
 import mediapipe as mp
@@ -16,6 +14,21 @@ from app.plot import statistics_plot
 
 mp_face_mesh = mp.solutions.face_mesh
 
+def get_device():
+    if torch.backends.mps.is_available():
+        return torch.device("mps")
+    elif torch.cuda.is_available():
+        return torch.device("cuda")
+    else:
+        return torch.device("cpu")
+
+device = get_device()
+print(f"Using device: {device}")
+
+# Move models to the selected device
+pth_model_static = pth_model_static.to(device)
+pth_model_dynamic = pth_model_dynamic.to(device)
+
 def preprocess_image_and_predict(inp):
     inp = np.array(inp)
 
@@ -38,11 +51,12 @@ def preprocess_image_and_predict(inp):
     for fl in results.multi_face_landmarks:
         startX, startY, endX, endY = get_box(fl, w, h)
         cur_face = inp[startY:endY, startX:endX]
-        cur_face_n = pth_processing(Image.fromarray(cur_face))
+        cur_face_n = pth_processing(Image.fromarray(cur_face)).to(device)
         with torch.no_grad():
             prediction = (
                 torch.nn.functional.softmax(pth_model_static(cur_face_n), dim=1)
                 .detach()
+                .cpu()
                 .numpy()[0]
             )
         confidences = {DICT_EMO[i]: float(prediction[i]) for i in range(7)}
@@ -73,7 +87,7 @@ def preprocess_frame_and_predict_aus(frame):
     for fl in results.multi_face_landmarks:
         startX, startY, endX, endY = get_box(fl, w, h)
         cur_face = frame[startY:endY, startX:endX]
-        cur_face_n = pth_processing(Image.fromarray(cur_face))
+        cur_face_n = pth_processing(Image.fromarray(cur_face)).to(device)
 
         with torch.no_grad():
             features = pth_model_static(cur_face_n)
@@ -139,9 +153,9 @@ def preprocess_video_and_predict(video):
         cur_face = frame_copy[startY:endY, startX:endX]
 
         if count_face % config_data.FRAME_DOWNSAMPLING == 0:
-            cur_face_copy = pth_processing(Image.fromarray(cur_face))
+            cur_face_copy = pth_processing(Image.fromarray(cur_face)).to(device)
             with torch.no_grad():
-                features = torch.nn.functional.relu(pth_model_static.extract_features(cur_face_copy)).detach().numpy()
+                features = torch.nn.functional.relu(pth_model_static.extract_features(cur_face_copy)).detach().cpu().numpy()
                 au_intensities = features_to_au_intensities(pth_model_static(cur_face_copy))
 
             grayscale_cam = cam(input_tensor=cur_face_copy)
@@ -157,10 +171,10 @@ def preprocess_video_and_predict(video):
         else:
             lstm_features = lstm_features[1:] + [features]
 
-        lstm_f = torch.from_numpy(np.vstack(lstm_features))
+        lstm_f = torch.from_numpy(np.vstack(lstm_features)).to(device)
         lstm_f = torch.unsqueeze(lstm_f, 0)
         with torch.no_grad():
-            output = pth_model_dynamic(lstm_f).detach().numpy()
+            output = pth_model_dynamic(lstm_f).detach().cpu().numpy()
         last_output = output
 
         if count_face == 0:
@@ -214,6 +228,9 @@ def preprocess_video_and_predict(video):
 
     return video, path_save_video_face, path_save_video_hm, stat, au_stat
 
+# The rest of the functions remain the same
+# ...
+
 def au_statistics_plot(frames, au_intensities_list):
     fig, ax = plt.subplots(figsize=(12, 6))
     au_intensities_array = np.array(au_intensities_list)
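
For context, a minimal standalone sketch of the device-fallback pattern this diff introduces in app_utils.py; the helper name get_device comes from the diff itself, while the test tensor is purely illustrative:

import torch

def get_device() -> torch.device:
    # Prefer Apple's Metal backend, then CUDA, then fall back to CPU
    if torch.backends.mps.is_available():
        return torch.device("mps")
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

device = get_device()
x = torch.ones(2, 3).to(device)     # move an input tensor to the selected device
print(x.device)                     # e.g. mps:0, cuda:0, or cpu
print(x.detach().cpu().numpy())     # tensors must come back to CPU before .numpy()

This is also why the diff threads .cpu() in before every .numpy() call: calling .numpy() directly on an MPS or CUDA tensor raises a TypeError.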
app/model.py CHANGED
@@ -1,64 +1,78 @@
-"""
-File: model.py
-Author: Elena Ryumina and Dmitry Ryumin
-Description: This module provides functions for loading and processing a pre-trained deep learning model
-             for facial expression recognition.
-License: MIT License
-"""
-
+import os
 import torch
-import requests
-from PIL import Image
-from torchvision import transforms
+import torch.nn as nn
+import torchvision.transforms as transforms
 from pytorch_grad_cam import GradCAM
-
-# Importing necessary components for the Gradio app
-from app.config import config_data
+from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+import logging
 from app.model_architectures import ResNet50, LSTMPyTorch
 
-def load_model(model_url, model_path):
-    try:
-        with requests.get(model_url, stream=True) as response:
-            with open(model_path, "wb") as file:
-                for chunk in response.iter_content(chunk_size=8192):
-                    file.write(chunk)
-        return model_path
-    except Exception as e:
-        print(f"Error loading model: {e}")
-        return None
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Determine the device
+device = torch.device('mps' if torch.backends.mps.is_available() else 'cuda' if torch.cuda.is_available() else 'cpu')
+logger.info(f"Using device: {device}")
+
+# Define paths
+STATIC_MODEL_PATH = 'assets/models/FER_static_ResNet50_AffectNet.pt'
+DYNAMIC_MODEL_PATH = 'assets/models/FER_dynamic_LSTM.pt'
 
-path_static = load_model(config_data.model_static_url, config_data.model_static_path)
-pth_model_static = ResNet50(7, channels=3)
-pth_model_static.load_state_dict(torch.load(path_static))
-pth_model_static.eval()
+def load_model(model_class, model_path, *args, **kwargs):
+    model = model_class(*args, **kwargs).to(device)
+    if os.path.exists(model_path):
+        try:
+            model.load_state_dict(torch.load(model_path, map_location=device))
+            model.eval()
+            logger.info(f"Model loaded successfully from {model_path}")
+        except Exception as e:
+            logger.error(f"Error loading model from {model_path}: {str(e)}")
+            logger.info("Initializing with random weights.")
+    else:
+        logger.warning(f"Model file not found at {model_path}. Initializing with random weights.")
+    return model
 
-path_dynamic = load_model(config_data.model_dynamic_url, config_data.model_dynamic_path)
-pth_model_dynamic = LSTMPyTorch()
-pth_model_dynamic.load_state_dict(torch.load(path_dynamic))
-pth_model_dynamic.eval()
+# Load the static model
+pth_model_static = load_model(ResNet50, STATIC_MODEL_PATH, num_classes=7, channels=3)
 
-target_layers = [pth_model_static.layer4]
+# Load the dynamic model
+pth_model_dynamic = load_model(LSTMPyTorch, DYNAMIC_MODEL_PATH, input_size=2048, hidden_size=256, num_layers=2, num_classes=7)
+
+# Set up GradCAM
+target_layers = [pth_model_static.resnet.layer4[-1]]
 cam = GradCAM(model=pth_model_static, target_layers=target_layers)
 
-def pth_processing(fp):
-    class PreprocessInput(torch.nn.Module):
-        def __init__(self):
-            super(PreprocessInput, self).__init__()
-
-        def forward(self, x):
-            x = x.to(torch.float32)
-            x = torch.flip(x, dims=(0,))
-            x[0, :, :] -= 91.4953
-            x[1, :, :] -= 103.8827
-            x[2, :, :] -= 131.0912
-            return x
-
-    def get_img_torch(img, target_size=(224, 224)):
-        transform = transforms.Compose([transforms.PILToTensor(), PreprocessInput()])
-        img = img.resize(target_size, Image.Resampling.NEAREST)
-        img = transform(img)
-        img = torch.unsqueeze(img, 0)
-        return img
-
-    return get_img_torch(fp)
+# Define image preprocessing
+pth_transform = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+])
+
+def pth_processing(img):
+    img = pth_transform(img).unsqueeze(0).to(device)
+    return img
+
+def predict_emotion(img):
+    with torch.no_grad():
+        output = pth_model_static(pth_processing(img))
+        _, predicted = torch.max(output, 1)
+    return predicted.item()
+
+def get_emotion_probabilities(img):
+    with torch.no_grad():
+        output = nn.functional.softmax(pth_model_static(pth_processing(img)), dim=1)
+    return output.squeeze().cpu().numpy()
+
+def generate_cam(img):
+    input_tensor = pth_processing(img)
+    targets = [ClassifierOutputTarget(predict_emotion(img))]
+    grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
+    return grayscale_cam[0, :]
+
+# Add any other necessary functions or variables here
+
+if __name__ == "__main__":
+    logger.info("Model initialization complete.")
+    # You can add some test code here to verify everything is working correctly
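
For orientation, a hedged usage sketch of the interface the rewritten model.py now exposes; the image path is hypothetical, and the example assumes the module's import-time model loading succeeds:

from PIL import Image
from app.model import predict_emotion, get_emotion_probabilities, generate_cam

img = Image.open("face.jpg").convert("RGB")  # hypothetical input image

class_idx = predict_emotion(img)             # argmax over the 7 emotion logits
probs = get_emotion_probabilities(img)       # softmax vector, shape (7,)
heatmap = generate_cam(img)                  # Grad-CAM map at input resolution (224, 224)

One caution worth flagging: the new pth_processing applies ImageNet normalization, whereas the removed version subtracted VGGFace-style channel means (91.4953 / 103.8827 / 131.0912) after a channel flip. If FER_static_ResNet50_AffectNet.pt was trained with the old preprocessing, the new transform will shift its predictions and should be verified against known inputs.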
app/model_architectures.py ADDED
@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+import torchvision.models as models
+
+class ResNet50(nn.Module):
+    def __init__(self, num_classes=7, channels=3):
+        super(ResNet50, self).__init__()
+        self.resnet = models.resnet50(pretrained=True)
+        # Modify the first convolutional layer if channels != 3
+        if channels != 3:
+            self.resnet.conv1 = nn.Conv2d(channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
+        num_features = self.resnet.fc.in_features
+        self.resnet.fc = nn.Linear(num_features, num_classes)
+
+    def forward(self, x):
+        return self.resnet(x)
+
+    def extract_features(self, x):
+        x = self.resnet.conv1(x)
+        x = self.resnet.bn1(x)
+        x = self.resnet.relu(x)
+        x = self.resnet.maxpool(x)
+
+        x = self.resnet.layer1(x)
+        x = self.resnet.layer2(x)
+        x = self.resnet.layer3(x)
+        x = self.resnet.layer4(x)
+
+        x = self.resnet.avgpool(x)
+        x = torch.flatten(x, 1)
+        return x
+
+class LSTMPyTorch(nn.Module):
+    def __init__(self, input_size, hidden_size, num_layers, num_classes):
+        super(LSTMPyTorch, self).__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
+        self.fc = nn.Linear(hidden_size, num_classes)
+
+    def forward(self, x):
+        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
+        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
+        out, _ = self.lstm(x, (h0, c0))
+        out = self.fc(out[:, -1, :])
+        return out
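
A quick shape check tying the two new classes together — extract_features pools ResNet50 down to a 2048-dimensional vector, which is why model.py constructs the dynamic model with input_size=2048. Batch and window sizes here are illustrative, and instantiating ResNet50 downloads ImageNet weights on first use (pretrained=True):

import torch
from app.model_architectures import ResNet50, LSTMPyTorch

static = ResNet50(num_classes=7, channels=3).eval()
dynamic = LSTMPyTorch(input_size=2048, hidden_size=256, num_layers=2, num_classes=7).eval()

with torch.no_grad():
    feats = static.extract_features(torch.randn(1, 3, 224, 224))
    print(feats.shape)              # torch.Size([1, 2048])
    seq = torch.randn(1, 10, 2048)  # (batch, window of 10 frames, feature size)
    print(dynamic(seq).shape)       # torch.Size([1, 7])

Note that this LSTMPyTorch signature differs from the removed no-argument LSTMPyTorch(); any previously saved FER_dynamic_LSTM.pt state dict would need to match the new layer names and shapes to load cleanly.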
tabs/__pycache__/FACS_analysis.cpython-310.pyc CHANGED
Binary files a/tabs/__pycache__/FACS_analysis.cpython-310.pyc and b/tabs/__pycache__/FACS_analysis.cpython-310.pyc differ
 
tabs/__pycache__/speech_emotion_recognition.cpython-310.pyc CHANGED
Binary files a/tabs/__pycache__/speech_emotion_recognition.cpython-310.pyc and b/tabs/__pycache__/speech_emotion_recognition.cpython-310.pyc differ
 
tabs/__pycache__/speech_stress_analysis.cpython-310.pyc CHANGED
Binary files a/tabs/__pycache__/speech_stress_analysis.cpython-310.pyc and b/tabs/__pycache__/speech_stress_analysis.cpython-310.pyc differ