Spaces: Build error

Commit da18c29 (parent: b20a621): 12 Oct Gitex 2024

Files changed:
- .DS_Store +0 -0
- __pycache__/ui_components.cpython-310.pyc +0 -0
- app/__pycache__/__init__.cpython-310.pyc +0 -0
- app/__pycache__/app_utils.cpython-310.pyc +0 -0
- app/__pycache__/config.cpython-310.pyc +0 -0
- app/__pycache__/face_utils.cpython-310.pyc +0 -0
- app/__pycache__/model.cpython-310.pyc +0 -0
- app/__pycache__/model_architectures.cpython-310.pyc +0 -0
- app/__pycache__/plot.cpython-310.pyc +0 -0
- app/app_utils.py +25 -8
- app/model.py +65 -51
- app/model_architectures.py +46 -0
- tabs/__pycache__/FACS_analysis.cpython-310.pyc +0 -0
- tabs/__pycache__/speech_emotion_recognition.cpython-310.pyc +0 -0
- tabs/__pycache__/speech_stress_analysis.cpython-310.pyc +0 -0
.DS_Store
ADDED
Binary file (8.2 kB).

__pycache__/ui_components.cpython-310.pyc
ADDED
Binary file (3.82 kB).

app/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/__init__.cpython-310.pyc and b/app/__pycache__/__init__.cpython-310.pyc differ

app/__pycache__/app_utils.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/app_utils.cpython-310.pyc and b/app/__pycache__/app_utils.cpython-310.pyc differ

app/__pycache__/config.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/config.cpython-310.pyc and b/app/__pycache__/config.cpython-310.pyc differ

app/__pycache__/face_utils.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/face_utils.cpython-310.pyc and b/app/__pycache__/face_utils.cpython-310.pyc differ

app/__pycache__/model.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/model.cpython-310.pyc and b/app/__pycache__/model.cpython-310.pyc differ

app/__pycache__/model_architectures.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/model_architectures.cpython-310.pyc and b/app/__pycache__/model_architectures.cpython-310.pyc differ

app/__pycache__/plot.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/plot.cpython-310.pyc and b/app/__pycache__/plot.cpython-310.pyc differ
app/app_utils.py
CHANGED
@@ -1,5 +1,3 @@
-
-
 import torch
 import numpy as np
 import mediapipe as mp
@@ -16,6 +14,21 @@ from app.plot import statistics_plot
 
 mp_face_mesh = mp.solutions.face_mesh
 
+def get_device():
+    if torch.backends.mps.is_available():
+        return torch.device("mps")
+    elif torch.cuda.is_available():
+        return torch.device("cuda")
+    else:
+        return torch.device("cpu")
+
+device = get_device()
+print(f"Using device: {device}")
+
+# Move models to the selected device
+pth_model_static = pth_model_static.to(device)
+pth_model_dynamic = pth_model_dynamic.to(device)
+
 def preprocess_image_and_predict(inp):
     inp = np.array(inp)
 
@@ -38,11 +51,12 @@ def preprocess_image_and_predict(inp):
         for fl in results.multi_face_landmarks:
             startX, startY, endX, endY = get_box(fl, w, h)
             cur_face = inp[startY:endY, startX:endX]
-            cur_face_n = pth_processing(Image.fromarray(cur_face))
+            cur_face_n = pth_processing(Image.fromarray(cur_face)).to(device)
             with torch.no_grad():
                 prediction = (
                     torch.nn.functional.softmax(pth_model_static(cur_face_n), dim=1)
                     .detach()
+                    .cpu()
                     .numpy()[0]
                 )
             confidences = {DICT_EMO[i]: float(prediction[i]) for i in range(7)}
@@ -73,7 +87,7 @@ def preprocess_frame_and_predict_aus(frame):
         for fl in results.multi_face_landmarks:
             startX, startY, endX, endY = get_box(fl, w, h)
             cur_face = frame[startY:endY, startX:endX]
-            cur_face_n = pth_processing(Image.fromarray(cur_face))
+            cur_face_n = pth_processing(Image.fromarray(cur_face)).to(device)
 
             with torch.no_grad():
                 features = pth_model_static(cur_face_n)
@@ -139,9 +153,9 @@ def preprocess_video_and_predict(video):
            cur_face = frame_copy[startY:endY, startX: endX]
 
            if count_face%config_data.FRAME_DOWNSAMPLING == 0:
-               cur_face_copy = pth_processing(Image.fromarray(cur_face))
+               cur_face_copy = pth_processing(Image.fromarray(cur_face)).to(device)
                with torch.no_grad():
-                   features = torch.nn.functional.relu(pth_model_static.extract_features(cur_face_copy)).detach().numpy()
+                   features = torch.nn.functional.relu(pth_model_static.extract_features(cur_face_copy)).detach().cpu().numpy()
                    au_intensities = features_to_au_intensities(pth_model_static(cur_face_copy))
 
                grayscale_cam = cam(input_tensor=cur_face_copy)
@@ -157,10 +171,10 @@ def preprocess_video_and_predict(video):
                else:
                    lstm_features = lstm_features[1:] + [features]
 
-               lstm_f = torch.from_numpy(np.vstack(lstm_features))
+               lstm_f = torch.from_numpy(np.vstack(lstm_features)).to(device)
                lstm_f = torch.unsqueeze(lstm_f, 0)
                with torch.no_grad():
-                   output = pth_model_dynamic(lstm_f).detach().numpy()
+                   output = pth_model_dynamic(lstm_f).detach().cpu().numpy()
                last_output = output
 
            if count_face == 0:
@@ -214,6 +228,9 @@ def preprocess_video_and_predict(video):
 
     return video, path_save_video_face, path_save_video_hm, stat, au_stat
 
+# The rest of the functions remain the same
+# ...
+
 def au_statistics_plot(frames, au_intensities_list):
     fig, ax = plt.subplots(figsize=(12, 6))
     au_intensities_array = np.array(au_intensities_list)
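Note: the recurring change in app/app_utils.py is the device-selection pattern: pick MPS, CUDA, or CPU once at startup, move the models and input tensors onto that device, and call .cpu() before .numpy(), since NumPy conversion only works on CPU tensors. A minimal, self-contained sketch of the same pattern (the toy_model and random input below are placeholders, not part of the app):

import torch

def get_device() -> torch.device:
    # Prefer Apple MPS, then CUDA, then fall back to CPU
    if torch.backends.mps.is_available():
        return torch.device("mps")
    elif torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

device = get_device()

# Placeholder model standing in for pth_model_static
toy_model = torch.nn.Linear(10, 7).to(device)
toy_model.eval()

x = torch.randn(1, 10).to(device)  # inputs must live on the same device as the model
with torch.no_grad():
    probs = torch.nn.functional.softmax(toy_model(x), dim=1)

# .cpu() mirrors the calls added throughout this diff: NumPy needs a CPU tensor
probs_np = probs.detach().cpu().numpy()[0]
print(probs_np.shape)  # (7,)
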
app/model.py
CHANGED
@@ -1,64 +1,78 @@
-"""
-File: model.py
-Author: Elena Ryumina and Dmitry Ryumin
-Description: This module provides functions for loading and processing a pre-trained deep learning model
-for facial expression recognition.
-License: MIT License
-"""
-
+import os
 import torch
-import
-
-from torchvision import transforms
+import torch.nn as nn
+import torchvision.transforms as transforms
 from pytorch_grad_cam import GradCAM
-
-
-from app.config import config_data
+from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+import logging
 from app.model_architectures import ResNet50, LSTMPyTorch
 
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Determine the device
+device = torch.device('mps' if torch.backends.mps.is_available() else 'cuda' if torch.cuda.is_available() else 'cpu')
+logger.info(f"Using device: {device}")
+
+# Define paths
+STATIC_MODEL_PATH = 'assets/models/FER_static_ResNet50_AffectNet.pt'
+DYNAMIC_MODEL_PATH = 'assets/models/FER_dynamic_LSTM.pt'
 
-def load_model(
-
-
-
-
-
-
-
-
-
+def load_model(model_class, model_path, *args, **kwargs):
+    model = model_class(*args, **kwargs).to(device)
+    if os.path.exists(model_path):
+        try:
+            model.load_state_dict(torch.load(model_path, map_location=device))
+            model.eval()
+            logger.info(f"Model loaded successfully from {model_path}")
+        except Exception as e:
+            logger.error(f"Error loading model from {model_path}: {str(e)}")
+            logger.info("Initializing with random weights.")
+    else:
+        logger.warning(f"Model file not found at {model_path}. Initializing with random weights.")
+    return model
 
-
-pth_model_static = ResNet50
-pth_model_static.load_state_dict(torch.load(path_static))
-pth_model_static.eval()
+# Load the static model
+pth_model_static = load_model(ResNet50, STATIC_MODEL_PATH, num_classes=7, channels=3)
 
-
-pth_model_dynamic = LSTMPyTorch
-pth_model_dynamic.load_state_dict(torch.load(path_dynamic))
-pth_model_dynamic.eval()
+# Load the dynamic model
+pth_model_dynamic = load_model(LSTMPyTorch, DYNAMIC_MODEL_PATH, input_size=2048, hidden_size=256, num_layers=2, num_classes=7)
 
-
+# Set up GradCAM
+target_layers = [pth_model_static.resnet.layer4[-1]]
 cam = GradCAM(model=pth_model_static, target_layers=target_layers)
 
-
-
-
-
+# Define image preprocessing
+pth_transform = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+])
+
+def pth_processing(img):
+    img = pth_transform(img).unsqueeze(0).to(device)
+    return img
+
+def predict_emotion(img):
+    with torch.no_grad():
+        output = pth_model_static(pth_processing(img))
+    _, predicted = torch.max(output, 1)
+    return predicted.item()
+
+def get_emotion_probabilities(img):
+    with torch.no_grad():
+        output = nn.functional.softmax(pth_model_static(pth_processing(img)), dim=1)
+    return output.squeeze().cpu().numpy()
 
-
-
-
-
-
-x[2, :, :] -= 131.0912
-return x
+def generate_cam(img):
+    input_tensor = pth_processing(img)
+    targets = [ClassifierOutputTarget(predict_emotion(img))]
+    grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
+    return grayscale_cam[0, :]
 
-
-transform = transforms.Compose([transforms.PILToTensor(), PreprocessInput()])
-img = img.resize(target_size, Image.Resampling.NEAREST)
-img = transform(img)
-img = torch.unsqueeze(img, 0)
-return img
+# Add any other necessary functions or variables here
 
-
+if __name__ == "__main__":
+    logger.info("Model initialization complete.")
+    # You can add some test code here to verify everything is working correctly
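Note: with the rewritten app/model.py, single-image inference reduces to a few helper calls. A hedged usage sketch, assuming the repository's app package is importable and the checkpoints under assets/models/ are present (otherwise the load_model fallback leaves the networks randomly initialized); the blank PIL image is a placeholder for the MediaPipe face crop used in the app:

from PIL import Image
from app.model import predict_emotion, get_emotion_probabilities, generate_cam

img = Image.new("RGB", (224, 224))      # placeholder face crop
label_idx = predict_emotion(img)        # argmax class index (0..6)
probs = get_emotion_probabilities(img)  # softmax over the 7 emotion classes
heatmap = generate_cam(img)             # Grad-CAM map for the predicted class
print(label_idx, probs.shape, heatmap.shape)  # e.g. 3 (7,) (224, 224)
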
app/model_architectures.py
ADDED
@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+import torchvision.models as models
+
+class ResNet50(nn.Module):
+    def __init__(self, num_classes=7, channels=3):
+        super(ResNet50, self).__init__()
+        self.resnet = models.resnet50(pretrained=True)
+        # Modify the first convolutional layer if channels != 3
+        if channels != 3:
+            self.resnet.conv1 = nn.Conv2d(channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
+        num_features = self.resnet.fc.in_features
+        self.resnet.fc = nn.Linear(num_features, num_classes)
+
+    def forward(self, x):
+        return self.resnet(x)
+
+    def extract_features(self, x):
+        x = self.resnet.conv1(x)
+        x = self.resnet.bn1(x)
+        x = self.resnet.relu(x)
+        x = self.resnet.maxpool(x)
+
+        x = self.resnet.layer1(x)
+        x = self.resnet.layer2(x)
+        x = self.resnet.layer3(x)
+        x = self.resnet.layer4(x)
+
+        x = self.resnet.avgpool(x)
+        x = torch.flatten(x, 1)
+        return x
+
+class LSTMPyTorch(nn.Module):
+    def __init__(self, input_size, hidden_size, num_layers, num_classes):
+        super(LSTMPyTorch, self).__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
+        self.fc = nn.Linear(hidden_size, num_classes)
+
+    def forward(self, x):
+        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
+        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
+        out, _ = self.lstm(x, (h0, c0))
+        out = self.fc(out[:, -1, :])
+        return out
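Note: the two new architectures are wired together through feature shape. ResNet50.extract_features stops at the ResNet-50 average pool, so it returns one 2048-dimensional vector per input image, matching the input_size=2048 that app/model.py passes to LSTMPyTorch; app/app_utils.py stacks a window of those per-frame vectors and unsqueezes a batch dimension before calling the dynamic model. A minimal shape check, assuming the app package is importable (random tensors stand in for preprocessed face crops):

import torch
from app.model_architectures import ResNet50, LSTMPyTorch

static = ResNet50(num_classes=7, channels=3).eval()
dynamic = LSTMPyTorch(input_size=2048, hidden_size=256, num_layers=2, num_classes=7).eval()

frames = torch.randn(4, 3, 224, 224)          # 4 face crops, already preprocessed
with torch.no_grad():
    feats = static.extract_features(frames)   # -> (4, 2048), one vector per frame
    window = feats.unsqueeze(0)               # -> (1, 4, 2048): a batch of one sequence
    logits = dynamic(window)                  # -> (1, 7), emotion logits for the window
print(feats.shape, logits.shape)
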
tabs/__pycache__/FACS_analysis.cpython-310.pyc
CHANGED
Binary files a/tabs/__pycache__/FACS_analysis.cpython-310.pyc and b/tabs/__pycache__/FACS_analysis.cpython-310.pyc differ

tabs/__pycache__/speech_emotion_recognition.cpython-310.pyc
CHANGED
Binary files a/tabs/__pycache__/speech_emotion_recognition.cpython-310.pyc and b/tabs/__pycache__/speech_emotion_recognition.cpython-310.pyc differ

tabs/__pycache__/speech_stress_analysis.cpython-310.pyc
CHANGED
Binary files a/tabs/__pycache__/speech_stress_analysis.cpython-310.pyc and b/tabs/__pycache__/speech_stress_analysis.cpython-310.pyc differ