vitorcalvi committed · Commit 38e0e4a · 1 Parent(s): 2ae67c3
.DS_Store ADDED
Binary file (6.15 kB).
 
README.md CHANGED
@@ -4,10 +4,56 @@ emoji: 😀😲😐😥🥴😱😡
 colorFrom: blue
 colorTo: pink
 sdk: gradio
- sdk_version: '4.24.0'
+ sdk_version: 4.24.0
 app_file: app.py
 pinned: false
 license: mit
+ short_description: A tool to detect Stress, Anxiety and Depression
 ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ## Technologies
+
+ This project utilizes various Python scripts for different aspects of analysis and recognition:
+
+ - `blink_detection.py`: Detects and analyzes blinking patterns.
+ - `body_movement_analysis.py`: Analyzes body movements.
+ - `emotion_analysis.py`: Analyzes emotional states.
+ - `face_expressions.py`: Recognizes facial expressions.
+ - `FACS_analysis_sad.py`: Performs Facial Action Coding System analysis for sadness.
+ - `gaze_estimation.py`: Estimates gaze direction.
+ - `head_posture_detection.py`: Detects head posture.
+ - `heart_rate_variability.py`: Analyzes heart rate variability.
+ - `posture_analysis.py`: Analyzes posture.
+ - `roberta_chatbot.py`: Chatbot using the RoBERTa model.
+ - `sentiment_analysis.py`: Performs sentiment analysis.
+ - `skin_analysis.py`: Analyzes skin conditions.
+ - `sleep_quality.py`: Evaluates sleep quality.
+ - `speech_emotion_recognition.py`: Recognizes emotions from speech.
+ - `speech_stress_analysis.py`: Analyzes stress levels from speech.
+
+ Together, these scripts provide comprehensive analysis capabilities for various aspects of human behavior and physiology.
+
+ ## Upload Trick for the Hugging Face Hub
+
+ # Track large files with Git LFS
+
+ git lfs track "assets/models/*.dat"
+ git lfs track "assets/models/*.pt"
+
+ # Add the .gitattributes file and commit it
+
+ git add .gitattributes
+ git commit -m "Track large files with Git LFS"
+
+ # Add your large files and commit them
+
+ git add assets/models/shape_predictor_68_face_landmarks.dat
+ git add assets/models/FER_dinamic_LSTM_IEMOCAP.pt
+ git add assets/models/FER_static_ResNet50_AffectNet.pt
+ git commit -m "Add large files"
+
+ # Add remaining files, commit, and push
+
+ git add .
+ git commit -m 'pre-launch'
+ git push origin main --force
app.py CHANGED
@@ -4,13 +4,6 @@ from tabs.FACS_analysis import create_facs_analysis_tab
 from ui_components import CUSTOM_CSS, HEADER_HTML, DISCLAIMER_HTML
 import spaces  # Importing spaces to utilize Zero GPU

- # Initialize Zero GPU
- if torch.cuda.is_available():
-     zero = torch.Tensor([0]).cuda()
-     print(f"Initial device: {zero.device}")
- else:
-     zero = torch.Tensor([0])
-     print("CUDA is not available. Using CPU.")

 # Define the tab structure
 TAB_STRUCTURE = [
@@ -22,10 +15,6 @@ TAB_STRUCTURE = [
 # Decorate GPU-dependent function with Zero GPU
 @spaces.GPU(duration=120)  # Allocates GPU for 120 seconds when needed
 def create_demo():
-     if torch.cuda.is_available():
-         print(f"Device inside create_demo: {zero.device}")
-     else:
-         print("CUDA is not available inside create_demo.")

     # Gradio blocks to create the interface
     with gr.Blocks(css=CUSTOM_CSS) as demo:
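The removed block allocated a CUDA tensor at module import time; under ZeroGPU, CUDA should only be touched inside functions decorated with `@spaces.GPU`, which is the pattern the remaining code follows. A minimal sketch of that pattern (the function and component names are illustrative, and `spaces` is the package provided on Hugging Face Spaces):

```python
import gradio as gr
import spaces  # provided on Hugging Face Spaces (ZeroGPU)
import torch

@spaces.GPU(duration=120)  # GPU is attached only while this function runs
def run_inference(text):
    # On ZeroGPU hardware, CUDA becomes available inside the decorated call.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"running on {device}: {text}"

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    inp.submit(run_inference, inputs=inp, outputs=out)

if __name__ == "__main__":
    demo.launch()
```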
app/au_processing.py ADDED
import numpy as np
import matplotlib.pyplot as plt
import cv2
import torch
from PIL import Image
from app.model import pth_model_static, cam, pth_processing
from app.face_utils import get_box
from pytorch_grad_cam.utils.image import show_cam_on_image  # needed for the heatmap overlay below
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh


def preprocess_frame_and_predict_aus(frame):
    if len(frame.shape) == 2:
        frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
    elif frame.shape[2] == 4:
        frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)

    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as face_mesh:
        results = face_mesh.process(frame)

        if results.multi_face_landmarks:
            h, w = frame.shape[:2]
            for fl in results.multi_face_landmarks:
                startX, startY, endX, endY = get_box(fl, w, h)
                cur_face = frame[startY:endY, startX:endX]
                cur_face_n = pth_processing(Image.fromarray(cur_face))

                with torch.no_grad():
                    features = pth_model_static(cur_face_n)
                    au_intensities = features_to_au_intensities(features)

                grayscale_cam = cam(input_tensor=cur_face_n)
                grayscale_cam = grayscale_cam[0, :]
                cur_face_hm = cv2.resize(cur_face, (224, 224))
                cur_face_hm = np.float32(cur_face_hm) / 255
                heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=True)

                return cur_face, au_intensities, heatmap

    return None, None, None


def features_to_au_intensities(features):
    features_np = features.detach().cpu().numpy()[0]
    au_intensities = (features_np - features_np.min()) / (features_np.max() - features_np.min())
    return au_intensities[:24]  # Assuming we want 24 AUs


def au_statistics_plot(frames, au_intensities_list):
    fig, ax = plt.subplots(figsize=(12, 6))
    au_intensities_array = np.array(au_intensities_list)

    for i in range(au_intensities_array.shape[1]):
        ax.plot(frames, au_intensities_array[:, i], label=f'AU{i+1}')

    ax.set_xlabel('Frame')
    ax.set_ylabel('AU Intensity')
    ax.set_title('Action Unit Intensities Over Time')
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    return fig
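`features_to_au_intensities` above is a plain min–max rescaling of the static model's feature vector into [0, 1], with the first 24 values kept as pseudo-AUs. A standalone sketch of the same operation (the feature length of 64 is illustrative):

```python
import numpy as np

features_np = np.random.rand(64).astype(np.float32)   # stand-in for the static model's feature vector
au = (features_np - features_np.min()) / (features_np.max() - features_np.min())
au_intensities = au[:24]                               # keep the first 24 values as "AUs"
assert 0.0 <= au_intensities.min() and au_intensities.max() <= 1.0
```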
app/authors.py ADDED
"""
File: authors.py
Author: Elena Ryumina and Dmitry Ryumin
Description: About the authors.
License: MIT License
"""


AUTHORS = """
Authors: [Elena Ryumina](https://github.com/ElenaRyumina), [Dmitry Ryumin](https://github.com/DmitryRyumin), [Denis Dresvyanskiy](https://www.uni-ulm.de/en/nt/staff/research-assistants/dresvyanskiy/), [Maxim Markitantov](https://hci.nw.ru/en/employees/10) and [Alexey Karpov](https://hci.nw.ru/en/employees/1)

Authorship contribution:

App developers: ``Elena Ryumina`` and ``Dmitry Ryumin``

Methodology developers: ``Elena Ryumina``, ``Denis Dresvyanskiy`` and ``Alexey Karpov``

Model developer: ``Elena Ryumina``

TensorFlow to PyTorch model converters: ``Maxim Markitantov`` and ``Elena Ryumina``

Citation

If you are using EMO-AffectNetModel in your research, please consider citing the research [paper](https://www.sciencedirect.com/science/article/pii/S0925231222012656). Here is an example of a BibTeX entry:

<div class="highlight highlight-text-bibtex notranslate position-relative overflow-auto" dir="auto"><pre><span class="pl-k">@article</span>{<span class="pl-en">RYUMINA2022</span>,
<span class="pl-s">title</span> = <span class="pl-s"><span class="pl-pds">{</span>In Search of a Robust Facial Expressions Recognition Model: A Large-Scale Visual Cross-Corpus Study<span class="pl-pds">}</span></span>,
<span class="pl-s">author</span> = <span class="pl-s"><span class="pl-pds">{</span>Elena Ryumina and Denis Dresvyanskiy and Alexey Karpov<span class="pl-pds">}</span></span>,
<span class="pl-s">journal</span> = <span class="pl-s"><span class="pl-pds">{</span>Neurocomputing<span class="pl-pds">}</span></span>,
<span class="pl-s">year</span> = <span class="pl-s"><span class="pl-pds">{</span>2022<span class="pl-pds">}</span></span>,
<span class="pl-s">doi</span> = <span class="pl-s"><span class="pl-pds">{</span>10.1016/j.neucom.2022.10.013<span class="pl-pds">}</span></span>,
<span class="pl-s">url</span> = <span class="pl-s"><span class="pl-pds">{</span>https://www.sciencedirect.com/science/article/pii/S0925231222012656<span class="pl-pds">}</span></span>,
}</div>
"""
app/config.py ADDED
"""
File: config.py
Author: Elena Ryumina and Dmitry Ryumin
Description: Configuration file.
License: MIT License
"""

import toml
from typing import Dict
from types import SimpleNamespace


def flatten_dict(prefix: str, d: Dict) -> Dict:
    result = {}

    for k, v in d.items():
        if isinstance(v, dict):
            result.update(flatten_dict(f"{prefix}{k}_", v))
        else:
            result[f"{prefix}{k}"] = v

    return result


config = toml.load("config.toml")

config_data = flatten_dict("", config)

config_data = SimpleNamespace(**config_data)

DICT_EMO = {
    0: "Neutral",
    1: "Happiness",
    2: "Sadness",
    3: "Surprise",
    4: "Fear",
    5: "Disgust",
    6: "Anger",
}

COLORS = {
    0: 'blue',
    1: 'orange',
    2: 'green',
    3: 'red',
    4: 'purple',
    5: 'brown',
    6: 'pink'
}
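For reference, a short sketch of what the flattening produces (the nested TOML content here is hypothetical; only names such as `APP_VERSION` and `FRAME_DOWNSAMPLING` are read elsewhere in the app):

```python
from types import SimpleNamespace

def flatten_dict(prefix, d):
    # same logic as above, repeated so this snippet runs standalone
    result = {}
    for k, v in d.items():
        if isinstance(v, dict):
            result.update(flatten_dict(f"{prefix}{k}_", v))
        else:
            result[f"{prefix}{k}"] = v
    return result

flat = flatten_dict("", {"APP": {"VERSION": "0.2.0"}, "FRAME_DOWNSAMPLING": 5})
print(flat)                                      # {'APP_VERSION': '0.2.0', 'FRAME_DOWNSAMPLING': 5}
cfg = SimpleNamespace(**flat)
print(cfg.APP_VERSION, cfg.FRAME_DOWNSAMPLING)   # 0.2.0 5
```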
app/description.py ADDED
"""
File: description.py
Author: Elena Ryumina and Dmitry Ryumin
Description: Project description for the Gradio app.
License: MIT License
"""

# Importing necessary components for the Gradio app
from app.config import config_data

DESCRIPTION_STATIC = f"""\
# Static Facial Expression Recognition
<div class="app-flex-container">
    <img src="https://img.shields.io/badge/version-v{config_data.APP_VERSION}-rc0" alt="Version">
    <a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FElenaRyumina%2FFacial_Expression_Recognition"><img src="https://api.visitorbadge.io/api/combined?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FElenaRyumina%2FFacial_Expression_Recognition&countColor=%23263759&style=flat" /></a>
    <a href="https://paperswithcode.com/paper/in-search-of-a-robust-facial-expressions"><img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/in-search-of-a-robust-facial-expressions/facial-expression-recognition-on-affectnet" /></a>
</div>
"""

DESCRIPTION_DYNAMIC = f"""\
# Dynamic Facial Expression Recognition
<div class="app-flex-container">
    <img src="https://img.shields.io/badge/version-v{config_data.APP_VERSION}-rc0" alt="Version">
    <a href="https://paperswithcode.com/paper/in-search-of-a-robust-facial-expressions"><img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/in-search-of-a-robust-facial-expressions/facial-expression-recognition-on-affectnet" /></a>
</div>
"""

DESCRIPTION_SLEEP_QUALITY = """
# Sleep Quality Analysis

This tab analyzes sleep quality based on facial features, focusing on skin tone and eye bags.

## How to use:
1. Upload a video of a person's face.
2. Click 'Submit' to process the video.
3. View the results, including:
   - Original video
   - Processed face video
   - Sleep quality analysis video
   - Eye bags detection image
   - Sleep quality statistics over time

The analysis provides insights into potential sleep issues based on visual cues.

Note: This analysis is for informational purposes only and should not be considered a medical diagnosis. Always consult with a healthcare professional for sleep-related concerns.
"""
app/image_processing.py ADDED
import numpy as np
import cv2
from PIL import Image
import torch
from app.model import pth_model_static, cam, pth_processing
from app.face_utils import get_box
from app.config import DICT_EMO
from pytorch_grad_cam.utils.image import show_cam_on_image
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh


def preprocess_image_and_predict(inp):
    # Check for a missing input before converting; np.array(None) is not None,
    # so the check must happen first.
    if inp is None:
        return None, None, None

    inp = np.array(inp)

    try:
        h, w = inp.shape[:2]
    except Exception:
        return None, None, None

    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ) as face_mesh:
        results = face_mesh.process(inp)
        if results.multi_face_landmarks:
            for fl in results.multi_face_landmarks:
                startX, startY, endX, endY = get_box(fl, w, h)
                cur_face = inp[startY:endY, startX:endX]
                cur_face_n = pth_processing(Image.fromarray(cur_face))
                with torch.no_grad():
                    prediction = (
                        torch.nn.functional.softmax(pth_model_static(cur_face_n), dim=1)
                        .detach()
                        .numpy()[0]
                    )
                confidences = {DICT_EMO[i]: float(prediction[i]) for i in range(7)}
                grayscale_cam = cam(input_tensor=cur_face_n)
                grayscale_cam = grayscale_cam[0, :]
                cur_face_hm = cv2.resize(cur_face, (224, 224))
                cur_face_hm = np.float32(cur_face_hm) / 255
                heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=True)

    return cur_face, heatmap, confidences
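The `cam` object imported from `app.model` is not part of this commit; a typical Grad-CAM setup with the `pytorch_grad_cam` package (listed in requirements as `grad-cam`) looks roughly like the sketch below, with the backbone and target layer chosen only for illustration:

```python
import numpy as np
import torch
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from torchvision.models import resnet50

model = resnet50(weights=None).eval()                    # stand-in for the FER backbone
cam = GradCAM(model=model, target_layers=[model.layer4[-1]])

input_tensor = torch.rand(1, 3, 224, 224)                # stand-in for pth_processing output
grayscale_cam = cam(input_tensor=input_tensor)[0, :]     # (224, 224) activation map

rgb_img = np.random.rand(224, 224, 3).astype(np.float32) # face crop scaled to [0, 1]
heatmap = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)
```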
app/plot.py ADDED
"""
File: plot.py
Author: Elena Ryumina and Dmitry Ryumin
Description: Plotting statistical information.
License: MIT License
"""
import matplotlib.pyplot as plt
import numpy as np

# Importing necessary components for the Gradio app
from app.config import DICT_EMO, COLORS


def statistics_plot(frames, probs):
    fig, ax = plt.subplots(figsize=(10, 4))
    fig.subplots_adjust(left=0.07, bottom=0.14, right=0.98, top=0.8, wspace=0, hspace=0)
    # Set the left, bottom, right, and top margins to leave room for the legend and axis labels
    probs = np.array(probs)
    for i in range(7):
        try:
            ax.plot(frames, probs[:, i], label=DICT_EMO[i], c=COLORS[i])
        except Exception:
            return None

    ax.legend(loc='upper center', bbox_to_anchor=(0.47, 1.2), ncol=7, fontsize=12)
    ax.set_xlabel('Frames', fontsize=12)  # Add the X-axis label
    ax.set_ylabel('Probability', fontsize=12)  # Add the Y-axis label
    ax.grid(True)
    return plt
app/sleep_quality_processing.py ADDED
import cv2
import numpy as np
import matplotlib.pyplot as plt
import mediapipe as mp
from app.face_utils import get_box

mp_face_mesh = mp.solutions.face_mesh


def preprocess_video_and_predict_sleep_quality(video):
    cap = cv2.VideoCapture(video)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = np.round(cap.get(cv2.CAP_PROP_FPS))

    path_save_video_original = 'result_original.mp4'
    path_save_video_face = 'result_face.mp4'
    path_save_video_sleep = 'result_sleep.mp4'

    vid_writer_original = cv2.VideoWriter(path_save_video_original, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
    vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
    vid_writer_sleep = cv2.VideoWriter(path_save_video_sleep, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))

    frames = []
    sleep_quality_scores = []
    eye_bags_images = []

    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_rgb)

            if results.multi_face_landmarks:
                for fl in results.multi_face_landmarks:
                    startX, startY, endX, endY = get_box(fl, w, h)
                    cur_face = frame_rgb[startY:endY, startX:endX]

                    sleep_quality_score, eye_bags_image = analyze_sleep_quality(cur_face)
                    sleep_quality_scores.append(sleep_quality_score)
                    eye_bags_images.append(cv2.resize(eye_bags_image, (224, 224)))

                    sleep_quality_viz = create_sleep_quality_visualization(cur_face, sleep_quality_score)

                    cur_face = cv2.resize(cur_face, (224, 224))

                    vid_writer_face.write(cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR))
                    vid_writer_sleep.write(sleep_quality_viz)

            vid_writer_original.write(frame)
            frames.append(len(frames) + 1)

    cap.release()
    vid_writer_original.release()
    vid_writer_face.release()
    vid_writer_sleep.release()

    sleep_stat = sleep_quality_statistics_plot(frames, sleep_quality_scores)

    if eye_bags_images:
        average_eye_bags_image = np.mean(np.array(eye_bags_images), axis=0).astype(np.uint8)
    else:
        average_eye_bags_image = np.zeros((224, 224, 3), dtype=np.uint8)

    return (path_save_video_original, path_save_video_face, path_save_video_sleep,
            average_eye_bags_image, sleep_stat)


def analyze_sleep_quality(face_image):
    # Placeholder function - implement your sleep quality analysis here
    sleep_quality_score = np.random.random()
    eye_bags_image = cv2.resize(face_image, (224, 224))
    return sleep_quality_score, eye_bags_image


def create_sleep_quality_visualization(face_image, sleep_quality_score):
    viz = face_image.copy()
    cv2.putText(viz, f"Sleep Quality: {sleep_quality_score:.2f}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    return cv2.cvtColor(viz, cv2.COLOR_RGB2BGR)


def sleep_quality_statistics_plot(frames, sleep_quality_scores):
    fig, ax = plt.subplots()
    ax.plot(frames, sleep_quality_scores)
    ax.set_xlabel('Frame')
    ax.set_ylabel('Sleep Quality Score')
    ax.set_title('Sleep Quality Over Time')
    plt.tight_layout()
    return fig
app/video_processing.py ADDED
import cv2
import numpy as np
import torch
from PIL import Image
import mediapipe as mp
from app.model import pth_model_static, pth_model_dynamic, cam, pth_processing
from app.face_utils import get_box, display_info
from app.config import config_data
from app.plot import statistics_plot
from pytorch_grad_cam.utils.image import show_cam_on_image  # needed for the heatmap overlay below
from .au_processing import features_to_au_intensities, au_statistics_plot

mp_face_mesh = mp.solutions.face_mesh


def preprocess_video_and_predict(video):
    cap = cv2.VideoCapture(video)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = np.round(cap.get(cv2.CAP_PROP_FPS))

    path_save_video_face = 'result_face.mp4'
    vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))

    path_save_video_hm = 'result_hm.mp4'
    vid_writer_hm = cv2.VideoWriter(path_save_video_hm, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))

    lstm_features = []
    count_frame = 1
    count_face = 0
    probs = []
    frames = []
    au_intensities_list = []
    last_output = None
    last_heatmap = None
    last_au_intensities = None
    cur_face = None

    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:

        while cap.isOpened():
            _, frame = cap.read()
            if frame is None:
                break

            frame_copy = frame.copy()
            frame_copy.flags.writeable = False
            frame_copy = cv2.cvtColor(frame_copy, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_copy)
            frame_copy.flags.writeable = True

            if results.multi_face_landmarks:
                for fl in results.multi_face_landmarks:
                    startX, startY, endX, endY = get_box(fl, w, h)
                    cur_face = frame_copy[startY:endY, startX:endX]

                    if count_face % config_data.FRAME_DOWNSAMPLING == 0:
                        cur_face_copy = pth_processing(Image.fromarray(cur_face))
                        with torch.no_grad():
                            features = torch.nn.functional.relu(pth_model_static.extract_features(cur_face_copy)).detach().numpy()
                            au_intensities = features_to_au_intensities(pth_model_static(cur_face_copy))

                        grayscale_cam = cam(input_tensor=cur_face_copy)
                        grayscale_cam = grayscale_cam[0, :]
                        cur_face_hm = cv2.resize(cur_face, (224, 224), interpolation=cv2.INTER_AREA)
                        cur_face_hm = np.float32(cur_face_hm) / 255
                        heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=False)
                        last_heatmap = heatmap
                        last_au_intensities = au_intensities

                        if len(lstm_features) == 0:
                            lstm_features = [features] * 10
                        else:
                            lstm_features = lstm_features[1:] + [features]

                        lstm_f = torch.from_numpy(np.vstack(lstm_features))
                        lstm_f = torch.unsqueeze(lstm_f, 0)
                        with torch.no_grad():
                            output = pth_model_dynamic(lstm_f).detach().numpy()
                        last_output = output

                        if count_face == 0:
                            count_face += 1

                    else:
                        if last_output is not None:
                            output = last_output
                            heatmap = last_heatmap
                            au_intensities = last_au_intensities

                        elif last_output is None:
                            output = np.empty((1, 7))
                            output[:] = np.nan
                            au_intensities = np.empty(24)
                            au_intensities[:] = np.nan

                    probs.append(output[0])
                    frames.append(count_frame)
                    au_intensities_list.append(au_intensities)
            else:
                if last_output is not None:
                    lstm_features = []
                    empty = np.empty((7))
                    empty[:] = np.nan
                    probs.append(empty)
                    frames.append(count_frame)
                    au_intensities_list.append(np.full(24, np.nan))

            if cur_face is not None:
                heatmap_f = display_info(heatmap, 'Frame: {}'.format(count_frame), box_scale=.3)

                cur_face = cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR)
                cur_face = cv2.resize(cur_face, (224, 224), interpolation=cv2.INTER_AREA)
                cur_face = display_info(cur_face, 'Frame: {}'.format(count_frame), box_scale=.3)
                vid_writer_face.write(cur_face)
                vid_writer_hm.write(heatmap_f)

            count_frame += 1
            if count_face != 0:
                count_face += 1

        vid_writer_face.release()
        vid_writer_hm.release()

        stat = statistics_plot(frames, probs)
        au_stat = au_statistics_plot(frames, au_intensities_list)

        if not stat or not au_stat:
            return None, None, None, None, None

    return video, path_save_video_face, path_save_video_hm, stat, au_stat
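The dynamic (LSTM) model above is fed a sliding window of the ten most recent static feature vectors. The buffering logic in isolation (the 512-dimensional features are illustrative):

```python
import numpy as np
import torch

window = []                                   # ten most recent per-frame feature vectors
for _ in range(25):                           # stand-in for the frames of a processed video
    features = np.random.rand(1, 512).astype(np.float32)
    if len(window) == 0:
        window = [features] * 10              # seed the window with the first features
    else:
        window = window[1:] + [features]      # drop the oldest entry, append the newest
    lstm_input = torch.from_numpy(np.vstack(window)).unsqueeze(0)   # shape (1, 10, 512)
```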
assets/.DS_Store CHANGED
Binary files a/assets/.DS_Store and b/assets/.DS_Store differ
 
requirements.txt CHANGED
@@ -1,49 +1,139 @@
- # CUDA-enabled PyTorch packages
- torch
- torchvision
- torchaudio
-
- # Core dependencies
- gradio==4.38.1
- gradio_client==1.1.0
-
- # Additional dependencies
 absl-py==2.1.0
 aiofiles==23.2.1
 altair==5.3.0
+ annotated-types==0.7.0
 anyio==4.4.0
+ astunparse==1.6.3
 attrs==23.2.0
 audioread==3.0.1
 certifi==2024.7.4
+ cffi==1.16.0
 charset-normalizer==3.3.2
 click==8.1.7
+ contourpy==1.2.1
+ cycler==0.12.1
 decorator==4.4.2
+ dlib==19.24.4
+ dnspython==2.6.1
+ email_validator==2.2.0
+ exceptiongroup==1.2.2
 fastapi==0.111.1
+ fastapi-cli==0.0.4
+ ffmpy==0.3.2
+ filelock==3.15.4
+ flatbuffers==24.3.25
+ fonttools==4.53.1
+ fsspec==2024.6.1
+ gast==0.6.0
+ google-pasta==0.2.0
+ grad-cam
+ gradio==4.38.1
+ gradio_client==1.1.0
+ grpcio==1.64.1
+ h11==0.14.0
 h5py==3.11.0
+ httpcore==1.0.5
+ httptools==0.6.1
+ httpx==0.27.0
 huggingface-hub==0.23.5
 idna==3.7
+ imageio==2.34.2
+ imageio-ffmpeg==0.5.1
+ importlib_resources==6.4.0
+ imutils==0.5.4
+ jax==0.4.30
+ jaxlib==0.4.30
 Jinja2==3.1.4
 joblib==1.4.2
 jsonschema==4.23.0
+ jsonschema-specifications==2023.12.1
+ keras==3.4.1
 kiwisolver==1.4.5
+ lazy_loader==0.4
+ libclang==18.1.1
 librosa==0.10.2.post1
+ llvmlite==0.43.0
+ Markdown==3.6
+ markdown-it-py==3.0.0
 MarkupSafe==2.1.5
 matplotlib==3.9.1
+ mdurl==0.1.2
+ mediapipe==0.10.14
+ ml-dtypes==0.4.0
+ moviepy==1.0.3
+ mpmath==1.3.0
+ msgpack==1.0.8
+ namex==0.0.8
+ networkx==3.3
+ numba==0.60.0
 numpy==1.26.4
+ opencv-contrib-python==4.10.0.84
+ opencv-python==4.10.0.84
+ opt-einsum==3.3.0
+ optree==0.12.1
+ orjson==3.10.6
+ packaging==24.1
 pandas==2.2.2
- Pillow==10.4.0
+ pillow==10.4.0
+ platformdirs==4.2.2
+ pooch==1.8.2
+ proglog==0.1.10
+ protobuf==4.25.3
+ pycparser==2.22
 pydantic==2.8.2
+ pydantic_core==2.20.1
+ pydub==0.25.1
+ Pygments==2.18.0
+ pyparsing==3.1.2
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
 python-multipart==0.0.9
 pytz==2024.1
 PyYAML==6.0.1
+ referencing==0.35.1
+ regex==2024.5.15
 requests==2.32.3
+ rich==13.7.1
+ rpds-py==0.19.0
+ ruff==0.5.2
+ safetensors==0.4.3
 scikit-learn==1.5.1
 scipy==1.14.0
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.1
+ sounddevice==0.4.7
 soundfile==0.12.1
+ soxr==0.3.7
 starlette==0.37.2
+ sympy==1.13.0
+ tensorboard==2.17.0
+ tensorboard-data-server==0.7.2
+ tensorflow==2.17.0
+ tensorflow-io-gcs-filesystem==0.37.1
+ termcolor==2.4.0
+ tf_keras==2.17.0
+ threadpoolctl==3.5.0
+ tokenizers==0.19.1
+ toml==0.10.2
+ tomlkit==0.12.0
+ toolz==0.12.1
+ torch==2.3.1
+ torchaudio==2.3.1
+ torchvision==0.18.1
 tqdm==4.66.4
 transformers==4.42.4
+ ttach==0.0.3
+ typer==0.12.3
+ typing_extensions==4.12.2
+ tzdata==2024.1
+ ujson==5.10.0
+ urllib3==2.2.2
 uvicorn==0.30.1
-
- # Any other necessary dependencies
- # Add your additional dependencies here
+ uvloop==0.19.0
+ watchfiles==0.22.0
+ wavio==0.0.9
+ websockets==11.0.3
+ Werkzeug==3.0.3
+ wrapt==1.16.0
tabs/__emotion_analysis.py ADDED
import os
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import gradio as gr

os.environ["TOKENIZERS_PARALLELISM"] = "true"

emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]


def analyze_emotion(text):
    try:
        inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        outputs = emotion_model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        max_prob, max_index = torch.max(probs, dim=1)
        return emotion_labels[max_index.item()], f"{max_prob.item():.4f}"
    except Exception as e:
        print(f"Error in emotion analysis: {e}")
        return "Error", "N/A"


def create_emotion_tab():
    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(value='I actually speak to the experts myself to give you the best value you can get', lines=5, placeholder="Enter text here...", label="Input Text")
            with gr.Row():
                clear_btn = gr.Button("Clear", scale=1)
                submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit")
        with gr.Column(scale=1):
            output_emotion = gr.Textbox(label="Detected Emotion")
            output_confidence = gr.Textbox(label="Emotion Confidence Score")

    submit_btn.click(analyze_emotion, inputs=[input_text], outputs=[output_emotion, output_confidence])
    clear_btn.click(lambda: ("", "", ""), outputs=[input_text, output_emotion, output_confidence])
    gr.Examples(["I am so happy today!", "I feel terrible and sad.", "This is a neutral statement."], inputs=[input_text])
tabs/__sentiment_analysis.py ADDED
import os
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import gradio as gr

os.environ["TOKENIZERS_PARALLELISM"] = "true"

sentiment_tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
sentiment_model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
sentiment_labels = ["very negative", "negative", "neutral", "positive", "very positive"]


def analyze_sentiment(text):
    try:
        inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        outputs = sentiment_model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        max_prob, max_index = torch.max(probs, dim=1)
        return sentiment_labels[max_index.item()], f"{max_prob.item():.4f}"
    except Exception as e:
        print(f"Error in sentiment analysis: {e}")
        return "Error", "N/A"


def create_sentiment_tab():
    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(value="I actually speak to the experts myself to give you the best value you can get", lines=5, placeholder="Enter text here...", label="Input Text")
            with gr.Row():
                clear_btn = gr.Button("Clear", scale=1)
                submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit")
        with gr.Column(scale=1):
            output_sentiment = gr.Textbox(label="Detected Sentiment")
            output_confidence = gr.Textbox(label="Sentiment Confidence Score")

    submit_btn.click(analyze_sentiment, inputs=[input_text], outputs=[output_sentiment, output_confidence], queue=True)
    clear_btn.click(lambda: ("", "", ""), outputs=[input_text, output_sentiment, output_confidence], queue=True)
    gr.Examples(["I am so happy today!", "I feel terrible and sad.", "This is a neutral statement."], inputs=[input_text])
tabs/speech_emotion_recognition.py ADDED
# tabs/speech_emotion_recognition.py

import gradio as gr
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from transformers import pipeline
import torch
import tempfile
import warnings
import os

# Suppress specific warnings from transformers if needed
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')

# Determine the device
def get_device():
    if torch.backends.mps.is_available():
        device = torch.device("mps")
        print("Using MPS device for inference.")
    elif torch.cuda.is_available():
        device = torch.device("cuda")
        print("Using CUDA device for inference.")
    else:
        device = torch.device("cpu")
        print("Using CPU for inference.")
    return device

device = get_device()

# Initialize the pipelines with the specified device
try:
    emotion_model = pipeline(
        "audio-classification",
        model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
        device=0 if device.type == "cuda" else ("mps" if device.type == "mps" else -1)
    )
    print("Emotion model loaded successfully.")
except Exception as e:
    print(f"Error loading emotion model: {e}")
    emotion_model = None

try:
    transcription_model = pipeline(
        "automatic-speech-recognition",
        model="facebook/wav2vec2-base-960h",
        device=0 if device.type == "cuda" else ("mps" if device.type == "mps" else -1)
    )
    print("Transcription model loaded successfully.")
except Exception as e:
    print(f"Error loading transcription model: {e}")
    transcription_model = None

# Emotion Mapping
emotion_mapping = {
    "angry": (0.8, 0.8, -0.5),
    "happy": (0.6, 0.6, 0.8),
    "sad": (-0.6, -0.4, -0.6),
    "neutral": (0, 0, 0),
    "fear": (0.3, -0.3, -0.7),
    "surprise": (0.4, 0.2, 0.2),
    "disgust": (0.2, 0.5, -0.6),
    "calm": (-0.2, 0.1, 0.3),
    "excited": (0.7, 0.5, 0.7),
    "frustrated": (0.6, 0.5, -0.4)
}

def process_audio_emotion(audio_file):
    """
    Processes the input audio file to perform transcription and emotion recognition.
    Generates waveform and mel spectrogram plots.

    Returns:
        A tuple containing:
        - Transcription (str)
        - Emotion (str)
        - Confidence (%) (float)
        - Arousal (float)
        - Dominance (float)
        - Valence (float)
        - Waveform Plot (str: filepath)
        - Mel Spectrogram Plot (str: filepath)
    """
    if not audio_file:
        return (
            "No audio file provided.",  # Transcription (textbox)
            None,                       # Emotion (textbox)
            None,                       # Confidence (%) (number)
            None,                       # Arousal (number)
            None,                       # Dominance (number)
            None,                       # Valence (number)
            None,                       # Waveform Plot (image)
            None                        # Mel Spectrogram Plot (image)
        )

    try:
        y, sr = librosa.load(audio_file, sr=None)

        # Transcription
        if transcription_model:
            transcription_result = transcription_model(audio_file)
            transcription = transcription_result.get("text", "N/A")
        else:
            transcription = "Transcription model not loaded."

        # Emotion Recognition
        if emotion_model:
            emotion_results = emotion_model(audio_file)
            if emotion_results:
                emotion_result = emotion_results[0]
                emotion = emotion_result.get("label", "Unknown").lower()
                confidence = emotion_result.get("score", 0.0) * 100  # Convert to percentage
                arousal, dominance, valence = emotion_mapping.get(emotion, (0.0, 0.0, 0.0))
            else:
                emotion = "No emotion detected."
                confidence = 0.0
                arousal, dominance, valence = 0.0, 0.0, 0.0
        else:
            emotion = "Emotion model not loaded."
            confidence = 0.0
            arousal, dominance, valence = 0.0, 0.0, 0.0

        # Plotting Waveform
        plt.figure(figsize=(10, 4))
        librosa.display.waveshow(y, sr=sr)
        plt.title("Waveform")
        plt.xlabel("Time (s)")
        plt.ylabel("Amplitude")
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_waveform:
            plt.savefig(tmp_waveform.name, bbox_inches='tight')
            waveform_plot_path = tmp_waveform.name
        plt.close()

        # Plotting Mel Spectrogram
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
        plt.figure(figsize=(10, 4))
        librosa.display.specshow(librosa.power_to_db(mel_spec, ref=np.max), sr=sr, x_axis='time', y_axis='mel')
        plt.colorbar(format='%+2.0f dB')
        plt.title("Mel Spectrogram")
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_mel:
            plt.savefig(tmp_mel.name, bbox_inches='tight')
            mel_spec_plot_path = tmp_mel.name
        plt.close()

        return (
            transcription,          # Transcription (textbox)
            emotion.capitalize(),   # Emotion (textbox)
            confidence,             # Confidence (%) (number)
            arousal,                # Arousal (number)
            dominance,              # Dominance (number)
            valence,                # Valence (number)
            waveform_plot_path,     # Waveform Plot (image)
            mel_spec_plot_path      # Mel Spectrogram Plot (image)
        )
    except Exception as e:
        return (
            f"Error: {str(e)}",     # Transcription (textbox)
            None,                   # Emotion (textbox)
            None,                   # Confidence (%) (number)
            None,                   # Arousal (number)
            None,                   # Dominance (number)
            None,                   # Valence (number)
            None,                   # Waveform Plot (image)
            None                    # Mel Spectrogram Plot (image)
        )

def create_emotion_recognition_tab():
    """
    Creates the Emotion Recognition tab in the Gradio interface.
    """
    with gr.Row():
        with gr.Column(scale=2):
            input_audio = gr.Audio(label="Input Audio", type="filepath")
            gr.Examples(
                examples=["./assets/audio/fitness.wav"],
                inputs=[input_audio],
                label="Examples"
            )
        with gr.Column(scale=1):
            transcription_output = gr.Textbox(label="Transcription", interactive=False)
            emotion_output = gr.Textbox(label="Emotion", interactive=False)
            confidence_output = gr.Number(label="Confidence (%)", interactive=False)
            arousal_output = gr.Number(label="Arousal (Level of Energy)", interactive=False)
            dominance_output = gr.Number(label="Dominance (Degree of Control)", interactive=False)
            valence_output = gr.Number(label="Valence (Positivity/Negativity)", interactive=False)
        with gr.Column(scale=1):
            waveform_plot = gr.Image(label="Waveform")
            mel_spec_plot = gr.Image(label="Mel Spectrogram")

    input_audio.change(
        fn=process_audio_emotion,
        inputs=[input_audio],
        outputs=[
            transcription_output,
            emotion_output,
            confidence_output,
            arousal_output,
            dominance_output,
            valence_output,
            waveform_plot,
            mel_spec_plot
        ]
    )

# Call create_emotion_recognition_tab to create the Gradio interface
tabs/speech_stress_analysis.py ADDED
# tabs/speech_stress_analysis.py

import gradio as gr
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import warnings

# Suppress specific warnings from transformers if needed
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')

def extract_audio_features(audio_file):
    y, sr = librosa.load(audio_file, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    pitches = pitches[(magnitudes > np.median(magnitudes)) & (pitches > 0)]
    energy = librosa.feature.rms(y=y)[0]
    return mfccs, pitches, energy, y, sr

def analyze_voice_stress(audio_file):
    if not audio_file:
        return "No audio file provided.", None

    try:
        mfccs, pitches, energy, y, sr = extract_audio_features(audio_file)

        # Calculate variances
        var_mfccs = np.var(mfccs, axis=1).mean()  # Mean variance across MFCC coefficients
        var_energy = np.var(energy)  # Variance of RMS energy
        var_pitches = np.var(pitches) if len(pitches) > 0 else 0  # Variance of pitches if present

        # Debugging: Print individual variances
        print(f"Variance MFCCs (mean across coefficients): {var_mfccs}")
        print(f"Variance Energy: {var_energy}")
        print(f"Variance Pitches: {var_pitches}")

        # Normalize each variance using Z-Score Standardization
        mfccs_mean = 1000
        mfccs_std = 500
        energy_mean = 0.005
        energy_std = 0.005
        pitches_mean = 500000
        pitches_std = 200000

        norm_var_mfccs = (var_mfccs - mfccs_mean) / mfccs_std
        norm_var_energy = (var_energy - energy_mean) / energy_std
        norm_var_pitches = (var_pitches - pitches_mean) / pitches_std if var_pitches > 0 else 0

        # Debugging: Print normalized variances
        print(f"Normalized Variance MFCCs: {norm_var_mfccs}")
        print(f"Normalized Variance Energy: {norm_var_energy}")
        print(f"Normalized Variance Pitches: {norm_var_pitches}")

        # Combine normalized variances
        stress_level = np.mean([
            norm_var_mfccs,
            norm_var_energy,
            norm_var_pitches
        ]) if var_pitches > 0 else np.mean([norm_var_mfccs, norm_var_energy])

        # Debugging: Print stress_level before normalization
        print(f"Calculated Stress Level (before scaling): {stress_level}")

        # Scale to 0-100%
        normalized_stress = (stress_level + 3) / 6 * 100  # Maps -3 to 0%, +3 to 100%
        normalized_stress = np.clip(normalized_stress, 0, 100)  # Ensure within 0-100%

        # Debugging: Print normalized_stress
        print(f"Normalized Stress Level: {normalized_stress}")

        # Plotting
        fig, axs = plt.subplots(3, 1, figsize=(10, 12))

        # MFCCs
        img_mfcc = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[0])
        axs[0].set_title('MFCCs')
        axs[0].set_ylabel('MFCC Coefficient')
        fig.colorbar(img_mfcc, ax=axs[0])

        # Pitch
        axs[1].plot(pitches)
        axs[1].set_title('Pitch')
        axs[1].set_ylabel('Frequency (Hz)')

        # Energy
        axs[2].plot(energy)
        axs[2].set_title('Energy (RMS)')
        axs[2].set_ylabel('RMS Energy')
        axs[2].set_xlabel('Frames')

        plt.tight_layout()
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
            plt.savefig(temp_file.name)
            plot_path = temp_file.name
        plt.close()

        # Interpretation
        if normalized_stress < 33:
            stress_interpretation = "Low"
        elif normalized_stress < 66:
            stress_interpretation = "Medium"
        else:
            stress_interpretation = "High"

        return f"{normalized_stress:.2f}% - {stress_interpretation} Stress", plot_path
    except Exception as e:
        return f"Error: {str(e)}", None

def create_voice_stress_tab():
    with gr.Row():
        with gr.Column(scale=2):
            input_audio = gr.Audio(label="Input Audio", type="filepath")
            clear_btn = gr.Button("Clear", scale=1)
        with gr.Column(scale=1):
            output_stress = gr.Label(label="Stress Level")
            output_plot = gr.Image(label="Stress Analysis Plot")

    # Automatically trigger analysis when an audio file is uploaded
    input_audio.change(analyze_voice_stress, inputs=[input_audio], outputs=[output_stress, output_plot])

    # Clear all three outputs (the original lambda returned only two values)
    clear_btn.click(lambda: (None, None, None), outputs=[input_audio, output_stress, output_plot])

    gr.Examples(["./assets/audio/fitness.wav"], inputs=[input_audio])
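As a quick sanity check of the scaling used above, a combined z-score of 0 (variances equal to the assumed means) maps to 50%, while values beyond ±3 clip to the 0–100% bounds:

```python
import numpy as np

for z in (-4, -3, 0, 3, 4):
    stress = np.clip((z + 3) / 6 * 100, 0, 100)
    print(z, stress)   # -4 -> 0.0, -3 -> 0.0, 0 -> 50.0, 3 -> 100.0, 4 -> 100.0
```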