Spaces: Runtime error
Commit 38e0e4a
1 Parent(s): 2ae67c3
- .DS_Store +0 -0
- README.md +48 -2
- app.py +0 -11
- app/au_processing.py +64 -0
- app/authors.py +34 -0
- app/config.py +49 -0
- app/description.py +46 -0
- app/image_processing.py +49 -0
- app/plot.py +29 -0
- app/sleep_quality_processing.py +94 -0
- app/video_processing.py +132 -0
- assets/.DS_Store +0 -0
- requirements.txt +104 -14
- tabs/__emotion_analysis.py +36 -0
- tabs/__sentiment_analysis.py +36 -0
- tabs/speech_emotion_recognition.py +206 -0
- tabs/speech_stress_analysis.py +126 -0
.DS_Store
ADDED
Binary file (6.15 kB).
README.md
CHANGED
@@ -4,10 +4,56 @@
 colorFrom: blue
 colorTo: pink
 sdk: gradio
-sdk_version:
+sdk_version: 4.24.0
 app_file: app.py
 pinned: false
 license: mit
+short_description: A tool to detect Stress, Anxiety and Depression
 ---
 
-
+## Technologies
+
+This project utilizes various Python scripts for different aspects of analysis and recognition:
+
+- `blink_detection.py`: Detects and analyzes blinking patterns.
+- `body_movement_analysis.py`: Analyzes body movements.
+- `emotion_analysis.py`: Analyzes emotional states.
+- `face_expressions.py`: Recognizes facial expressions.
+- `FACS_analysis_sad.py`: Performs Facial Action Coding System analysis for sadness.
+- `gaze_estimation.py`: Estimates gaze direction.
+- `head_posture_detection.py`: Detects head posture.
+- `heart_rate_variability.py`: Analyzes heart rate variability.
+- `posture_analysis.py`: Analyzes posture.
+- `roberta_chatbot.py`: Chatbot using the RoBERTa model.
+- `sentiment_analysis.py`: Performs sentiment analysis.
+- `skin_analysis.py`: Analyzes skin conditions.
+- `sleep_quality.py`: Evaluates sleep quality.
+- `speech_emotion_recognition.py`: Recognizes emotions from speech.
+- `speech_stress_analysis.py`: Analyzes stress levels from speech.
+
+These scripts combine to provide comprehensive analysis capabilities for various aspects of human behavior and physiology.
+
+## Upload Trick to HF
+
+# Track large files with Git LFS
+
+git lfs track "assets/models/*.dat"
+git lfs track "assets/models/*.pt"
+
+# Add the .gitattributes file and commit it
+
+git add .gitattributes
+git commit -m "Track large files with Git LFS"
+
+# Add your large files and commit them
+
+git add assets/models/shape_predictor_68_face_landmarks.dat
+git add assets/models/FER_dinamic_LSTM_IEMOCAP.pt
+git add assets/models/FER_static_ResNet50_AffectNet.pt
+git commit -m "Add large files"
+
+# Add remaining files, commit, and push
+
+git add .
+git commit -m 'pre-launch'
+git push origin main --force
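One optional sanity check for the LFS workflow above, assuming Git LFS is already installed: running `git lfs ls-files` before the final push lists every file currently tracked by LFS, so you can confirm the .dat and .pt models will not be pushed as regular blobs.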
app.py
CHANGED
@@ -4,13 +4,6 @@ from tabs.FACS_analysis import create_facs_analysis_tab
 from ui_components import CUSTOM_CSS, HEADER_HTML, DISCLAIMER_HTML
 import spaces  # Importing spaces to utilize Zero GPU
 
-# Initialize Zero GPU
-if torch.cuda.is_available():
-    zero = torch.Tensor([0]).cuda()
-    print(f"Initial device: {zero.device}")
-else:
-    zero = torch.Tensor([0])
-    print("CUDA is not available. Using CPU.")
 
 # Define the tab structure
 TAB_STRUCTURE = [
@@ -22,10 +15,6 @@ TAB_STRUCTURE = [
 # Decorate GPU-dependent function with Zero GPU
 @spaces.GPU(duration=120)  # Allocates GPU for 120 seconds when needed
 def create_demo():
-    if torch.cuda.is_available():
-        print(f"Device inside create_demo: {zero.device}")
-    else:
-        print("CUDA is not available inside create_demo.")
 
     # Gradio blocks to create the interface
     with gr.Blocks(css=CUSTOM_CSS) as demo:
app/au_processing.py
ADDED
@@ -0,0 +1,64 @@
import numpy as np
import matplotlib.pyplot as plt
import cv2
import torch
from PIL import Image
from app.model import pth_model_static, cam, pth_processing
from app.face_utils import get_box
from pytorch_grad_cam.utils.image import show_cam_on_image  # needed for the Grad-CAM heatmap below
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh

def preprocess_frame_and_predict_aus(frame):
    if len(frame.shape) == 2:
        frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
    elif frame.shape[2] == 4:
        frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)

    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as face_mesh:
        results = face_mesh.process(frame)

    if results.multi_face_landmarks:
        h, w = frame.shape[:2]
        for fl in results.multi_face_landmarks:
            startX, startY, endX, endY = get_box(fl, w, h)
            cur_face = frame[startY:endY, startX:endX]
            cur_face_n = pth_processing(Image.fromarray(cur_face))

            with torch.no_grad():
                features = pth_model_static(cur_face_n)
                au_intensities = features_to_au_intensities(features)

            grayscale_cam = cam(input_tensor=cur_face_n)
            grayscale_cam = grayscale_cam[0, :]
            cur_face_hm = cv2.resize(cur_face, (224, 224))
            cur_face_hm = np.float32(cur_face_hm) / 255
            heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=True)

            return cur_face, au_intensities, heatmap

    return None, None, None

def features_to_au_intensities(features):
    features_np = features.detach().cpu().numpy()[0]
    au_intensities = (features_np - features_np.min()) / (features_np.max() - features_np.min())
    return au_intensities[:24]  # Assuming we want 24 AUs

def au_statistics_plot(frames, au_intensities_list):
    fig, ax = plt.subplots(figsize=(12, 6))
    au_intensities_array = np.array(au_intensities_list)

    for i in range(au_intensities_array.shape[1]):
        ax.plot(frames, au_intensities_array[:, i], label=f'AU{i+1}')

    ax.set_xlabel('Frame')
    ax.set_ylabel('AU Intensity')
    ax.set_title('Action Unit Intensities Over Time')
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    return fig
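Usage note: a minimal sketch of driving this module directly, outside the Gradio tabs. The image path is a hypothetical example; the sketch only relies on the functions defined above, which return None values when no face is detected.

import cv2
from app.au_processing import preprocess_frame_and_predict_aus, au_statistics_plot

frame = cv2.cvtColor(cv2.imread("face.jpg"), cv2.COLOR_BGR2RGB)  # hypothetical input image, converted to RGB
face_crop, au_intensities, heatmap = preprocess_frame_and_predict_aus(frame)
if face_crop is not None:
    fig = au_statistics_plot([1], [au_intensities])  # single-frame "series" of the 24 normalized intensities
    fig.savefig("au_intensities.png")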
app/authors.py
ADDED
@@ -0,0 +1,34 @@
"""
File: authors.py
Author: Elena Ryumina and Dmitry Ryumin
Description: About the authors.
License: MIT License
"""


AUTHORS = """
Authors: [Elena Ryumina](https://github.com/ElenaRyumina), [Dmitry Ryumin](https://github.com/DmitryRyumin), [Denis Dresvyanskiy](https://www.uni-ulm.de/en/nt/staff/research-assistants/dresvyanskiy/), [Maxim Markitantov](https://hci.nw.ru/en/employees/10) and [Alexey Karpov](https://hci.nw.ru/en/employees/1)

Authorship contribution:

App developers: ``Elena Ryumina`` and ``Dmitry Ryumin``

Methodology developers: ``Elena Ryumina``, ``Denis Dresvyanskiy`` and ``Alexey Karpov``

Model developer: ``Elena Ryumina``

TensorFlow to PyTorch model converters: ``Maxim Markitantov`` and ``Elena Ryumina``

Citation

If you are using EMO-AffectNetModel in your research, please consider citing the research [paper](https://www.sciencedirect.com/science/article/pii/S0925231222012656). Here is an example of a BibTeX entry:

<div class="highlight highlight-text-bibtex notranslate position-relative overflow-auto" dir="auto"><pre><span class="pl-k">@article</span>{<span class="pl-en">RYUMINA2022</span>,
  <span class="pl-s">title</span>   = <span class="pl-s"><span class="pl-pds">{</span>In Search of a Robust Facial Expressions Recognition Model: A Large-Scale Visual Cross-Corpus Study<span class="pl-pds">}</span></span>,
  <span class="pl-s">author</span>  = <span class="pl-s"><span class="pl-pds">{</span>Elena Ryumina and Denis Dresvyanskiy and Alexey Karpov<span class="pl-pds">}</span></span>,
  <span class="pl-s">journal</span> = <span class="pl-s"><span class="pl-pds">{</span>Neurocomputing<span class="pl-pds">}</span></span>,
  <span class="pl-s">year</span>    = <span class="pl-s"><span class="pl-pds">{</span>2022<span class="pl-pds">}</span></span>,
  <span class="pl-s">doi</span>     = <span class="pl-s"><span class="pl-pds">{</span>10.1016/j.neucom.2022.10.013<span class="pl-pds">}</span></span>,
  <span class="pl-s">url</span>     = <span class="pl-s"><span class="pl-pds">{</span>https://www.sciencedirect.com/science/article/pii/S0925231222012656<span class="pl-pds">}</span></span>,
}</div>
"""
app/config.py
ADDED
@@ -0,0 +1,49 @@
"""
File: config.py
Author: Elena Ryumina and Dmitry Ryumin
Description: Configuration file.
License: MIT License
"""

import toml
from typing import Dict
from types import SimpleNamespace


def flatten_dict(prefix: str, d: Dict) -> Dict:
    result = {}

    for k, v in d.items():
        if isinstance(v, dict):
            result.update(flatten_dict(f"{prefix}{k}_", v))
        else:
            result[f"{prefix}{k}"] = v

    return result


config = toml.load("config.toml")

config_data = flatten_dict("", config)

config_data = SimpleNamespace(**config_data)

DICT_EMO = {
    0: "Neutral",
    1: "Happiness",
    2: "Sadness",
    3: "Surprise",
    4: "Fear",
    5: "Disgust",
    6: "Anger",
}

COLORS = {
    0: 'blue',
    1: 'orange',
    2: 'green',
    3: 'red',
    4: 'purple',
    5: 'brown',
    6: 'pink'
}
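Usage note: a sketch of how flatten_dict behaves. The nested dictionary below is illustrative only; the keys that other files in this commit actually read from config.toml are APP_VERSION (description.py) and FRAME_DOWNSAMPLING (video_processing.py).

from app.config import flatten_dict

nested = {"APP_VERSION": "1.0", "FRAME_DOWNSAMPLING": 5, "plot": {"dpi": 100}}  # illustrative values
flat = flatten_dict("", nested)
# Nested tables are joined with underscores:
# {'APP_VERSION': '1.0', 'FRAME_DOWNSAMPLING': 5, 'plot_dpi': 100}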
app/description.py
ADDED
@@ -0,0 +1,46 @@
"""
File: description.py
Author: Elena Ryumina and Dmitry Ryumin
Description: Project description for the Gradio app.
License: MIT License
"""

# Importing necessary components for the Gradio app
from app.config import config_data

DESCRIPTION_STATIC = f"""\
# Static Facial Expression Recognition
<div class="app-flex-container">
    <img src="https://img.shields.io/badge/version-v{config_data.APP_VERSION}-rc0" alt="Version">
    <a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FElenaRyumina%2FFacial_Expression_Recognition"><img src="https://api.visitorbadge.io/api/combined?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FElenaRyumina%2FFacial_Expression_Recognition&countColor=%23263759&style=flat" /></a>
    <a href="https://paperswithcode.com/paper/in-search-of-a-robust-facial-expressions"><img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/in-search-of-a-robust-facial-expressions/facial-expression-recognition-on-affectnet" /></a>
</div>
"""

DESCRIPTION_DYNAMIC = f"""\
# Dynamic Facial Expression Recognition
<div class="app-flex-container">
    <img src="https://img.shields.io/badge/version-v{config_data.APP_VERSION}-rc0" alt="Version">
    <a href="https://paperswithcode.com/paper/in-search-of-a-robust-facial-expressions"><img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/in-search-of-a-robust-facial-expressions/facial-expression-recognition-on-affectnet" /></a>
</div>
"""

DESCRIPTION_SLEEP_QUALITY = """
# Sleep Quality Analysis

This tab analyzes sleep quality based on facial features, focusing on skin tone and eye bags.

## How to use:
1. Upload a video of a person's face.
2. Click 'Submit' to process the video.
3. View the results, including:
   - Original video
   - Processed face video
   - Sleep quality analysis video
   - Eye bags detection image
   - Sleep quality statistics over time

The analysis provides insights into potential sleep issues based on visual cues.

Note: This analysis is for informational purposes only and should not be considered a medical diagnosis. Always consult with a healthcare professional for sleep-related concerns.
"""
app/image_processing.py
ADDED
@@ -0,0 +1,49 @@
import numpy as np
import cv2
from PIL import Image
import torch
from app.model import pth_model_static, cam, pth_processing
from app.face_utils import get_box
from app.config import DICT_EMO
from pytorch_grad_cam.utils.image import show_cam_on_image
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh

def preprocess_image_and_predict(inp):
    inp = np.array(inp)

    if inp is None:
        return None, None, None

    try:
        h, w = inp.shape[:2]
    except Exception:
        return None, None, None

    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ) as face_mesh:
        results = face_mesh.process(inp)
        if results.multi_face_landmarks:
            for fl in results.multi_face_landmarks:
                startX, startY, endX, endY = get_box(fl, w, h)
                cur_face = inp[startY:endY, startX:endX]
                cur_face_n = pth_processing(Image.fromarray(cur_face))
                with torch.no_grad():
                    prediction = (
                        torch.nn.functional.softmax(pth_model_static(cur_face_n), dim=1)
                        .detach()
                        .numpy()[0]
                    )
                confidences = {DICT_EMO[i]: float(prediction[i]) for i in range(7)}
                grayscale_cam = cam(input_tensor=cur_face_n)
                grayscale_cam = grayscale_cam[0, :]
                cur_face_hm = cv2.resize(cur_face, (224, 224))
                cur_face_hm = np.float32(cur_face_hm) / 255
                heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=True)

    return cur_face, heatmap, confidences
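Usage note: a quick sketch of calling this module directly with a PIL image. The path is hypothetical, and the sketch assumes a face is actually detected, since the function only assigns its return values inside the landmark branch.

from PIL import Image
from app.image_processing import preprocess_image_and_predict

face_crop, heatmap, confidences = preprocess_image_and_predict(Image.open("face.jpg"))  # hypothetical image
print(max(confidences, key=confidences.get))  # most probable of the seven emotions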
app/plot.py
ADDED
@@ -0,0 +1,29 @@
"""
File: plot.py
Author: Elena Ryumina and Dmitry Ryumin
Description: Plotting statistical information.
License: MIT License
"""
import matplotlib.pyplot as plt
import numpy as np

# Importing necessary components for the Gradio app
from app.config import DICT_EMO, COLORS


def statistics_plot(frames, probs):
    fig, ax = plt.subplots(figsize=(10, 4))
    fig.subplots_adjust(left=0.07, bottom=0.14, right=0.98, top=0.8, wspace=0, hspace=0)
    # Set the left, bottom, right and top margins to leave room for the legend and axis labels
    probs = np.array(probs)
    for i in range(7):
        try:
            ax.plot(frames, probs[:, i], label=DICT_EMO[i], c=COLORS[i])
        except Exception:
            return None

    ax.legend(loc='upper center', bbox_to_anchor=(0.47, 1.2), ncol=7, fontsize=12)
    ax.set_xlabel('Frames', fontsize=12)  # Add the X-axis label
    ax.set_ylabel('Probability', fontsize=12)  # Add the Y-axis label
    ax.grid(True)
    return plt
app/sleep_quality_processing.py
ADDED
@@ -0,0 +1,94 @@
import cv2
import numpy as np
import matplotlib.pyplot as plt
import mediapipe as mp
from app.face_utils import get_box

mp_face_mesh = mp.solutions.face_mesh

def preprocess_video_and_predict_sleep_quality(video):
    cap = cv2.VideoCapture(video)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = np.round(cap.get(cv2.CAP_PROP_FPS))

    path_save_video_original = 'result_original.mp4'
    path_save_video_face = 'result_face.mp4'
    path_save_video_sleep = 'result_sleep.mp4'

    vid_writer_original = cv2.VideoWriter(path_save_video_original, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
    vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
    vid_writer_sleep = cv2.VideoWriter(path_save_video_sleep, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))

    frames = []
    sleep_quality_scores = []
    eye_bags_images = []

    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_rgb)

            if results.multi_face_landmarks:
                for fl in results.multi_face_landmarks:
                    startX, startY, endX, endY = get_box(fl, w, h)
                    cur_face = frame_rgb[startY:endY, startX:endX]

                    sleep_quality_score, eye_bags_image = analyze_sleep_quality(cur_face)
                    sleep_quality_scores.append(sleep_quality_score)
                    eye_bags_images.append(cv2.resize(eye_bags_image, (224, 224)))

                    sleep_quality_viz = create_sleep_quality_visualization(cur_face, sleep_quality_score)

                    cur_face = cv2.resize(cur_face, (224, 224))

                    vid_writer_face.write(cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR))
                    vid_writer_sleep.write(sleep_quality_viz)

            vid_writer_original.write(frame)
            frames.append(len(frames) + 1)

        cap.release()
        vid_writer_original.release()
        vid_writer_face.release()
        vid_writer_sleep.release()

    sleep_stat = sleep_quality_statistics_plot(frames, sleep_quality_scores)

    if eye_bags_images:
        average_eye_bags_image = np.mean(np.array(eye_bags_images), axis=0).astype(np.uint8)
    else:
        average_eye_bags_image = np.zeros((224, 224, 3), dtype=np.uint8)

    return (path_save_video_original, path_save_video_face, path_save_video_sleep,
            average_eye_bags_image, sleep_stat)

def analyze_sleep_quality(face_image):
    # Placeholder function - implement your sleep quality analysis here
    sleep_quality_score = np.random.random()
    eye_bags_image = cv2.resize(face_image, (224, 224))
    return sleep_quality_score, eye_bags_image

def create_sleep_quality_visualization(face_image, sleep_quality_score):
    viz = face_image.copy()
    cv2.putText(viz, f"Sleep Quality: {sleep_quality_score:.2f}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    return cv2.cvtColor(viz, cv2.COLOR_RGB2BGR)

def sleep_quality_statistics_plot(frames, sleep_quality_scores):
    fig, ax = plt.subplots()
    ax.plot(frames, sleep_quality_scores)
    ax.set_xlabel('Frame')
    ax.set_ylabel('Sleep Quality Score')
    ax.set_title('Sleep Quality Over Time')
    plt.tight_layout()
    return fig
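Usage note: a minimal driver sketch with a hypothetical input path. The function writes the three MP4 files into the working directory and returns their paths along with the averaged eye-bag crop and a matplotlib figure; keep in mind that analyze_sleep_quality above is still a placeholder that returns a random score.

from app.sleep_quality_processing import preprocess_video_and_predict_sleep_quality

orig_path, face_path, sleep_path, eye_bags_img, sleep_fig = preprocess_video_and_predict_sleep_quality("face_video.mp4")  # hypothetical video
sleep_fig.savefig("sleep_quality_over_time.png")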
app/video_processing.py
ADDED
@@ -0,0 +1,132 @@
import cv2
import numpy as np
import torch
from PIL import Image
import mediapipe as mp
from pytorch_grad_cam.utils.image import show_cam_on_image  # needed for the Grad-CAM heatmap below
from app.model import pth_model_static, pth_model_dynamic, cam, pth_processing
from app.face_utils import get_box, display_info
from app.config import config_data
from app.plot import statistics_plot
from .au_processing import features_to_au_intensities, au_statistics_plot

mp_face_mesh = mp.solutions.face_mesh

def preprocess_video_and_predict(video):
    cap = cv2.VideoCapture(video)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = np.round(cap.get(cv2.CAP_PROP_FPS))

    path_save_video_face = 'result_face.mp4'
    vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))

    path_save_video_hm = 'result_hm.mp4'
    vid_writer_hm = cv2.VideoWriter(path_save_video_hm, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))

    lstm_features = []
    count_frame = 1
    count_face = 0
    probs = []
    frames = []
    au_intensities_list = []
    last_output = None
    last_heatmap = None
    last_au_intensities = None
    cur_face = None

    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:

        while cap.isOpened():
            _, frame = cap.read()
            if frame is None:
                break

            frame_copy = frame.copy()
            frame_copy.flags.writeable = False
            frame_copy = cv2.cvtColor(frame_copy, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_copy)
            frame_copy.flags.writeable = True

            if results.multi_face_landmarks:
                for fl in results.multi_face_landmarks:
                    startX, startY, endX, endY = get_box(fl, w, h)
                    cur_face = frame_copy[startY:endY, startX:endX]

                    if count_face % config_data.FRAME_DOWNSAMPLING == 0:
                        cur_face_copy = pth_processing(Image.fromarray(cur_face))
                        with torch.no_grad():
                            features = torch.nn.functional.relu(pth_model_static.extract_features(cur_face_copy)).detach().numpy()
                            au_intensities = features_to_au_intensities(pth_model_static(cur_face_copy))

                        grayscale_cam = cam(input_tensor=cur_face_copy)
                        grayscale_cam = grayscale_cam[0, :]
                        cur_face_hm = cv2.resize(cur_face, (224, 224), interpolation=cv2.INTER_AREA)
                        cur_face_hm = np.float32(cur_face_hm) / 255
                        heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=False)
                        last_heatmap = heatmap
                        last_au_intensities = au_intensities

                        if len(lstm_features) == 0:
                            lstm_features = [features] * 10
                        else:
                            lstm_features = lstm_features[1:] + [features]

                        lstm_f = torch.from_numpy(np.vstack(lstm_features))
                        lstm_f = torch.unsqueeze(lstm_f, 0)
                        with torch.no_grad():
                            output = pth_model_dynamic(lstm_f).detach().numpy()
                        last_output = output

                        if count_face == 0:
                            count_face += 1

                    else:
                        if last_output is not None:
                            output = last_output
                            heatmap = last_heatmap
                            au_intensities = last_au_intensities

                        elif last_output is None:
                            output = np.empty((1, 7))
                            output[:] = np.nan
                            au_intensities = np.empty(24)
                            au_intensities[:] = np.nan

                    probs.append(output[0])
                    frames.append(count_frame)
                    au_intensities_list.append(au_intensities)
            else:
                if last_output is not None:
                    lstm_features = []
                    empty = np.empty((7))
                    empty[:] = np.nan
                    probs.append(empty)
                    frames.append(count_frame)
                    au_intensities_list.append(np.full(24, np.nan))

            if cur_face is not None:
                heatmap_f = display_info(heatmap, 'Frame: {}'.format(count_frame), box_scale=.3)

                cur_face = cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR)
                cur_face = cv2.resize(cur_face, (224, 224), interpolation=cv2.INTER_AREA)
                cur_face = display_info(cur_face, 'Frame: {}'.format(count_frame), box_scale=.3)
                vid_writer_face.write(cur_face)
                vid_writer_hm.write(heatmap_f)

            count_frame += 1
            if count_face != 0:
                count_face += 1

    vid_writer_face.release()
    vid_writer_hm.release()

    stat = statistics_plot(frames, probs)
    au_stat = au_statistics_plot(frames, au_intensities_list)

    if not stat or not au_stat:
        return None, None, None, None, None

    return video, path_save_video_face, path_save_video_hm, stat, au_stat
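Usage note: a sketch under the same assumptions (hypothetical input path, models already provided by app.model). The last two return values are the figures produced by statistics_plot and au_statistics_plot.

from app.video_processing import preprocess_video_and_predict

video, face_path, heatmap_path, emotion_stat, au_fig = preprocess_video_and_predict("face_video.mp4")  # hypothetical video
au_fig.savefig("au_over_time.png")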
assets/.DS_Store
CHANGED
Binary files a/assets/.DS_Store and b/assets/.DS_Store differ
requirements.txt
CHANGED
@@ -1,49 +1,139 @@
-# CUDA-enabled PyTorch packages
-torch
-torchvision
-torchaudio
-
-# Core dependencies
-gradio==4.38.1
-gradio_client==1.1.0
-
-# Additional dependencies
 absl-py==2.1.0
 aiofiles==23.2.1
 altair==5.3.0
+annotated-types==0.7.0
 anyio==4.4.0
+astunparse==1.6.3
 attrs==23.2.0
 audioread==3.0.1
 certifi==2024.7.4
+cffi==1.16.0
 charset-normalizer==3.3.2
 click==8.1.7
+contourpy==1.2.1
+cycler==0.12.1
 decorator==4.4.2
+dlib==19.24.4
+dnspython==2.6.1
+email_validator==2.2.0
+exceptiongroup==1.2.2
 fastapi==0.111.1
+fastapi-cli==0.0.4
+ffmpy==0.3.2
+filelock==3.15.4
+flatbuffers==24.3.25
+fonttools==4.53.1
+fsspec==2024.6.1
+gast==0.6.0
+google-pasta==0.2.0
+grad-cam
+gradio==4.38.1
+gradio_client==1.1.0
+grpcio==1.64.1
+h11==0.14.0
 h5py==3.11.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
 huggingface-hub==0.23.5
 idna==3.7
+imageio==2.34.2
+imageio-ffmpeg==0.5.1
+importlib_resources==6.4.0
+imutils==0.5.4
+jax==0.4.30
+jaxlib==0.4.30
 Jinja2==3.1.4
 joblib==1.4.2
 jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+keras==3.4.1
 kiwisolver==1.4.5
+lazy_loader==0.4
+libclang==18.1.1
 librosa==0.10.2.post1
+llvmlite==0.43.0
+Markdown==3.6
+markdown-it-py==3.0.0
 MarkupSafe==2.1.5
 matplotlib==3.9.1
+mdurl==0.1.2
+mediapipe==0.10.14
+ml-dtypes==0.4.0
+moviepy==1.0.3
+mpmath==1.3.0
+msgpack==1.0.8
+namex==0.0.8
+networkx==3.3
+numba==0.60.0
 numpy==1.26.4
+opencv-contrib-python==4.10.0.84
+opencv-python==4.10.0.84
+opt-einsum==3.3.0
+optree==0.12.1
+orjson==3.10.6
+packaging==24.1
 pandas==2.2.2
-
+pillow==10.4.0
+platformdirs==4.2.2
+pooch==1.8.2
+proglog==0.1.10
+protobuf==4.25.3
+pycparser==2.22
 pydantic==2.8.2
+pydantic_core==2.20.1
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
 python-multipart==0.0.9
 pytz==2024.1
 PyYAML==6.0.1
+referencing==0.35.1
+regex==2024.5.15
 requests==2.32.3
+rich==13.7.1
+rpds-py==0.19.0
+ruff==0.5.2
+safetensors==0.4.3
 scikit-learn==1.5.1
 scipy==1.14.0
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+sounddevice==0.4.7
 soundfile==0.12.1
+soxr==0.3.7
 starlette==0.37.2
+sympy==1.13.0
+tensorboard==2.17.0
+tensorboard-data-server==0.7.2
+tensorflow==2.17.0
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==2.4.0
+tf_keras==2.17.0
+threadpoolctl==3.5.0
+tokenizers==0.19.1
+toml==0.10.2
+tomlkit==0.12.0
+toolz==0.12.1
+torch==2.3.1
+torchaudio==2.3.1
+torchvision==0.18.1
 tqdm==4.66.4
 transformers==4.42.4
+ttach==0.0.3
+typer==0.12.3
+typing_extensions==4.12.2
+tzdata==2024.1
+ujson==5.10.0
+urllib3==2.2.2
 uvicorn==0.30.1
-
-
-
+uvloop==0.19.0
+watchfiles==0.22.0
+wavio==0.0.9
+websockets==11.0.3
+Werkzeug==3.0.3
+wrapt==1.16.0
tabs/__emotion_analysis.py
ADDED
@@ -0,0 +1,36 @@
import os
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import gradio as gr

os.environ["TOKENIZERS_PARALLELISM"] = "true"

emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]

def analyze_emotion(text):
    try:
        inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        outputs = emotion_model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        max_prob, max_index = torch.max(probs, dim=1)
        return emotion_labels[max_index.item()], f"{max_prob.item():.4f}"
    except Exception as e:
        print(f"Error in emotion analysis: {e}")
        return "Error", "N/A"

def create_emotion_tab():
    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(value='I actually speak to the experts myself to give you the best value you can get', lines=5, placeholder="Enter text here...", label="Input Text")
            with gr.Row():
                clear_btn = gr.Button("Clear", scale=1)
                submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit")
        with gr.Column(scale=1):
            output_emotion = gr.Textbox(label="Detected Emotion")
            output_confidence = gr.Textbox(label="Emotion Confidence Score")

    submit_btn.click(analyze_emotion, inputs=[input_text], outputs=[output_emotion, output_confidence])
    clear_btn.click(lambda: ("", "", ""), outputs=[input_text, output_emotion, output_confidence])
    gr.Examples(["I am so happy today!", "I feel terrible and sad.", "This is a neutral statement."], inputs=[input_text])
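Usage note: analyze_emotion can also be exercised outside the Gradio tab; the sentence below is arbitrary.

from tabs.__emotion_analysis import analyze_emotion

label, confidence = analyze_emotion("I am so happy today!")
print(label, confidence)  # one of the seven labels plus a probability formatted to four decimals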
tabs/__sentiment_analysis.py
ADDED
@@ -0,0 +1,36 @@
import os
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import gradio as gr

os.environ["TOKENIZERS_PARALLELISM"] = "true"

sentiment_tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
sentiment_model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
sentiment_labels = ["very negative", "negative", "neutral", "positive", "very positive"]

def analyze_sentiment(text):
    try:
        inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        outputs = sentiment_model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        max_prob, max_index = torch.max(probs, dim=1)
        return sentiment_labels[max_index.item()], f"{max_prob.item():.4f}"
    except Exception as e:
        print(f"Error in sentiment analysis: {e}")
        return "Error", "N/A"

def create_sentiment_tab():
    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(value="I actually speak to the experts myself to give you the best value you can get", lines=5, placeholder="Enter text here...", label="Input Text")
            with gr.Row():
                clear_btn = gr.Button("Clear", scale=1)
                submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit")
        with gr.Column(scale=1):
            output_sentiment = gr.Textbox(label="Detected Sentiment")
            output_confidence = gr.Textbox(label="Sentiment Confidence Score")

    submit_btn.click(analyze_sentiment, inputs=[input_text], outputs=[output_sentiment, output_confidence], queue=True)
    clear_btn.click(lambda: ("", "", ""), outputs=[input_text, output_sentiment, output_confidence], queue=True)
    gr.Examples(["I am so happy today!", "I feel terrible and sad.", "This is a neutral statement."], inputs=[input_text])
tabs/speech_emotion_recognition.py
ADDED
@@ -0,0 +1,206 @@
# tabs/speech_emotion_recognition.py

import gradio as gr
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from transformers import pipeline
import torch
import tempfile
import warnings
import os

# Suppress specific warnings from transformers if needed
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')

# Determine the device
def get_device():
    if torch.backends.mps.is_available():
        device = torch.device("mps")
        print("Using MPS device for inference.")
    elif torch.cuda.is_available():
        device = torch.device("cuda")
        print("Using CUDA device for inference.")
    else:
        device = torch.device("cpu")
        print("Using CPU for inference.")
    return device

device = get_device()

# Initialize the pipelines with the specified device
try:
    emotion_model = pipeline(
        "audio-classification",
        model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
        device=0 if device.type == "cuda" else ("mps" if device.type == "mps" else -1)
    )
    print("Emotion model loaded successfully.")
except Exception as e:
    print(f"Error loading emotion model: {e}")
    emotion_model = None

try:
    transcription_model = pipeline(
        "automatic-speech-recognition",
        model="facebook/wav2vec2-base-960h",
        device=0 if device.type == "cuda" else ("mps" if device.type == "mps" else -1)
    )
    print("Transcription model loaded successfully.")
except Exception as e:
    print(f"Error loading transcription model: {e}")
    transcription_model = None

# Emotion Mapping
emotion_mapping = {
    "angry": (0.8, 0.8, -0.5),
    "happy": (0.6, 0.6, 0.8),
    "sad": (-0.6, -0.4, -0.6),
    "neutral": (0, 0, 0),
    "fear": (0.3, -0.3, -0.7),
    "surprise": (0.4, 0.2, 0.2),
    "disgust": (0.2, 0.5, -0.6),
    "calm": (-0.2, 0.1, 0.3),
    "excited": (0.7, 0.5, 0.7),
    "frustrated": (0.6, 0.5, -0.4)
}

def process_audio_emotion(audio_file):
    """
    Processes the input audio file to perform transcription and emotion recognition.
    Generates waveform and mel spectrogram plots.

    Returns:
        A tuple containing:
        - Transcription (str)
        - Emotion (str)
        - Confidence (%) (float)
        - Arousal (float)
        - Dominance (float)
        - Valence (float)
        - Waveform Plot (str: filepath)
        - Mel Spectrogram Plot (str: filepath)
    """
    if not audio_file:
        return (
            "No audio file provided.",  # Transcription (textbox)
            None,                       # Emotion (textbox)
            None,                       # Confidence (%) (number)
            None,                       # Arousal (number)
            None,                       # Dominance (number)
            None,                       # Valence (number)
            None,                       # Waveform Plot (image)
            None                        # Mel Spectrogram Plot (image)
        )

    try:
        y, sr = librosa.load(audio_file, sr=None)

        # Transcription
        if transcription_model:
            transcription_result = transcription_model(audio_file)
            transcription = transcription_result.get("text", "N/A")
        else:
            transcription = "Transcription model not loaded."

        # Emotion Recognition
        if emotion_model:
            emotion_results = emotion_model(audio_file)
            if emotion_results:
                emotion_result = emotion_results[0]
                emotion = emotion_result.get("label", "Unknown").lower()
                confidence = emotion_result.get("score", 0.0) * 100  # Convert to percentage
                arousal, dominance, valence = emotion_mapping.get(emotion, (0.0, 0.0, 0.0))
            else:
                emotion = "No emotion detected."
                confidence = 0.0
                arousal, dominance, valence = 0.0, 0.0, 0.0
        else:
            emotion = "Emotion model not loaded."
            confidence = 0.0
            arousal, dominance, valence = 0.0, 0.0, 0.0

        # Plotting Waveform
        plt.figure(figsize=(10, 4))
        librosa.display.waveshow(y, sr=sr)
        plt.title("Waveform")
        plt.xlabel("Time (s)")
        plt.ylabel("Amplitude")
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_waveform:
            plt.savefig(tmp_waveform.name, bbox_inches='tight')
            waveform_plot_path = tmp_waveform.name
        plt.close()

        # Plotting Mel Spectrogram
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
        plt.figure(figsize=(10, 4))
        librosa.display.specshow(librosa.power_to_db(mel_spec, ref=np.max), sr=sr, x_axis='time', y_axis='mel')
        plt.colorbar(format='%+2.0f dB')
        plt.title("Mel Spectrogram")
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_mel:
            plt.savefig(tmp_mel.name, bbox_inches='tight')
            mel_spec_plot_path = tmp_mel.name
        plt.close()

        return (
            transcription,          # Transcription (textbox)
            emotion.capitalize(),   # Emotion (textbox)
            confidence,             # Confidence (%) (number)
            arousal,                # Arousal (number)
            dominance,              # Dominance (number)
            valence,                # Valence (number)
            waveform_plot_path,     # Waveform Plot (image)
            mel_spec_plot_path      # Mel Spectrogram Plot (image)
        )
    except Exception as e:
        return (
            f"Error: {str(e)}",  # Transcription (textbox)
            None,                # Emotion (textbox)
            None,                # Confidence (%) (number)
            None,                # Arousal (number)
            None,                # Dominance (number)
            None,                # Valence (number)
            None,                # Waveform Plot (image)
            None                 # Mel Spectrogram Plot (image)
        )

def create_emotion_recognition_tab():
    """
    Creates the Emotion Recognition tab in the Gradio interface.
    """
    with gr.Row():
        with gr.Column(scale=2):
            input_audio = gr.Audio(label="Input Audio", type="filepath")
            gr.Examples(
                examples=["./assets/audio/fitness.wav"],
                inputs=[input_audio],
                label="Examples"
            )
        with gr.Column(scale=1):
            transcription_output = gr.Textbox(label="Transcription", interactive=False)
            emotion_output = gr.Textbox(label="Emotion", interactive=False)
            confidence_output = gr.Number(label="Confidence (%)", interactive=False)
            arousal_output = gr.Number(label="Arousal (Level of Energy)", interactive=False)
            dominance_output = gr.Number(label="Dominance (Degree of Control)", interactive=False)
            valence_output = gr.Number(label="Valence (Positivity/Negativity)", interactive=False)
        with gr.Column(scale=1):
            waveform_plot = gr.Image(label="Waveform")
            mel_spec_plot = gr.Image(label="Mel Spectrogram")

    input_audio.change(
        fn=process_audio_emotion,
        inputs=[input_audio],
        outputs=[
            transcription_output,
            emotion_output,
            confidence_output,
            arousal_output,
            dominance_output,
            valence_output,
            waveform_plot,
            mel_spec_plot
        ]
    )

# Call create_emotion_recognition_tab to create the Gradio interface
ADDED
@@ -0,0 +1,126 @@
|
# tabs/speech_stress_analysis.py

import gradio as gr
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import warnings

# Suppress specific warnings from transformers if needed
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')

def extract_audio_features(audio_file):
    y, sr = librosa.load(audio_file, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    pitches = pitches[(magnitudes > np.median(magnitudes)) & (pitches > 0)]
    energy = librosa.feature.rms(y=y)[0]
    return mfccs, pitches, energy, y, sr

def analyze_voice_stress(audio_file):
    if not audio_file:
        return "No audio file provided.", None

    try:
        mfccs, pitches, energy, y, sr = extract_audio_features(audio_file)

        # Calculate variances
        var_mfccs = np.var(mfccs, axis=1).mean()  # Mean variance across MFCC coefficients
        var_energy = np.var(energy)  # Variance of RMS energy
        var_pitches = np.var(pitches) if len(pitches) > 0 else 0  # Variance of pitches if present

        # Debugging: Print individual variances
        print(f"Variance MFCCs (mean across coefficients): {var_mfccs}")
        print(f"Variance Energy: {var_energy}")
        print(f"Variance Pitches: {var_pitches}")

        # Normalize each variance using Z-Score Standardization
        mfccs_mean = 1000
        mfccs_std = 500
        energy_mean = 0.005
        energy_std = 0.005
        pitches_mean = 500000
        pitches_std = 200000

        norm_var_mfccs = (var_mfccs - mfccs_mean) / mfccs_std
        norm_var_energy = (var_energy - energy_mean) / energy_std
        norm_var_pitches = (var_pitches - pitches_mean) / pitches_std if var_pitches > 0 else 0

        # Debugging: Print normalized variances
        print(f"Normalized Variance MFCCs: {norm_var_mfccs}")
        print(f"Normalized Variance Energy: {norm_var_energy}")
        print(f"Normalized Variance Pitches: {norm_var_pitches}")

        # Combine normalized variances
        stress_level = np.mean([
            norm_var_mfccs,
            norm_var_energy,
            norm_var_pitches
        ]) if var_pitches > 0 else np.mean([norm_var_mfccs, norm_var_energy])

        # Debugging: Print stress_level before normalization
        print(f"Calculated Stress Level (before scaling): {stress_level}")

        # Scale to 0-100%
        normalized_stress = (stress_level + 3) / 6 * 100  # Maps -3 to 0%, +3 to 100%
        normalized_stress = np.clip(normalized_stress, 0, 100)  # Ensure within 0-100%

        # Debugging: Print normalized_stress
        print(f"Normalized Stress Level: {normalized_stress}")

        # Plotting
        fig, axs = plt.subplots(3, 1, figsize=(10, 12))

        # MFCCs
        img_mfcc = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[0])
        axs[0].set_title('MFCCs')
        axs[0].set_ylabel('MFCC Coefficient')
        fig.colorbar(img_mfcc, ax=axs[0])

        # Pitch
        axs[1].plot(pitches)
        axs[1].set_title('Pitch')
        axs[1].set_ylabel('Frequency (Hz)')

        # Energy
        axs[2].plot(energy)
        axs[2].set_title('Energy (RMS)')
        axs[2].set_ylabel('RMS Energy')
        axs[2].set_xlabel('Frames')

        plt.tight_layout()
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
            plt.savefig(temp_file.name)
            plot_path = temp_file.name
        plt.close()

        # Interpretation
        if normalized_stress < 33:
            stress_interpretation = "Low"
        elif normalized_stress < 66:
            stress_interpretation = "Medium"
        else:
            stress_interpretation = "High"

        return f"{normalized_stress:.2f}% - {stress_interpretation} Stress", plot_path
    except Exception as e:
        return f"Error: {str(e)}", None

def create_voice_stress_tab():
    with gr.Row():
        with gr.Column(scale=2):
            input_audio = gr.Audio(label="Input Audio", type="filepath")
            clear_btn = gr.Button("Clear", scale=1)
        with gr.Column(scale=1):
            output_stress = gr.Label(label="Stress Level")
            output_plot = gr.Image(label="Stress Analysis Plot")

    # Automatically trigger analysis when an audio file is uploaded
    input_audio.change(analyze_voice_stress, inputs=[input_audio], outputs=[output_stress, output_plot])

    clear_btn.click(lambda: (None, None, None), outputs=[input_audio, output_stress, output_plot])

    gr.Examples(["./assets/audio/fitness.wav"], inputs=[input_audio])
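Usage note: a matching sketch for the stress analyzer, using the same example clip; the first return value is the label shown in the UI and the second is a path to the saved plot.

from tabs.speech_stress_analysis import analyze_voice_stress

stress_label, plot_path = analyze_voice_stress("./assets/audio/fitness.wav")
print(stress_label)  # e.g. "42.00% - Medium Stress"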