Spaces:
Build error
Build error
File size: 9,660 Bytes
18c46ab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 |
# tabs/speech_stress_analysis.py
import gradio as gr
import librosa
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module='librosa')
def extract_audio_features(audio_file):
    """Load an audio file and compute the acoustic features used for stress scoring.

    Args:
        audio_file: Path (or other input accepted by ``librosa.load``) of the
            recording to analyse.

    Returns:
        A 6-tuple ``(f0, energy, speech_rate, mfccs, y, sr)`` where

        * ``f0`` is the pYIN fundamental-frequency track with unvoiced (NaN)
          frames removed -- it is empty when no voiced frame was found,
        * ``energy`` is the frame-wise RMS of the signal,
        * ``speech_rate`` is the beat-tracker tempo divided by 60, used as a
          rough stand-in for syllables per second,
        * ``mfccs`` holds 13 MFCCs per frame,
        * ``y`` and ``sr`` are the samples and sampling rate from ``librosa.load``.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(audio_file, sr=None)

    # pyin assumes 22050 Hz unless told otherwise; since the audio is loaded at
    # its native rate, the real rate must be passed or the pitch is mis-scaled.
    f0, _voiced_flag, _voiced_probs = librosa.pyin(y, fmin=75, fmax=600, sr=sr)
    f0 = f0[~np.isnan(f0)]  # Remove unvoiced frames

    # Energy (intensity)
    energy = librosa.feature.rms(y=y)[0]

    # MFCCs (Mel-frequency cepstral coefficients)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Onset envelope for speech rate estimation
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)

    # Depending on the librosa version, tempo may come back as a scalar or as a
    # 1-element array; normalise to a plain float so callers can format it.
    speech_rate = float(np.atleast_1d(tempo)[0]) / 60

    return f0, energy, speech_rate, mfccs, y, sr
# Exclusive upper bounds (in percent) of each stress band, in ascending order.
# Anything at or above the last bound is "Very High Stress".
_STRESS_BANDS = (
    (20, "Very Low Stress"),
    (40, "Low Stress"),
    (60, "Moderate Stress"),
    (80, "High Stress"),
)

# Verbose, user-facing interpretation for each stress category.
_STRESS_INTERPRETATIONS = {
    "Very Low Stress": (
        "Your vocal analysis indicates a very relaxed state. "
        "This suggests that you're currently experiencing minimal stress. "
        "Maintaining such low stress levels is beneficial for your health. "
        "Continue engaging in activities that promote relaxation and well-being. "
        "Regular self-care practices can help sustain this positive state."
    ),
    "Low Stress": (
        "Minor signs of stress are detected in your voice. "
        "This is common due to everyday challenges and is usually not concerning. "
        "Incorporating relaxation techniques, like deep breathing or meditation, may help. "
        "Regular breaks and leisure activities can also reduce stress. "
        "Staying mindful of stress levels supports overall health."
    ),
    "Moderate Stress": (
        "Your voice reflects moderate stress levels. "
        "This could be due to ongoing pressures or challenges you're facing. "
        "Consider practicing stress management strategies such as mindfulness exercises or physical activity. "
        "Identifying stressors and addressing them can be beneficial. "
        "Balancing work and rest is important for your well-being."
    ),
    "High Stress": (
        "Elevated stress levels are apparent in your vocal patterns. "
        "It's important to recognize and address these feelings. "
        "Identifying stressors and seeking support from friends, family, or professionals could be helpful. "
        "Engaging in stress reduction techniques is recommended. "
        "Taking proactive steps can improve your mental and physical health."
    ),
    "Very High Stress": (
        "Your voice suggests very high stress levels. "
        "This may indicate significant strain or anxiety. "
        "It may be helpful to consult a healthcare professional for support. "
        "Promptly addressing stress is important for your well-being. "
        "Consider reaching out to trusted individuals or resources."
    ),
}


def _score_stress(f0, energy, speech_rate):
    """Fold pitch, loudness and speech rate into a 0-100 stress percentage."""
    mean_f0 = np.mean(f0)
    mean_energy = np.mean(energy)

    # Normative data (example values; treated here as fixed reference points).
    norm_mean_f0_male = 110
    norm_mean_f0_female = 220
    norm_std_f0 = 20
    norm_mean_energy = 0.02
    norm_std_energy = 0.005
    norm_speech_rate = 4.4
    norm_std_speech_rate = 0.5

    # Pick the pitch reference from the mean pitch itself (165 Hz cut-off).
    norm_mean_f0 = norm_mean_f0_male if mean_f0 < 165 else norm_mean_f0_female

    # Z-scores against the reference values.
    z_f0 = (mean_f0 - norm_mean_f0) / norm_std_f0
    z_energy = (mean_energy - norm_mean_energy) / norm_std_energy
    z_speech_rate = (speech_rate - norm_speech_rate) / norm_std_speech_rate

    # Weighted sum squashed through a sigmoid and scaled to a percentage.
    stress_score = (0.4 * z_f0) + (0.4 * z_speech_rate) + (0.2 * z_energy)
    return float(1 / (1 + np.exp(-stress_score)) * 100)


def _classify_stress(stress_level):
    """Map a finite stress percentage to its category label."""
    for upper_bound, label in _STRESS_BANDS:
        if stress_level < upper_bound:
            return label
    return "Very High Stress"


def _render_stress_plots(f0, energy, mfccs, y, sr):
    """Draw the five diagnostic panels, save them to a temp PNG and return its path."""
    # Import the submodule explicitly: a bare ``import librosa`` does not
    # guarantee ``librosa.display`` is available in every librosa release.
    import librosa.display

    fig, axs = plt.subplots(5, 1, figsize=(10, 15))
    try:
        # Plot Fundamental Frequency (Pitch)
        axs[0].plot(f0)
        axs[0].set_title('Fundamental Frequency (Pitch)')
        axs[0].set_ylabel('Frequency (Hz)')

        # Plot Energy (Loudness)
        axs[1].plot(energy)
        axs[1].set_title('Energy (Loudness)')
        axs[1].set_ylabel('Energy')

        # Plot MFCCs
        img = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[2])
        axs[2].set_title('MFCCs (Mel-frequency cepstral coefficients)')
        fig.colorbar(img, ax=axs[2])

        # Plot Waveform
        librosa.display.waveshow(y, sr=sr, ax=axs[3])
        axs[3].set_title('Waveform')
        axs[3].set_xlabel('Time (s)')
        axs[3].set_ylabel('Amplitude')

        # Plot Pitch Contour (Histogram of f0)
        axs[4].hist(f0, bins=50, color='blue', alpha=0.7)
        axs[4].set_title('Pitch Contour (Histogram of f0)')
        axs[4].set_xlabel('Frequency (Hz)')
        axs[4].set_ylabel('Count')

        fig.tight_layout()

        # delete=False: the file must outlive this call so the caller can
        # hand the path to the UI. Only the name is needed here.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
            plot_path = temp_file.name
        fig.savefig(plot_path)
    finally:
        # Close this specific figure even when plotting fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)
    return plot_path


def analyze_voice_stress(audio_file):
    """Estimate a stress level from a voice recording.

    Args:
        audio_file: Path of the recording; a falsy value means no input.

    Returns:
        A 3-tuple ``(summary, interpretation, plot_path)``. On success
        ``summary`` looks like ``"42.00% - Moderate Stress"``,
        ``interpretation`` is the matching explanatory text and ``plot_path``
        is a PNG file with the diagnostic plots. When there is no input, no
        voiced speech, a non-finite score, or any exception, ``summary``
        carries the message and the other two items are ``None``.
    """
    if not audio_file:
        return "No audio file provided.", None, None

    try:
        f0, energy, speech_rate, mfccs, y, sr = extract_audio_features(audio_file)

        # Without voiced frames every pitch statistic is NaN, which would
        # otherwise fall through the band checks and be reported as
        # "Very High Stress".
        if np.size(f0) == 0:
            return "Error: No voiced speech detected in the recording.", None, None

        stress_level = _score_stress(f0, energy, speech_rate)
        if not np.isfinite(stress_level):
            return "Error: Could not compute a stress level for this recording.", None, None

        stress_category = _classify_stress(stress_level)
        final_interpretation = _STRESS_INTERPRETATIONS[stress_category]
        plot_path = _render_stress_plots(f0, energy, mfccs, y, sr)

        # Return separate values for Gradio output components
        return f"{stress_level:.2f}% - {stress_category}", final_interpretation, plot_path
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {str(e)}", None, None
def create_voice_stress_tab():
    """Assemble the "Speech Stress Analysis" Gradio Blocks UI and return it.

    The layout holds an audio input, a label for the stress summary, a textbox
    for the detailed interpretation and an image for the plots. A change of
    the audio input runs ``analyze_voice_stress``; the Clear button resets the
    input and all three outputs.
    """
    # Stylesheet handed to gr.Blocks; kept verbatim.
    mobile_css = """
/* General container styling for mobile */
.gradio-container {
padding: 10px !important;
font-size: 16px !important;
}
/* Headings */
h3 {
text-align: center;
font-size: 1.5em !important;
margin-bottom: 20px !important;
}
/* Full width for audio input and other components */
.gradio-container .gradio-row, .gradio-container .gradio-column {
flex-direction: column !important;
align-items: center !important;
}
/* Make the components scale better on smaller screens */
#input_audio, #stress_output, #interpretation_output, #plot_output {
width: 100% !important;
max-width: 100% !important;
}
#input_audio label, #stress_output label, #interpretation_output label, #plot_output label {
font-size: 1.2em !important;
}
/* Textbox area adjustment */
#interpretation_output textarea {
font-size: 1em !important;
line-height: 1.4 !important;
}
/* Responsive styling for images */
#plot_output img {
width: 100% !important;
height: auto !important;
}
/* Adjust clear button */
#clear_btn button {
font-size: 1em !important;
padding: 10px 20px !important;
}
/* Responsive adjustments */
@media only screen and (max-width: 600px) {
.gradio-container {
padding: 5px !important;
font-size: 14px !important;
}
h3 {
font-size: 1.2em !important;
}
#clear_btn button {
font-size: 0.9em !important;
}
#interpretation_output textarea {
font-size: 0.9em !important;
}
}
"""

    with gr.Blocks(css=mobile_css) as tab:
        gr.Markdown("<h3>Speech Stress Analysis</h3>")

        with gr.Column():
            # Components are created in display order.
            audio_in = gr.Audio(
                label="Upload your voice recording",
                type="filepath",
                elem_id="input_audio",
            )
            summary_label = gr.Label(
                label="Stress Interpretation",
                elem_id="stress_output",
            )
            detail_box = gr.Textbox(
                label="Detailed Interpretation",
                lines=6,
                elem_id="interpretation_output",
            )
            plot_image = gr.Image(
                label="Stress Analysis Plot",
                elem_id="plot_output",
            )
            result_widgets = (summary_label, detail_box, plot_image)

            # Bundled sample recording.
            gr.Examples(
                examples=["./assets/audio/fitness.wav"],
                inputs=[audio_in],
                label="Examples",
            )

            # Re-run the analysis whenever the audio input changes.
            audio_in.change(
                analyze_voice_stress,
                inputs=[audio_in],
                outputs=[*result_widgets],
            )

            # Clear resets the input and every output: one None per component.
            clear_button = gr.Button("Clear", elem_id="clear_btn")
            clear_button.click(
                lambda: (None, None, None, None),
                outputs=[audio_in, *result_widgets],
            )

    return tab
|