# tabs/speech_stress_analysis.py

import gradio as gr
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import warnings

# Suppress specific warnings from transformers if needed
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')

def extract_audio_features(audio_file):
    """Load an audio file and return MFCCs, filtered pitch estimates, RMS energy, raw samples, and sample rate."""
    y, sr = librosa.load(audio_file, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    # Keep only pitch estimates with above-median magnitude and positive frequency
    pitches = pitches[(magnitudes > np.median(magnitudes)) & (pitches > 0)]
    energy = librosa.feature.rms(y=y)[0]
    return mfccs, pitches, energy, y, sr
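

# Illustrative sketch, not part of the original file: a tiny helper that prints
# the shapes produced by extract_audio_features(). The default path "sample.wav"
# is a hypothetical placeholder for any local audio file.
def _debug_feature_shapes(audio_file="sample.wav"):
    mfccs, pitches, energy, y, sr = extract_audio_features(audio_file)
    print(f"mfccs:   {mfccs.shape}")    # (13, n_frames)
    print(f"pitches: {pitches.shape}")  # 1-D array of filtered pitch estimates
    print(f"energy:  {energy.shape}")   # (n_frames,)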

def analyze_voice_stress(audio_file):
    if not audio_file:
        return "No audio file provided.", None
    try:
        mfccs, pitches, energy, y, sr = extract_audio_features(audio_file)

        # Calculate variances
        var_mfccs = np.var(mfccs, axis=1).mean()  # Mean variance across MFCC coefficients
        var_energy = np.var(energy)  # Variance of RMS energy
        var_pitches = np.var(pitches) if len(pitches) > 0 else 0  # Variance of pitches if present

        # Debugging: Print individual variances
        print(f"Variance MFCCs (mean across coefficients): {var_mfccs}")
        print(f"Variance Energy: {var_energy}")
        print(f"Variance Pitches: {var_pitches}")

        # Normalize each variance using Z-Score Standardization
        mfccs_mean = 1000
        mfccs_std = 500
        energy_mean = 0.005
        energy_std = 0.005
        pitches_mean = 500000
        pitches_std = 200000

        norm_var_mfccs = (var_mfccs - mfccs_mean) / mfccs_std
        norm_var_energy = (var_energy - energy_mean) / energy_std
        norm_var_pitches = (var_pitches - pitches_mean) / pitches_std if var_pitches > 0 else 0

        # Debugging: Print normalized variances
        print(f"Normalized Variance MFCCs: {norm_var_mfccs}")
        print(f"Normalized Variance Energy: {norm_var_energy}")
        print(f"Normalized Variance Pitches: {norm_var_pitches}")

        # Combine normalized variances
        stress_level = np.mean([
            norm_var_mfccs,
            norm_var_energy,
            norm_var_pitches
        ]) if var_pitches > 0 else np.mean([norm_var_mfccs, norm_var_energy])

        # Debugging: Print stress_level before normalization
        print(f"Calculated Stress Level (before scaling): {stress_level}")

        # Scale to 0-100%
        normalized_stress = (stress_level + 3) / 6 * 100  # Maps -3 to 0%, +3 to 100%
        normalized_stress = np.clip(normalized_stress, 0, 100)  # Ensure within 0-100%

        # Debugging: Print normalized_stress
        print(f"Normalized Stress Level: {normalized_stress}")

        # Plotting
        fig, axs = plt.subplots(3, 1, figsize=(10, 12))

        # MFCCs
        img_mfcc = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[0])
        axs[0].set_title('MFCCs')
        axs[0].set_ylabel('MFCC Coefficient')
        fig.colorbar(img_mfcc, ax=axs[0])

        # Pitch
        axs[1].plot(pitches)
        axs[1].set_title('Pitch')
        axs[1].set_ylabel('Frequency (Hz)')

        # Energy
        axs[2].plot(energy)
        axs[2].set_title('Energy (RMS)')
        axs[2].set_ylabel('RMS Energy')
        axs[2].set_xlabel('Frames')

        plt.tight_layout()

        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
            plt.savefig(temp_file.name)
            plot_path = temp_file.name
        plt.close(fig)

        # Interpretation
        if normalized_stress < 33:
            stress_interpretation = "Low"
        elif normalized_stress < 66:
            stress_interpretation = "Medium"
        else:
            stress_interpretation = "High"

        return f"{normalized_stress:.2f}% - {stress_interpretation} Stress", plot_path
    except Exception as e:
        return f"Error: {str(e)}", None

def create_voice_stress_tab():
    with gr.Row():
        with gr.Column(scale=2):
            input_audio = gr.Audio(label="Input Audio", type="filepath")
            clear_btn = gr.Button("Clear", scale=1)
        with gr.Column(scale=1):
            output_stress = gr.Label(label="Stress Level")
            output_plot = gr.Image(label="Stress Analysis Plot")

    # Automatically trigger analysis when an audio file is uploaded
    input_audio.change(analyze_voice_stress, inputs=[input_audio], outputs=[output_stress, output_plot])
    # Reset the audio input and both outputs
    clear_btn.click(lambda: (None, None, None), outputs=[input_audio, output_stress, output_plot])
    gr.Examples(["./assets/audio/fitness.wav"], inputs=[input_audio])
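

# Illustrative sketch, not part of the original file: one way this tab builder
# might be mounted in a standalone Gradio app. The Blocks/Tab wrapper, the tab
# title, and the launch() call are assumptions; the real application presumably
# imports create_voice_stress_tab() from this module elsewhere.
if __name__ == "__main__":
    with gr.Blocks() as demo:
        with gr.Tab("Speech Stress Analysis"):
            create_voice_stress_tab()
    demo.launch()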