# tabs/speech_stress_analysis.py
"""Voice-stress analysis tab.

Extracts MFCC, pitch, and RMS-energy features from an audio file, derives a
heuristic 0-100% "stress" score from the z-scored feature variances, and
exposes the whole flow as a Gradio tab.
"""

import tempfile
import warnings

import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

# Suppress specific warnings from transformers if needed
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')

# Z-score baselines (mean, std) for each feature variance.
# NOTE(review): these are hand-tuned magic numbers — confirm against
# representative audio before trusting the absolute stress percentage.
_MFCCS_MEAN, _MFCCS_STD = 1000, 500
_ENERGY_MEAN, _ENERGY_STD = 0.005, 0.005
_PITCHES_MEAN, _PITCHES_STD = 500000, 200000


def extract_audio_features(audio_file):
    """Load *audio_file* and return ``(mfccs, pitches, energy, y, sr)``.

    Returns:
        mfccs: (13, n_frames) MFCC matrix.
        pitches: 1-D array of pitch candidates whose magnitude exceeds the
            median magnitude (zero-pitch bins dropped).
        energy: per-frame RMS energy.
        y, sr: raw samples and the file's native sample rate.
    """
    y, sr = librosa.load(audio_file, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    # Keep only confident (above-median magnitude), non-zero pitch estimates.
    pitches = pitches[(magnitudes > np.median(magnitudes)) & (pitches > 0)]
    energy = librosa.feature.rms(y=y)[0]
    return mfccs, pitches, energy, y, sr


def _compute_normalized_stress(mfccs, pitches, energy):
    """Combine z-scored feature variances into a 0-100 stress percentage."""
    # Calculate variances
    var_mfccs = np.var(mfccs, axis=1).mean()  # Mean variance across MFCC coefficients
    var_energy = np.var(energy)  # Variance of RMS energy
    var_pitches = np.var(pitches) if len(pitches) > 0 else 0  # Variance of pitches if present

    # Debugging: Print individual variances
    print(f"Variance MFCCs (mean across coefficients): {var_mfccs}")
    print(f"Variance Energy: {var_energy}")
    print(f"Variance Pitches: {var_pitches}")

    # Normalize each variance using Z-Score Standardization
    norm_var_mfccs = (var_mfccs - _MFCCS_MEAN) / _MFCCS_STD
    norm_var_energy = (var_energy - _ENERGY_MEAN) / _ENERGY_STD
    norm_var_pitches = (var_pitches - _PITCHES_MEAN) / _PITCHES_STD if var_pitches > 0 else 0

    # Debugging: Print normalized variances
    print(f"Normalized Variance MFCCs: {norm_var_mfccs}")
    print(f"Normalized Variance Energy: {norm_var_energy}")
    print(f"Normalized Variance Pitches: {norm_var_pitches}")

    # Combine normalized variances; pitch term is skipped when no pitch was found.
    if var_pitches > 0:
        stress_level = np.mean([norm_var_mfccs, norm_var_energy, norm_var_pitches])
    else:
        stress_level = np.mean([norm_var_mfccs, norm_var_energy])

    # Debugging: Print stress_level before normalization
    print(f"Calculated Stress Level (before scaling): {stress_level}")

    # Scale to 0-100%
    normalized_stress = (stress_level + 3) / 6 * 100  # Maps -3 to 0%, +3 to 100%
    normalized_stress = np.clip(normalized_stress, 0, 100)  # Ensure within 0-100%

    # Debugging: Print normalized_stress
    print(f"Normalized Stress Level: {normalized_stress}")
    return normalized_stress


def _render_feature_plot(mfccs, pitches, energy, sr):
    """Plot MFCCs, pitch, and energy; save to a temp PNG and return its path."""
    fig, axs = plt.subplots(3, 1, figsize=(10, 12))

    # MFCCs
    img_mfcc = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[0])
    axs[0].set_title('MFCCs')
    axs[0].set_ylabel('MFCC Coefficient')
    fig.colorbar(img_mfcc, ax=axs[0])

    # Pitch
    axs[1].plot(pitches)
    axs[1].set_title('Pitch')
    axs[1].set_ylabel('Frequency (Hz)')

    # Energy
    axs[2].plot(energy)
    axs[2].set_title('Energy (RMS)')
    axs[2].set_ylabel('RMS Energy')
    axs[2].set_xlabel('Frames')

    plt.tight_layout()
    # delete=False so the file survives for Gradio to read; Gradio/OS cleanup
    # is responsible for removing it later.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
        plt.savefig(temp_file.name)
        plot_path = temp_file.name
    plt.close()
    return plot_path


def analyze_voice_stress(audio_file):
    """Analyze *audio_file* and return ``(label_text, plot_path)``.

    On failure returns an ``"Error: ..."`` message and ``None`` for the plot.
    """
    if not audio_file:
        return "No audio file provided.", None
    try:
        mfccs, pitches, energy, y, sr = extract_audio_features(audio_file)
        normalized_stress = _compute_normalized_stress(mfccs, pitches, energy)
        plot_path = _render_feature_plot(mfccs, pitches, energy, sr)

        # Interpretation
        if normalized_stress < 33:
            stress_interpretation = "Low"
        elif normalized_stress < 66:
            stress_interpretation = "Medium"
        else:
            stress_interpretation = "High"

        return f"{normalized_stress:.2f}% - {stress_interpretation} Stress", plot_path
    except Exception as e:
        # Surface the error in the UI rather than crashing the tab.
        return f"Error: {str(e)}", None


def create_voice_stress_tab():
    """Build the Gradio layout for the voice-stress tab and wire its events."""
    with gr.Row():
        with gr.Column(scale=2):
            input_audio = gr.Audio(label="Input Audio", type="filepath")
            clear_btn = gr.Button("Clear", scale=1)
        with gr.Column(scale=1):
            output_stress = gr.Label(label="Stress Level")
            output_plot = gr.Image(label="Stress Analysis Plot")

    # Automatically trigger analysis when an audio file is uploaded
    input_audio.change(analyze_voice_stress, inputs=[input_audio],
                       outputs=[output_stress, output_plot])
    # BUG FIX: the lambda must return one value per output component (3, not 2),
    # otherwise Gradio raises a value-count error when Clear is clicked.
    clear_btn.click(lambda: (None, None, None),
                    outputs=[input_audio, output_stress, output_plot])
    gr.Examples(["./assets/audio/fitness.wav"], inputs=[input_audio])