Spaces:

vitorcalvi
/

dyagnosys-free

Build error

File size: 9,660 Bytes

18c46ab

# tabs/speech_stress_analysis.py

import gradio as gr
import librosa
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import warnings

# Suppress UserWarning-category warnings whose originating module name
# matches "librosa" (the `module` argument is a regex matched from the start
# of the module name).
warnings.filterwarnings("ignore", category=UserWarning, module='librosa')

def extract_audio_features(audio_file):
    """Load an audio file and extract the features used for stress scoring.

    Args:
        audio_file: Anything ``librosa.load`` accepts as its first argument
            (the UI in this file passes a file path).

    Returns:
        A 6-tuple ``(f0, energy, speech_rate, mfccs, y, sr)``:

        * ``f0`` -- 1-D array of pYIN pitch estimates in Hz with the unvoiced
          (NaN) frames removed; it is empty when no frame is voiced.
        * ``energy`` -- 1-D array of frame-wise RMS values.
        * ``speech_rate`` -- ``float``; the beat-tracker tempo divided by 60.
        * ``mfccs`` -- array of 13 MFCCs per frame.
        * ``y`` -- the loaded waveform.
        * ``sr`` -- the sampling rate of ``y``.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(audio_file, sr=None)

    # Fundamental frequency estimation using librosa.pyin.
    # pyin assumes sr=22050 unless told otherwise; because the audio is loaded
    # at its native rate, the real rate must be passed or every pitch value is
    # scaled by the wrong factor.
    f0, _voiced_flag, _voiced_probs = librosa.pyin(y, fmin=75, fmax=600, sr=sr)
    f0 = f0[~np.isnan(f0)]  # Remove unvoiced frames

    # Energy (intensity)
    energy = librosa.feature.rms(y=y)[0]

    # MFCCs (Mel-frequency cepstral coefficients)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Onset envelope for speech rate estimation
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    # Depending on the librosa version, tempo is a float or a one-element
    # array; normalise to a plain float so callers can format/compare it.
    tempo = float(np.atleast_1d(tempo)[0])
    # Beats per minute -> events per second, used here as a proxy for
    # speech rate (the original comment calls this syllables per second).
    speech_rate = tempo / 60

    return f0, energy, speech_rate, mfccs, y, sr

# User-facing explanation shown for each stress category.
_STRESS_INTERPRETATIONS = {
    "Very Low Stress": (
        "Your vocal analysis indicates a very relaxed state. "
        "This suggests that you're currently experiencing minimal stress. "
        "Maintaining such low stress levels is beneficial for your health. "
        "Continue engaging in activities that promote relaxation and well-being. "
        "Regular self-care practices can help sustain this positive state."
    ),
    "Low Stress": (
        "Minor signs of stress are detected in your voice. "
        "This is common due to everyday challenges and is usually not concerning. "
        "Incorporating relaxation techniques, like deep breathing or meditation, may help. "
        "Regular breaks and leisure activities can also reduce stress. "
        "Staying mindful of stress levels supports overall health."
    ),
    "Moderate Stress": (
        "Your voice reflects moderate stress levels. "
        "This could be due to ongoing pressures or challenges you're facing. "
        "Consider practicing stress management strategies such as mindfulness exercises or physical activity. "
        "Identifying stressors and addressing them can be beneficial. "
        "Balancing work and rest is important for your well-being."
    ),
    "High Stress": (
        "Elevated stress levels are apparent in your vocal patterns. "
        "It's important to recognize and address these feelings. "
        "Identifying stressors and seeking support from friends, family, or professionals could be helpful. "
        "Engaging in stress reduction techniques is recommended. "
        "Taking proactive steps can improve your mental and physical health."
    ),
    "Very High Stress": (
        "Your voice suggests very high stress levels. "
        "This may indicate significant strain or anxiety. "
        "It may be helpful to consult a healthcare professional for support. "
        "Promptly addressing stress is important for your well-being. "
        "Consider reaching out to trusted individuals or resources."
    ),
}


def _stress_category(stress_level):
    """Map a 0-100 stress percentage onto its category label."""
    if stress_level < 20:
        return "Very Low Stress"
    if stress_level < 40:
        return "Low Stress"
    if stress_level < 60:
        return "Moderate Stress"
    if stress_level < 80:
        return "High Stress"
    return "Very High Stress"


def _save_stress_plot(f0, energy, mfccs, y, sr):
    """Render the five diagnostic panels to a temporary PNG and return its path."""
    # Imported explicitly: the module only does `import librosa`, and librosa
    # releases without lazy submodule loading do not expose `librosa.display`
    # through the bare package import.
    import librosa.display

    fig, axs = plt.subplots(5, 1, figsize=(10, 15))
    try:
        # Plot Fundamental Frequency (Pitch)
        axs[0].plot(f0)
        axs[0].set_title('Fundamental Frequency (Pitch)')
        axs[0].set_ylabel('Frequency (Hz)')

        # Plot Energy (Loudness)
        axs[1].plot(energy)
        axs[1].set_title('Energy (Loudness)')
        axs[1].set_ylabel('Energy')

        # Plot MFCCs
        img = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[2])
        axs[2].set_title('MFCCs (Mel-frequency cepstral coefficients)')
        fig.colorbar(img, ax=axs[2])

        # Plot Waveform
        librosa.display.waveshow(y, sr=sr, ax=axs[3])
        axs[3].set_title('Waveform')
        axs[3].set_xlabel('Time (s)')
        axs[3].set_ylabel('Amplitude')

        # Plot Pitch Contour (Histogram of f0)
        axs[4].hist(f0, bins=50, color='blue', alpha=0.7)
        axs[4].set_title('Pitch Contour (Histogram of f0)')
        axs[4].set_xlabel('Frequency (Hz)')
        axs[4].set_ylabel('Count')

        fig.tight_layout()
        # Reserve a file name, close the handle, then let matplotlib write to
        # it. delete=False keeps the file so the caller can hand the path on.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
            plot_path = temp_file.name
        fig.savefig(plot_path)
    finally:
        # Always release the figure, even when a plotting call raises, so
        # repeated failing requests do not accumulate open figures.
        plt.close(fig)
    return plot_path


def analyze_voice_stress(audio_file):
    """Estimate a stress level from a voice recording.

    Pitch, energy and speech rate are z-scored against fixed reference values,
    combined with weights 0.4 / 0.4 / 0.2 and squashed through a sigmoid to a
    0-100 percentage, which is then bucketed into five categories.

    Args:
        audio_file: Path of the recording to analyse; a falsy value means no
            recording was supplied.

    Returns:
        A 3-tuple ``(summary, interpretation, plot_path)``. On success
        ``summary`` is ``"<level>% - <category>"``, ``interpretation`` is the
        matching explanatory text and ``plot_path`` is the path of a PNG with
        the diagnostic plots. When there is no input, no voiced speech, or an
        exception occurs, ``summary`` carries the message and the other two
        items are ``None``. Exceptions are not propagated.
    """
    if not audio_file:
        return "No audio file provided.", None, None

    try:
        f0, energy, speech_rate, mfccs, y, sr = extract_audio_features(audio_file)

        # With no voiced frames the pitch mean is NaN; NaN fails every "<"
        # comparison, so the result would be labelled "Very High Stress".
        if np.size(f0) == 0:
            return "No voiced speech detected in the audio.", None, None

        # The tempo-derived rate may arrive as a one-element array depending
        # on the librosa version; reduce it to a scalar before the arithmetic.
        speech_rate = float(np.ravel(speech_rate)[0])

        # Calculate statistical measures
        mean_f0 = np.mean(f0)
        mean_energy = np.mean(energy)

        # Normative data (example values from medical literature)
        norm_mean_f0_male = 110
        norm_mean_f0_female = 220
        norm_std_f0 = 20
        norm_mean_energy = 0.02
        norm_std_energy = 0.005
        norm_speech_rate = 4.4
        norm_std_speech_rate = 0.5

        # Gender detection: a mean pitch below 165 Hz selects the male
        # reference pitch, anything else the female one.
        gender = 'male' if mean_f0 < 165 else 'female'
        norm_mean_f0 = norm_mean_f0_male if gender == 'male' else norm_mean_f0_female

        # Compute Z-scores
        z_f0 = (mean_f0 - norm_mean_f0) / norm_std_f0
        z_energy = (mean_energy - norm_mean_energy) / norm_std_energy
        z_speech_rate = (speech_rate - norm_speech_rate) / norm_std_speech_rate

        # Combine Z-scores for stress level
        stress_score = (0.4 * z_f0) + (0.4 * z_speech_rate) + (0.2 * z_energy)
        stress_level = float(1 / (1 + np.exp(-stress_score)) * 100)  # Sigmoid function

        stress_category = _stress_category(stress_level)
        final_interpretation = _STRESS_INTERPRETATIONS[stress_category]

        plot_path = _save_stress_plot(f0, energy, mfccs, y, sr)

        # Return separate values for Gradio output components
        return f"{stress_level:.2f}% - {stress_category}", final_interpretation, plot_path

    except Exception as e:
        # UI boundary: report the failure in the first output component
        # rather than letting the event handler raise.
        return f"Error: {str(e)}", None, None


def create_voice_stress_tab():
    """Build and return the Gradio Blocks UI for the speech stress analysis tab.

    The layout is a heading followed by a single column holding an audio
    input (delivered to the handler as a file path), a label, a textbox and
    an image for the results, one example recording, and a Clear button.
    Changing the audio input runs ``analyze_voice_stress``; the Clear button
    sets the input and all three outputs to ``None``.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    stylesheet = """
    /* General container styling for mobile */
    .gradio-container {
        padding: 10px !important;
        font-size: 16px !important;
    }

    /* Headings */
    h3 {
        text-align: center;
        font-size: 1.5em !important;
        margin-bottom: 20px !important;
    }

    /* Full width for audio input and other components */
    .gradio-container .gradio-row, .gradio-container .gradio-column {
        flex-direction: column !important;
        align-items: center !important;
    }

    /* Make the components scale better on smaller screens */
    #input_audio, #stress_output, #interpretation_output, #plot_output {
        width: 100% !important;
        max-width: 100% !important;
    }

    #input_audio label, #stress_output label, #interpretation_output label, #plot_output label {
        font-size: 1.2em !important;
    }

    /* Textbox area adjustment */
    #interpretation_output textarea {
        font-size: 1em !important;
        line-height: 1.4 !important;
    }

    /* Responsive styling for images */
    #plot_output img {
        width: 100% !important;
        height: auto !important;
    }

    /* Adjust clear button */
    #clear_btn button {
        font-size: 1em !important;
        padding: 10px 20px !important;
    }

    /* Responsive adjustments */
    @media only screen and (max-width: 600px) {
        .gradio-container {
            padding: 5px !important;
            font-size: 14px !important;
        }
        h3 {
            font-size: 1.2em !important;
        }
        #clear_btn button {
            font-size: 0.9em !important;
        }
        #interpretation_output textarea {
            font-size: 0.9em !important;
        }
    }
    """

    with gr.Blocks(css=stylesheet) as tab:
        gr.Markdown("<h3>Speech Stress Analysis</h3>")

        with gr.Column():
            # Components are created in display order.
            audio_in = gr.Audio(
                label="Upload your voice recording",
                type="filepath",
                elem_id="input_audio",
            )
            stress_label = gr.Label(
                label="Stress Interpretation",
                elem_id="stress_output",
            )
            interpretation_box = gr.Textbox(
                label="Detailed Interpretation",
                lines=6,
                elem_id="interpretation_output",
            )
            plot_image = gr.Image(
                label="Stress Analysis Plot",
                elem_id="plot_output",
            )

            # Bundled sample recording that can be loaded into the audio input.
            gr.Examples(
                examples=["./assets/audio/fitness.wav"],
                inputs=[audio_in],
                label="Examples",
            )

            # Re-run the analysis whenever the audio input changes.
            audio_in.change(
                analyze_voice_stress,
                inputs=[audio_in],
                outputs=[stress_label, interpretation_box, plot_image],
            )

            # The Clear button blanks the input and every result component.
            clear_button = gr.Button("Clear", elem_id="clear_btn")
            clear_button.click(
                lambda: (None, None, None, None),
                outputs=[audio_in, stress_label, interpretation_box, plot_image],
            )

    return tab