# Spaces: Running
import tempfile

import gradio as gr
import librosa
import matplotlib.pyplot as plt
import numpy as np
from transformers import pipeline
# Speech-to-text model: the English-only Whisper "base" checkpoint, created once
# at module import through the transformers pipeline API and reused by every call.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
)
def analyze_audio(audio):
    """Transcribe a recording and extract simple prosodic measurements.

    Args:
        audio: Path to an audio file (the Gradio input is declared with
            ``type="filepath"``), or ``None`` when no recording is present.

    Returns:
        A 5-tuple ``(transcription, tempo, pace, pitch_variance, plot_path)``:

        * transcription: text produced by the Whisper pipeline.
        * tempo: beat-tracker tempo estimate in BPM, as a plain ``float``.
        * pace: whitespace-separated words per second. This is a crude
          stand-in for syllables per second.
        * pitch_variance: variance of the YIN fundamental-frequency track.
        * plot_path: path of a PNG showing the pitch contour.

        When ``audio`` is ``None`` the result is ``("", None, None, None, None)``.
        When the file decodes to an empty signal, the transcription is returned
        with ``None`` for every metric.
    """
    # A live interface also fires when the input is cleared; there is nothing
    # to analyse then, so return blank outputs instead of crashing.
    if audio is None:
        return "", None, None, None, None

    # Convert audio to text using Whisper.
    transcription = transcriber(audio)["text"]

    # sr=None keeps the file's native sampling rate.
    y, sr = librosa.load(audio, sr=None)
    if y.size == 0:
        # No samples: pitch tracking and pace (division by duration) are undefined.
        return transcription, None, None, None, None

    # YIN assumes sr=22050 unless told otherwise, so the native rate must be
    # passed explicitly or every frequency estimate is scaled incorrectly.
    hop_length = 512  # same value librosa uses by default for frame_length=2048
    pitch = librosa.yin(
        y,
        fmin=librosa.note_to_hz('C2'),
        fmax=librosa.note_to_hz('C7'),
        sr=sr,
        hop_length=hop_length,
    )

    # Depending on the librosa version the tempo is a scalar or a 1-element
    # array; normalise to a plain float so the numeric output can display it.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    tempo = float(np.atleast_1d(tempo)[0])

    pitch_variance = float(np.var(pitch))

    # Speaking pace: simplified estimate based on the number of words.
    num_words = len(transcription.split())
    duration = librosa.get_duration(y=y, sr=sr)
    pace = num_words / duration if duration > 0 else 0.0

    # Draw on an explicit figure (not the global pyplot state) and always
    # release it, even if plotting or saving fails.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        frame_times = librosa.times_like(pitch, sr=sr, hop_length=hop_length)
        ax.plot(frame_times, pitch, label='Pitch')
        ax.set_xlabel('Time (s)')
        ax.set_ylabel('Frequency (Hz)')
        ax.set_title('Pitch Over Time')
        ax.legend()

        # A unique temp file per call: a fixed path would be overwritten by
        # concurrent requests and '/tmp' does not exist on every platform.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            pitch_plot_path = tmp.name
        fig.savefig(pitch_plot_path)
    finally:
        plt.close(fig)

    return transcription, tempo, pace, pitch_variance, pitch_plot_path
# Gradio UI: one audio input wired to analyze_audio.
# type="filepath" means the function receives the recording as a file path.
input_audio = gr.Audio(type="filepath", label="Input Audio")

# The output components are listed in the same order as the tuple returned by
# analyze_audio: transcription, tempo, pace, pitch variance, plot image.
iface = gr.Interface(
    fn=analyze_audio,
    inputs=input_audio,
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Number(label="Tempo (BPM)"),
        gr.Number(label="Speaking Pace (syllables/sec)"),
        gr.Number(label="Pitch Variance"),
        gr.Image(label="Pitch Contour Plot"),
    ],
    live=True,
)

# Start the web server; share=False requests no public share link.
iface.launch(share=False)