""" Compute fluency score from audio file using SRS and PAS calculations """ import librosa import numpy as np from typing import Dict, Any, Union from .fluency import calc_srs, calculate_pas, calculate_fluency, get_fluency_insight from .filler_analyzer import detect_fillers def compute_fluency_score(file_path: str, whisper_model) -> Dict[str, Any]: """ Compute fluency score and its components from a speech sample. Args: file_path (str): Path to the audio file. whisper_model: Transcription model (e.g., OpenAI Whisper or faster-whisper) Returns: dict: A dictionary containing fluency score, SRS, PAS, and component scores. """ # Transcribe audio result = whisper_model.transcribe(file_path) transcript = result.get("text", "").strip() segments = result.get("segments", []) # Validate early if not transcript or not segments: raise ValueError("Empty transcript or segments from Whisper.") # Detect filler words filler_count, _ = detect_fillers(transcript) # Load audio y, sr = librosa.load(file_path, sr=None) duration = len(y) / sr if sr else 0.0 if duration <= 0: raise ValueError("Audio duration invalid or zero.") # Calculate pitch variation (in semitones) f0, voiced_flags, voiced_probs = librosa.pyin( y, sr=sr, fmin=80, fmax=400, frame_length=1024, hop_length=256, fill_na=np.nan) voiced_f0 = f0[~np.isnan(f0)] pitch_variation = 0.0 if voiced_f0.size > 0: median_f0 = np.nanmedian(voiced_f0) median_f0 = max(median_f0, 1e-6) semitone_diffs = 12 * np.log2(voiced_f0 / median_f0) pitch_variation = float(np.nanstd(semitone_diffs)) # Analyze pauses long_pause_count = 0 if segments: for i in range(len(segments) - 1): pause_dur = segments[i + 1]["start"] - segments[i]["end"] if pause_dur > 1.0: long_pause_count += 1 # Check beginning and end pauses if segments[0]["start"] > 1.0: long_pause_count += 1 if duration - segments[-1]["end"] > 1.0: long_pause_count += 1 # Calculate WPM word_count = len(transcript.split()) words_per_min = (word_count / duration) * 60.0 if duration > 0 else 0.0 # Calculate SRS - Speech Rate Stability srs_score = calc_srs( wpm=words_per_min, filler_count=filler_count, long_pause_count=long_pause_count, pitch_variation=pitch_variation ) # Calculate PAS - Pause Appropriateness Score pas_result = calculate_pas( transcript=transcript, segments=segments, filler_count=filler_count, duration=duration ) pas_score = pas_result["PAS"] # Calculate final fluency score fluency_result = calculate_fluency(srs=srs_score, pas=pas_score) fluency_score = fluency_result["score"] insight = get_fluency_insight(fluency_score) # Build and return comprehensive result return { "fluency_score": fluency_score, "insight": insight, "SRS": srs_score, "PAS": pas_score, "components": { "wpm": words_per_min, "filler_count": filler_count, "long_pause_count": long_pause_count, "pitch_variation": pitch_variation, "word_count": word_count, "duration": duration, "pas_components": pas_result }, "transcript": transcript }