"""Legacy compatibility functions for STT functionality.""" import logging from pathlib import Path from typing import Union from .provider_factory import STTProviderFactory from ...domain.models.audio_content import AudioContent from ...domain.exceptions import SpeechRecognitionException logger = logging.getLogger(__name__) def transcribe_audio(audio_path: Union[str, Path], model_name: str = "parakeet") -> str: """ Convert audio file to text using specified STT model (legacy interface). This function maintains backward compatibility with the original utils/stt.py interface. Args: audio_path: Path to input audio file model_name: Name of the STT model/provider to use (whisper or parakeet) Returns: str: Transcribed English text Raises: SpeechRecognitionException: If transcription fails """ logger.info(f"Starting transcription for: {audio_path} using {model_name} model") try: # Convert path to Path object audio_path = Path(audio_path) if not audio_path.exists(): raise SpeechRecognitionException(f"Audio file not found: {audio_path}") # Read audio file and create AudioContent with open(audio_path, 'rb') as f: audio_data = f.read() # Determine audio format from file extension audio_format = audio_path.suffix.lower().lstrip('.') if audio_format not in ['wav', 'mp3', 'flac', 'ogg']: audio_format = 'wav' # Default fallback # Create AudioContent (we'll use reasonable placeholder values) # The provider will handle the actual audio analysis during preprocessing try: audio_content = AudioContent( data=audio_data, format=audio_format, sample_rate=16000, # Standard rate for STT duration=max(1.0, len(audio_data) / (16000 * 2)), # Rough estimate filename=audio_path.name ) except ValueError: # If validation fails, try with minimal valid values audio_content = AudioContent( data=audio_data, format=audio_format, sample_rate=16000, duration=1.0, # Minimum valid duration filename=audio_path.name ) # Get the appropriate provider try: provider = STTProviderFactory.create_provider(model_name) except SpeechRecognitionException: # Fallback to any available provider logger.warning(f"Requested provider {model_name} not available, using fallback") provider = STTProviderFactory.create_provider_with_fallback(model_name) # Get the default model for the provider model = provider.get_default_model() # Transcribe audio text_content = provider.transcribe(audio_content, model) result = text_content.text logger.info(f"Transcription completed: {result}") return result except Exception as e: logger.error(f"Transcription failed: {str(e)}", exc_info=True) raise SpeechRecognitionException(f"Transcription failed: {str(e)}") from e def create_audio_content_from_file(audio_path: Union[str, Path]) -> AudioContent: """ Create AudioContent from an audio file with proper metadata detection. Args: audio_path: Path to the audio file Returns: AudioContent: The audio content object Raises: SpeechRecognitionException: If file cannot be processed """ try: from pydub import AudioSegment audio_path = Path(audio_path) # Load audio file to get metadata audio_segment = AudioSegment.from_file(audio_path) # Read raw audio data with open(audio_path, 'rb') as f: audio_data = f.read() # Determine format audio_format = audio_path.suffix.lower().lstrip('.') if audio_format not in ['wav', 'mp3', 'flac', 'ogg']: audio_format = 'wav' # Create AudioContent with actual metadata return AudioContent( data=audio_data, format=audio_format, sample_rate=audio_segment.frame_rate, duration=len(audio_segment) / 1000.0, # Convert ms to seconds filename=audio_path.name ) except ImportError: # Fallback without pydub logger.warning("pydub not available, using placeholder metadata") with open(audio_path, 'rb') as f: audio_data = f.read() audio_format = Path(audio_path).suffix.lower().lstrip('.') if audio_format not in ['wav', 'mp3', 'flac', 'ogg']: audio_format = 'wav' return AudioContent( data=audio_data, format=audio_format, sample_rate=16000, # Default duration=1.0, # Placeholder filename=Path(audio_path).name ) except Exception as e: raise SpeechRecognitionException(f"Failed to create AudioContent from file: {str(e)}") from e