Spaces:
Sleeping
Sleeping
Michael Hu
commited on
Commit
Β·
1be582a
1
Parent(s):
1f9c751
Migrate existing STT providers to infrastructure layer
Browse files- src/infrastructure/base/stt_provider_base.py +10 -0
- src/infrastructure/stt/__init__.py +15 -0
- src/infrastructure/stt/legacy_compatibility.py +150 -0
- src/infrastructure/stt/parakeet_provider.py +122 -0
- src/infrastructure/stt/provider_factory.py +189 -0
- src/infrastructure/stt/whisper_provider.py +154 -0
- test_stt_migration.py +121 -0
src/infrastructure/base/stt_provider_base.py
CHANGED
|
@@ -114,6 +114,16 @@ class STTProviderBase(ISpeechRecognitionService, ABC):
|
|
| 114 |
"""
|
| 115 |
pass
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
def _preprocess_audio(self, audio: 'AudioContent') -> Path:
|
| 118 |
"""
|
| 119 |
Preprocess audio content for transcription.
|
|
|
|
| 114 |
"""
|
| 115 |
pass
|
| 116 |
|
| 117 |
+
@abstractmethod
|
| 118 |
+
def get_default_model(self) -> str:
|
| 119 |
+
"""
|
| 120 |
+
Get the default model for this provider.
|
| 121 |
+
|
| 122 |
+
Returns:
|
| 123 |
+
str: Default model name
|
| 124 |
+
"""
|
| 125 |
+
pass
|
| 126 |
+
|
| 127 |
def _preprocess_audio(self, audio: 'AudioContent') -> Path:
|
| 128 |
"""
|
| 129 |
Preprocess audio content for transcription.
|
src/infrastructure/stt/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""STT provider implementations."""
|
| 2 |
+
|
| 3 |
+
from .whisper_provider import WhisperSTTProvider
|
| 4 |
+
from .parakeet_provider import ParakeetSTTProvider
|
| 5 |
+
from .provider_factory import STTProviderFactory, ASRFactory
|
| 6 |
+
from .legacy_compatibility import transcribe_audio, create_audio_content_from_file
|
| 7 |
+
|
| 8 |
+
__all__ = [
|
| 9 |
+
'WhisperSTTProvider',
|
| 10 |
+
'ParakeetSTTProvider',
|
| 11 |
+
'STTProviderFactory',
|
| 12 |
+
'ASRFactory',
|
| 13 |
+
'transcribe_audio',
|
| 14 |
+
'create_audio_content_from_file'
|
| 15 |
+
]
|
src/infrastructure/stt/legacy_compatibility.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Legacy compatibility functions for STT functionality."""
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Union
|
| 6 |
+
|
| 7 |
+
from .provider_factory import STTProviderFactory
|
| 8 |
+
from ...domain.models.audio_content import AudioContent
|
| 9 |
+
from ...domain.exceptions import SpeechRecognitionException
|
| 10 |
+
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def transcribe_audio(audio_path: Union[str, Path], model_name: str = "parakeet") -> str:
|
| 15 |
+
"""
|
| 16 |
+
Convert audio file to text using specified STT model (legacy interface).
|
| 17 |
+
|
| 18 |
+
This function maintains backward compatibility with the original utils/stt.py interface.
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
audio_path: Path to input audio file
|
| 22 |
+
model_name: Name of the STT model/provider to use (whisper or parakeet)
|
| 23 |
+
|
| 24 |
+
Returns:
|
| 25 |
+
str: Transcribed English text
|
| 26 |
+
|
| 27 |
+
Raises:
|
| 28 |
+
SpeechRecognitionException: If transcription fails
|
| 29 |
+
"""
|
| 30 |
+
logger.info(f"Starting transcription for: {audio_path} using {model_name} model")
|
| 31 |
+
|
| 32 |
+
try:
|
| 33 |
+
# Convert path to Path object
|
| 34 |
+
audio_path = Path(audio_path)
|
| 35 |
+
|
| 36 |
+
if not audio_path.exists():
|
| 37 |
+
raise SpeechRecognitionException(f"Audio file not found: {audio_path}")
|
| 38 |
+
|
| 39 |
+
# Read audio file and create AudioContent
|
| 40 |
+
with open(audio_path, 'rb') as f:
|
| 41 |
+
audio_data = f.read()
|
| 42 |
+
|
| 43 |
+
# Determine audio format from file extension
|
| 44 |
+
audio_format = audio_path.suffix.lower().lstrip('.')
|
| 45 |
+
if audio_format not in ['wav', 'mp3', 'flac', 'ogg']:
|
| 46 |
+
audio_format = 'wav' # Default fallback
|
| 47 |
+
|
| 48 |
+
# Create AudioContent (we'll use reasonable placeholder values)
|
| 49 |
+
# The provider will handle the actual audio analysis during preprocessing
|
| 50 |
+
try:
|
| 51 |
+
audio_content = AudioContent(
|
| 52 |
+
data=audio_data,
|
| 53 |
+
format=audio_format,
|
| 54 |
+
sample_rate=16000, # Standard rate for STT
|
| 55 |
+
duration=max(1.0, len(audio_data) / (16000 * 2)), # Rough estimate
|
| 56 |
+
filename=audio_path.name
|
| 57 |
+
)
|
| 58 |
+
except ValueError:
|
| 59 |
+
# If validation fails, try with minimal valid values
|
| 60 |
+
audio_content = AudioContent(
|
| 61 |
+
data=audio_data,
|
| 62 |
+
format=audio_format,
|
| 63 |
+
sample_rate=16000,
|
| 64 |
+
duration=1.0, # Minimum valid duration
|
| 65 |
+
filename=audio_path.name
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
# Get the appropriate provider
|
| 69 |
+
try:
|
| 70 |
+
provider = STTProviderFactory.create_provider(model_name)
|
| 71 |
+
except SpeechRecognitionException:
|
| 72 |
+
# Fallback to any available provider
|
| 73 |
+
logger.warning(f"Requested provider {model_name} not available, using fallback")
|
| 74 |
+
provider = STTProviderFactory.create_provider_with_fallback(model_name)
|
| 75 |
+
|
| 76 |
+
# Get the default model for the provider
|
| 77 |
+
model = provider.get_default_model()
|
| 78 |
+
|
| 79 |
+
# Transcribe audio
|
| 80 |
+
text_content = provider.transcribe(audio_content, model)
|
| 81 |
+
result = text_content.text
|
| 82 |
+
|
| 83 |
+
logger.info(f"Transcription completed: {result}")
|
| 84 |
+
return result
|
| 85 |
+
|
| 86 |
+
except Exception as e:
|
| 87 |
+
logger.error(f"Transcription failed: {str(e)}", exc_info=True)
|
| 88 |
+
raise SpeechRecognitionException(f"Transcription failed: {str(e)}") from e
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def create_audio_content_from_file(audio_path: Union[str, Path]) -> AudioContent:
|
| 92 |
+
"""
|
| 93 |
+
Create AudioContent from an audio file with proper metadata detection.
|
| 94 |
+
|
| 95 |
+
Args:
|
| 96 |
+
audio_path: Path to the audio file
|
| 97 |
+
|
| 98 |
+
Returns:
|
| 99 |
+
AudioContent: The audio content object
|
| 100 |
+
|
| 101 |
+
Raises:
|
| 102 |
+
SpeechRecognitionException: If file cannot be processed
|
| 103 |
+
"""
|
| 104 |
+
try:
|
| 105 |
+
from pydub import AudioSegment
|
| 106 |
+
|
| 107 |
+
audio_path = Path(audio_path)
|
| 108 |
+
|
| 109 |
+
# Load audio file to get metadata
|
| 110 |
+
audio_segment = AudioSegment.from_file(audio_path)
|
| 111 |
+
|
| 112 |
+
# Read raw audio data
|
| 113 |
+
with open(audio_path, 'rb') as f:
|
| 114 |
+
audio_data = f.read()
|
| 115 |
+
|
| 116 |
+
# Determine format
|
| 117 |
+
audio_format = audio_path.suffix.lower().lstrip('.')
|
| 118 |
+
if audio_format not in ['wav', 'mp3', 'flac', 'ogg']:
|
| 119 |
+
audio_format = 'wav'
|
| 120 |
+
|
| 121 |
+
# Create AudioContent with actual metadata
|
| 122 |
+
return AudioContent(
|
| 123 |
+
data=audio_data,
|
| 124 |
+
format=audio_format,
|
| 125 |
+
sample_rate=audio_segment.frame_rate,
|
| 126 |
+
duration=len(audio_segment) / 1000.0, # Convert ms to seconds
|
| 127 |
+
filename=audio_path.name
|
| 128 |
+
)
|
| 129 |
+
|
| 130 |
+
except ImportError:
|
| 131 |
+
# Fallback without pydub
|
| 132 |
+
logger.warning("pydub not available, using placeholder metadata")
|
| 133 |
+
|
| 134 |
+
with open(audio_path, 'rb') as f:
|
| 135 |
+
audio_data = f.read()
|
| 136 |
+
|
| 137 |
+
audio_format = Path(audio_path).suffix.lower().lstrip('.')
|
| 138 |
+
if audio_format not in ['wav', 'mp3', 'flac', 'ogg']:
|
| 139 |
+
audio_format = 'wav'
|
| 140 |
+
|
| 141 |
+
return AudioContent(
|
| 142 |
+
data=audio_data,
|
| 143 |
+
format=audio_format,
|
| 144 |
+
sample_rate=16000, # Default
|
| 145 |
+
duration=1.0, # Placeholder
|
| 146 |
+
filename=Path(audio_path).name
|
| 147 |
+
)
|
| 148 |
+
|
| 149 |
+
except Exception as e:
|
| 150 |
+
raise SpeechRecognitionException(f"Failed to create AudioContent from file: {str(e)}") from e
|
src/infrastructure/stt/parakeet_provider.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Parakeet STT provider implementation."""
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import TYPE_CHECKING
|
| 6 |
+
|
| 7 |
+
if TYPE_CHECKING:
|
| 8 |
+
from ...domain.models.audio_content import AudioContent
|
| 9 |
+
from ...domain.models.text_content import TextContent
|
| 10 |
+
|
| 11 |
+
from ..base.stt_provider_base import STTProviderBase
|
| 12 |
+
from ...domain.exceptions import SpeechRecognitionException
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class ParakeetSTTProvider(STTProviderBase):
|
| 18 |
+
"""Parakeet STT provider using NVIDIA NeMo implementation."""
|
| 19 |
+
|
| 20 |
+
def __init__(self):
|
| 21 |
+
"""Initialize the Parakeet STT provider."""
|
| 22 |
+
super().__init__(
|
| 23 |
+
provider_name="Parakeet",
|
| 24 |
+
supported_languages=["en"] # Parakeet primarily supports English
|
| 25 |
+
)
|
| 26 |
+
self.model = None
|
| 27 |
+
|
| 28 |
+
def _perform_transcription(self, audio_path: Path, model: str) -> str:
|
| 29 |
+
"""
|
| 30 |
+
Perform transcription using Parakeet.
|
| 31 |
+
|
| 32 |
+
Args:
|
| 33 |
+
audio_path: Path to the preprocessed audio file
|
| 34 |
+
model: The Parakeet model to use
|
| 35 |
+
|
| 36 |
+
Returns:
|
| 37 |
+
str: The transcribed text
|
| 38 |
+
"""
|
| 39 |
+
try:
|
| 40 |
+
# Load model if not already loaded
|
| 41 |
+
if self.model is None:
|
| 42 |
+
self._load_model(model)
|
| 43 |
+
|
| 44 |
+
logger.info(f"Starting Parakeet transcription with model {model}")
|
| 45 |
+
|
| 46 |
+
# Perform transcription
|
| 47 |
+
output = self.model.transcribe([str(audio_path)])
|
| 48 |
+
result = output[0].text if output and len(output) > 0 else ""
|
| 49 |
+
|
| 50 |
+
logger.info("Parakeet transcription completed successfully")
|
| 51 |
+
return result
|
| 52 |
+
|
| 53 |
+
except Exception as e:
|
| 54 |
+
self._handle_provider_error(e, "transcription")
|
| 55 |
+
|
| 56 |
+
def _load_model(self, model_name: str):
|
| 57 |
+
"""
|
| 58 |
+
Load the Parakeet model.
|
| 59 |
+
|
| 60 |
+
Args:
|
| 61 |
+
model_name: Name of the model to load
|
| 62 |
+
"""
|
| 63 |
+
try:
|
| 64 |
+
import nemo.collections.asr as nemo_asr
|
| 65 |
+
|
| 66 |
+
logger.info(f"Loading Parakeet model: {model_name}")
|
| 67 |
+
|
| 68 |
+
# Map model names to actual model identifiers
|
| 69 |
+
model_mapping = {
|
| 70 |
+
"parakeet-tdt-0.6b-v2": "nvidia/parakeet-tdt-0.6b-v2",
|
| 71 |
+
"parakeet-tdt-1.1b": "nvidia/parakeet-tdt-1.1b",
|
| 72 |
+
"parakeet-ctc-0.6b": "nvidia/parakeet-ctc-0.6b",
|
| 73 |
+
"default": "nvidia/parakeet-tdt-0.6b-v2"
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
actual_model_name = model_mapping.get(model_name, model_mapping["default"])
|
| 77 |
+
|
| 78 |
+
self.model = nemo_asr.models.ASRModel.from_pretrained(model_name=actual_model_name)
|
| 79 |
+
logger.info(f"Parakeet model {model_name} loaded successfully")
|
| 80 |
+
|
| 81 |
+
except ImportError as e:
|
| 82 |
+
raise SpeechRecognitionException(
|
| 83 |
+
"nemo_toolkit not available. Please install with: pip install -U 'nemo_toolkit[asr]'"
|
| 84 |
+
) from e
|
| 85 |
+
except Exception as e:
|
| 86 |
+
raise SpeechRecognitionException(f"Failed to load Parakeet model {model_name}: {str(e)}") from e
|
| 87 |
+
|
| 88 |
+
def is_available(self) -> bool:
|
| 89 |
+
"""
|
| 90 |
+
Check if the Parakeet provider is available.
|
| 91 |
+
|
| 92 |
+
Returns:
|
| 93 |
+
bool: True if nemo_toolkit is available, False otherwise
|
| 94 |
+
"""
|
| 95 |
+
try:
|
| 96 |
+
import nemo.collections.asr
|
| 97 |
+
return True
|
| 98 |
+
except ImportError:
|
| 99 |
+
logger.warning("nemo_toolkit not available")
|
| 100 |
+
return False
|
| 101 |
+
|
| 102 |
+
def get_available_models(self) -> list[str]:
|
| 103 |
+
"""
|
| 104 |
+
Get list of available Parakeet models.
|
| 105 |
+
|
| 106 |
+
Returns:
|
| 107 |
+
list[str]: List of available model names
|
| 108 |
+
"""
|
| 109 |
+
return [
|
| 110 |
+
"parakeet-tdt-0.6b-v2",
|
| 111 |
+
"parakeet-tdt-1.1b",
|
| 112 |
+
"parakeet-ctc-0.6b"
|
| 113 |
+
]
|
| 114 |
+
|
| 115 |
+
def get_default_model(self) -> str:
|
| 116 |
+
"""
|
| 117 |
+
Get the default model for this provider.
|
| 118 |
+
|
| 119 |
+
Returns:
|
| 120 |
+
str: Default model name
|
| 121 |
+
"""
|
| 122 |
+
return "parakeet-tdt-0.6b-v2"
|
src/infrastructure/stt/provider_factory.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Factory for creating STT provider instances."""
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
from typing import Dict, Type, Optional
|
| 5 |
+
|
| 6 |
+
from ..base.stt_provider_base import STTProviderBase
|
| 7 |
+
from .whisper_provider import WhisperSTTProvider
|
| 8 |
+
from .parakeet_provider import ParakeetSTTProvider
|
| 9 |
+
from ...domain.exceptions import SpeechRecognitionException
|
| 10 |
+
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class STTProviderFactory:
|
| 15 |
+
"""Factory for creating STT provider instances with availability checking and fallback logic."""
|
| 16 |
+
|
| 17 |
+
_providers: Dict[str, Type[STTProviderBase]] = {
|
| 18 |
+
"whisper": WhisperSTTProvider,
|
| 19 |
+
"parakeet": ParakeetSTTProvider
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
_fallback_order = ["whisper", "parakeet"]
|
| 23 |
+
|
| 24 |
+
@classmethod
|
| 25 |
+
def create_provider(cls, provider_name: str) -> STTProviderBase:
|
| 26 |
+
"""
|
| 27 |
+
Create an STT provider instance by name.
|
| 28 |
+
|
| 29 |
+
Args:
|
| 30 |
+
provider_name: Name of the provider to create
|
| 31 |
+
|
| 32 |
+
Returns:
|
| 33 |
+
STTProviderBase: The created provider instance
|
| 34 |
+
|
| 35 |
+
Raises:
|
| 36 |
+
SpeechRecognitionException: If provider is not available or creation fails
|
| 37 |
+
"""
|
| 38 |
+
provider_name = provider_name.lower()
|
| 39 |
+
|
| 40 |
+
if provider_name not in cls._providers:
|
| 41 |
+
raise SpeechRecognitionException(f"Unknown STT provider: {provider_name}")
|
| 42 |
+
|
| 43 |
+
provider_class = cls._providers[provider_name]
|
| 44 |
+
|
| 45 |
+
try:
|
| 46 |
+
provider = provider_class()
|
| 47 |
+
|
| 48 |
+
if not provider.is_available():
|
| 49 |
+
raise SpeechRecognitionException(f"STT provider {provider_name} is not available")
|
| 50 |
+
|
| 51 |
+
logger.info(f"Created STT provider: {provider_name}")
|
| 52 |
+
return provider
|
| 53 |
+
|
| 54 |
+
except Exception as e:
|
| 55 |
+
logger.error(f"Failed to create STT provider {provider_name}: {str(e)}")
|
| 56 |
+
raise SpeechRecognitionException(f"Failed to create STT provider {provider_name}: {str(e)}") from e
|
| 57 |
+
|
| 58 |
+
@classmethod
|
| 59 |
+
def create_provider_with_fallback(cls, preferred_provider: str) -> STTProviderBase:
|
| 60 |
+
"""
|
| 61 |
+
Create an STT provider with fallback to other available providers.
|
| 62 |
+
|
| 63 |
+
Args:
|
| 64 |
+
preferred_provider: The preferred provider name
|
| 65 |
+
|
| 66 |
+
Returns:
|
| 67 |
+
STTProviderBase: The created provider instance
|
| 68 |
+
|
| 69 |
+
Raises:
|
| 70 |
+
SpeechRecognitionException: If no providers are available
|
| 71 |
+
"""
|
| 72 |
+
# Try preferred provider first
|
| 73 |
+
try:
|
| 74 |
+
return cls.create_provider(preferred_provider)
|
| 75 |
+
except SpeechRecognitionException as e:
|
| 76 |
+
logger.warning(f"Preferred STT provider {preferred_provider} failed: {str(e)}")
|
| 77 |
+
|
| 78 |
+
# Try fallback providers
|
| 79 |
+
for provider_name in cls._fallback_order:
|
| 80 |
+
if provider_name.lower() == preferred_provider.lower():
|
| 81 |
+
continue # Skip the preferred provider we already tried
|
| 82 |
+
|
| 83 |
+
try:
|
| 84 |
+
logger.info(f"Trying fallback STT provider: {provider_name}")
|
| 85 |
+
return cls.create_provider(provider_name)
|
| 86 |
+
except SpeechRecognitionException as e:
|
| 87 |
+
logger.warning(f"Fallback STT provider {provider_name} failed: {str(e)}")
|
| 88 |
+
continue
|
| 89 |
+
|
| 90 |
+
raise SpeechRecognitionException("No STT providers are available")
|
| 91 |
+
|
| 92 |
+
@classmethod
|
| 93 |
+
def get_available_providers(cls) -> list[str]:
|
| 94 |
+
"""
|
| 95 |
+
Get list of available STT providers.
|
| 96 |
+
|
| 97 |
+
Returns:
|
| 98 |
+
list[str]: List of available provider names
|
| 99 |
+
"""
|
| 100 |
+
available = []
|
| 101 |
+
|
| 102 |
+
for provider_name, provider_class in cls._providers.items():
|
| 103 |
+
try:
|
| 104 |
+
provider = provider_class()
|
| 105 |
+
if provider.is_available():
|
| 106 |
+
available.append(provider_name)
|
| 107 |
+
except Exception as e:
|
| 108 |
+
logger.debug(f"Provider {provider_name} not available: {str(e)}")
|
| 109 |
+
|
| 110 |
+
return available
|
| 111 |
+
|
| 112 |
+
@classmethod
|
| 113 |
+
def get_provider_info(cls, provider_name: str) -> Optional[dict]:
|
| 114 |
+
"""
|
| 115 |
+
Get information about a specific provider.
|
| 116 |
+
|
| 117 |
+
Args:
|
| 118 |
+
provider_name: Name of the provider
|
| 119 |
+
|
| 120 |
+
Returns:
|
| 121 |
+
Optional[dict]: Provider information or None if not found
|
| 122 |
+
"""
|
| 123 |
+
provider_name = provider_name.lower()
|
| 124 |
+
|
| 125 |
+
if provider_name not in cls._providers:
|
| 126 |
+
return None
|
| 127 |
+
|
| 128 |
+
provider_class = cls._providers[provider_name]
|
| 129 |
+
|
| 130 |
+
try:
|
| 131 |
+
provider = provider_class()
|
| 132 |
+
return {
|
| 133 |
+
"name": provider.provider_name,
|
| 134 |
+
"available": provider.is_available(),
|
| 135 |
+
"supported_languages": provider.supported_languages,
|
| 136 |
+
"available_models": provider.get_available_models() if provider.is_available() else [],
|
| 137 |
+
"default_model": provider.get_default_model() if provider.is_available() else None
|
| 138 |
+
}
|
| 139 |
+
except Exception as e:
|
| 140 |
+
logger.debug(f"Failed to get info for provider {provider_name}: {str(e)}")
|
| 141 |
+
return {
|
| 142 |
+
"name": provider_name,
|
| 143 |
+
"available": False,
|
| 144 |
+
"error": str(e)
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
@classmethod
|
| 148 |
+
def register_provider(cls, name: str, provider_class: Type[STTProviderBase]) -> None:
|
| 149 |
+
"""
|
| 150 |
+
Register a new STT provider.
|
| 151 |
+
|
| 152 |
+
Args:
|
| 153 |
+
name: Name of the provider
|
| 154 |
+
provider_class: The provider class
|
| 155 |
+
"""
|
| 156 |
+
cls._providers[name.lower()] = provider_class
|
| 157 |
+
logger.info(f"Registered STT provider: {name}")
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
# Legacy compatibility - create an ASRFactory alias
|
| 161 |
+
class ASRFactory:
|
| 162 |
+
"""Legacy ASRFactory for backward compatibility."""
|
| 163 |
+
|
| 164 |
+
@staticmethod
|
| 165 |
+
def get_model(model_name: str = "parakeet") -> STTProviderBase:
|
| 166 |
+
"""
|
| 167 |
+
Get STT provider by model name (legacy interface).
|
| 168 |
+
|
| 169 |
+
Args:
|
| 170 |
+
model_name: Name of the model/provider to use
|
| 171 |
+
|
| 172 |
+
Returns:
|
| 173 |
+
STTProviderBase: The provider instance
|
| 174 |
+
"""
|
| 175 |
+
# Map legacy model names to provider names
|
| 176 |
+
provider_mapping = {
|
| 177 |
+
"whisper": "whisper",
|
| 178 |
+
"parakeet": "parakeet",
|
| 179 |
+
"faster-whisper": "whisper"
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
provider_name = provider_mapping.get(model_name.lower(), model_name.lower())
|
| 183 |
+
|
| 184 |
+
try:
|
| 185 |
+
return STTProviderFactory.create_provider(provider_name)
|
| 186 |
+
except SpeechRecognitionException:
|
| 187 |
+
# Fallback to any available provider
|
| 188 |
+
logger.warning(f"Requested provider {provider_name} not available, using fallback")
|
| 189 |
+
return STTProviderFactory.create_provider_with_fallback(provider_name)
|
src/infrastructure/stt/whisper_provider.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Whisper STT provider implementation."""
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import TYPE_CHECKING
|
| 6 |
+
|
| 7 |
+
if TYPE_CHECKING:
|
| 8 |
+
from ...domain.models.audio_content import AudioContent
|
| 9 |
+
from ...domain.models.text_content import TextContent
|
| 10 |
+
|
| 11 |
+
from ..base.stt_provider_base import STTProviderBase
|
| 12 |
+
from ...domain.exceptions import SpeechRecognitionException
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class WhisperSTTProvider(STTProviderBase):
|
| 18 |
+
"""Whisper STT provider using faster-whisper implementation."""
|
| 19 |
+
|
| 20 |
+
def __init__(self):
|
| 21 |
+
"""Initialize the Whisper STT provider."""
|
| 22 |
+
super().__init__(
|
| 23 |
+
provider_name="Whisper",
|
| 24 |
+
supported_languages=["en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"]
|
| 25 |
+
)
|
| 26 |
+
self.model = None
|
| 27 |
+
self._device = None
|
| 28 |
+
self._compute_type = None
|
| 29 |
+
self._initialize_device_settings()
|
| 30 |
+
|
| 31 |
+
def _initialize_device_settings(self):
|
| 32 |
+
"""Initialize device and compute type settings."""
|
| 33 |
+
try:
|
| 34 |
+
import torch
|
| 35 |
+
self._device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 36 |
+
except ImportError:
|
| 37 |
+
# Fallback to CPU if torch is not available
|
| 38 |
+
self._device = "cpu"
|
| 39 |
+
|
| 40 |
+
self._compute_type = "float16" if self._device == "cuda" else "int8"
|
| 41 |
+
logger.info(f"Whisper provider initialized with device: {self._device}, compute_type: {self._compute_type}")
|
| 42 |
+
|
| 43 |
+
def _perform_transcription(self, audio_path: Path, model: str) -> str:
|
| 44 |
+
"""
|
| 45 |
+
Perform transcription using Faster Whisper.
|
| 46 |
+
|
| 47 |
+
Args:
|
| 48 |
+
audio_path: Path to the preprocessed audio file
|
| 49 |
+
model: The Whisper model to use (e.g., 'large-v3', 'medium', 'small')
|
| 50 |
+
|
| 51 |
+
Returns:
|
| 52 |
+
str: The transcribed text
|
| 53 |
+
"""
|
| 54 |
+
try:
|
| 55 |
+
# Load model if not already loaded or if model changed
|
| 56 |
+
if self.model is None or getattr(self.model, 'model_size_or_path', None) != model:
|
| 57 |
+
self._load_model(model)
|
| 58 |
+
|
| 59 |
+
logger.info(f"Starting Whisper transcription with model {model}")
|
| 60 |
+
|
| 61 |
+
# Perform transcription
|
| 62 |
+
segments, info = self.model.transcribe(
|
| 63 |
+
str(audio_path),
|
| 64 |
+
beam_size=5,
|
| 65 |
+
language="en", # Can be made configurable
|
| 66 |
+
task="transcribe"
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
logger.info(f"Detected language '{info.language}' with probability {info.language_probability}")
|
| 70 |
+
|
| 71 |
+
# Collect all segments into a single text
|
| 72 |
+
result_text = ""
|
| 73 |
+
for segment in segments:
|
| 74 |
+
result_text += segment.text + " "
|
| 75 |
+
logger.debug(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
|
| 76 |
+
|
| 77 |
+
result = result_text.strip()
|
| 78 |
+
logger.info("Whisper transcription completed successfully")
|
| 79 |
+
return result
|
| 80 |
+
|
| 81 |
+
except Exception as e:
|
| 82 |
+
self._handle_provider_error(e, "transcription")
|
| 83 |
+
|
| 84 |
+
def _load_model(self, model_name: str):
|
| 85 |
+
"""
|
| 86 |
+
Load the Whisper model.
|
| 87 |
+
|
| 88 |
+
Args:
|
| 89 |
+
model_name: Name of the model to load
|
| 90 |
+
"""
|
| 91 |
+
try:
|
| 92 |
+
from faster_whisper import WhisperModel as FasterWhisperModel
|
| 93 |
+
|
| 94 |
+
logger.info(f"Loading Whisper model: {model_name}")
|
| 95 |
+
logger.info(f"Using device: {self._device}, compute_type: {self._compute_type}")
|
| 96 |
+
|
| 97 |
+
self.model = FasterWhisperModel(
|
| 98 |
+
model_name,
|
| 99 |
+
device=self._device,
|
| 100 |
+
compute_type=self._compute_type
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
logger.info(f"Whisper model {model_name} loaded successfully")
|
| 104 |
+
|
| 105 |
+
except ImportError as e:
|
| 106 |
+
raise SpeechRecognitionException(
|
| 107 |
+
"faster-whisper not available. Please install with: pip install faster-whisper"
|
| 108 |
+
) from e
|
| 109 |
+
except Exception as e:
|
| 110 |
+
raise SpeechRecognitionException(f"Failed to load Whisper model {model_name}: {str(e)}") from e
|
| 111 |
+
|
| 112 |
+
def is_available(self) -> bool:
|
| 113 |
+
"""
|
| 114 |
+
Check if the Whisper provider is available.
|
| 115 |
+
|
| 116 |
+
Returns:
|
| 117 |
+
bool: True if faster-whisper is available, False otherwise
|
| 118 |
+
"""
|
| 119 |
+
try:
|
| 120 |
+
import faster_whisper
|
| 121 |
+
return True
|
| 122 |
+
except ImportError:
|
| 123 |
+
logger.warning("faster-whisper not available")
|
| 124 |
+
return False
|
| 125 |
+
|
| 126 |
+
def get_available_models(self) -> list[str]:
|
| 127 |
+
"""
|
| 128 |
+
Get list of available Whisper models.
|
| 129 |
+
|
| 130 |
+
Returns:
|
| 131 |
+
list[str]: List of available model names
|
| 132 |
+
"""
|
| 133 |
+
return [
|
| 134 |
+
"tiny",
|
| 135 |
+
"tiny.en",
|
| 136 |
+
"base",
|
| 137 |
+
"base.en",
|
| 138 |
+
"small",
|
| 139 |
+
"small.en",
|
| 140 |
+
"medium",
|
| 141 |
+
"medium.en",
|
| 142 |
+
"large-v1",
|
| 143 |
+
"large-v2",
|
| 144 |
+
"large-v3"
|
| 145 |
+
]
|
| 146 |
+
|
| 147 |
+
def get_default_model(self) -> str:
|
| 148 |
+
"""
|
| 149 |
+
Get the default model for this provider.
|
| 150 |
+
|
| 151 |
+
Returns:
|
| 152 |
+
str: Default model name
|
| 153 |
+
"""
|
| 154 |
+
return "large-v3"
|
test_stt_migration.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Test script for STT migration."""
|
| 3 |
+
|
| 4 |
+
import sys
|
| 5 |
+
import logging
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
# Add src to path
|
| 9 |
+
sys.path.insert(0, str(Path(__file__).parent / "src"))
|
| 10 |
+
|
| 11 |
+
# Configure logging
|
| 12 |
+
logging.basicConfig(level=logging.INFO)
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
def test_provider_availability():
|
| 16 |
+
"""Test that providers can be imported and checked for availability."""
|
| 17 |
+
try:
|
| 18 |
+
from infrastructure.stt import STTProviderFactory, WhisperSTTProvider, ParakeetSTTProvider
|
| 19 |
+
|
| 20 |
+
print("β Successfully imported STT providers")
|
| 21 |
+
|
| 22 |
+
# Test factory
|
| 23 |
+
available_providers = STTProviderFactory.get_available_providers()
|
| 24 |
+
print(f"Available providers: {available_providers}")
|
| 25 |
+
|
| 26 |
+
# Test individual providers
|
| 27 |
+
whisper = WhisperSTTProvider()
|
| 28 |
+
print(f"Whisper available: {whisper.is_available()}")
|
| 29 |
+
print(f"Whisper models: {whisper.get_available_models()}")
|
| 30 |
+
print(f"Whisper default model: {whisper.get_default_model()}")
|
| 31 |
+
|
| 32 |
+
parakeet = ParakeetSTTProvider()
|
| 33 |
+
print(f"Parakeet available: {parakeet.is_available()}")
|
| 34 |
+
print(f"Parakeet models: {parakeet.get_available_models()}")
|
| 35 |
+
print(f"Parakeet default model: {parakeet.get_default_model()}")
|
| 36 |
+
|
| 37 |
+
return True
|
| 38 |
+
|
| 39 |
+
except Exception as e:
|
| 40 |
+
print(f"β Error testing providers: {e}")
|
| 41 |
+
import traceback
|
| 42 |
+
traceback.print_exc()
|
| 43 |
+
return False
|
| 44 |
+
|
| 45 |
+
def test_legacy_compatibility():
|
| 46 |
+
"""Test legacy compatibility functions."""
|
| 47 |
+
try:
|
| 48 |
+
from infrastructure.stt import transcribe_audio, ASRFactory
|
| 49 |
+
|
| 50 |
+
print("β Successfully imported legacy compatibility functions")
|
| 51 |
+
|
| 52 |
+
# Test ASRFactory
|
| 53 |
+
try:
|
| 54 |
+
model = ASRFactory.get_model("whisper")
|
| 55 |
+
print(f"β ASRFactory created model: {model.provider_name}")
|
| 56 |
+
except Exception as e:
|
| 57 |
+
print(f"ASRFactory test failed (expected if dependencies missing): {e}")
|
| 58 |
+
|
| 59 |
+
return True
|
| 60 |
+
|
| 61 |
+
except Exception as e:
|
| 62 |
+
print(f"β Error testing legacy compatibility: {e}")
|
| 63 |
+
import traceback
|
| 64 |
+
traceback.print_exc()
|
| 65 |
+
return False
|
| 66 |
+
|
| 67 |
+
def test_domain_integration():
|
| 68 |
+
"""Test integration with domain models."""
|
| 69 |
+
try:
|
| 70 |
+
from domain.models.audio_content import AudioContent
|
| 71 |
+
from domain.models.text_content import TextContent
|
| 72 |
+
from domain.exceptions import SpeechRecognitionException
|
| 73 |
+
|
| 74 |
+
print("β Successfully imported domain models")
|
| 75 |
+
|
| 76 |
+
# Create test audio content
|
| 77 |
+
test_audio = AudioContent(
|
| 78 |
+
data=b"fake audio data for testing",
|
| 79 |
+
format="wav",
|
| 80 |
+
sample_rate=16000,
|
| 81 |
+
duration=1.0,
|
| 82 |
+
filename="test.wav"
|
| 83 |
+
)
|
| 84 |
+
|
| 85 |
+
print(f"β Created test AudioContent: {test_audio.filename}")
|
| 86 |
+
|
| 87 |
+
return True
|
| 88 |
+
|
| 89 |
+
except Exception as e:
|
| 90 |
+
print(f"β Error testing domain integration: {e}")
|
| 91 |
+
import traceback
|
| 92 |
+
traceback.print_exc()
|
| 93 |
+
return False
|
| 94 |
+
|
| 95 |
+
if __name__ == "__main__":
|
| 96 |
+
print("Testing STT migration...")
|
| 97 |
+
print("=" * 50)
|
| 98 |
+
|
| 99 |
+
tests = [
|
| 100 |
+
("Provider Availability", test_provider_availability),
|
| 101 |
+
("Legacy Compatibility", test_legacy_compatibility),
|
| 102 |
+
("Domain Integration", test_domain_integration)
|
| 103 |
+
]
|
| 104 |
+
|
| 105 |
+
results = []
|
| 106 |
+
for test_name, test_func in tests:
|
| 107 |
+
print(f"\n{test_name}:")
|
| 108 |
+
print("-" * 30)
|
| 109 |
+
result = test_func()
|
| 110 |
+
results.append((test_name, result))
|
| 111 |
+
|
| 112 |
+
print("\n" + "=" * 50)
|
| 113 |
+
print("Test Results:")
|
| 114 |
+
for test_name, result in results:
|
| 115 |
+
status = "β PASS" if result else "β FAIL"
|
| 116 |
+
print(f"{test_name}: {status}")
|
| 117 |
+
|
| 118 |
+
all_passed = all(result for _, result in results)
|
| 119 |
+
print(f"\nOverall: {'β ALL TESTS PASSED' if all_passed else 'β SOME TESTS FAILED'}")
|
| 120 |
+
|
| 121 |
+
sys.exit(0 if all_passed else 1)
|