Michael Hu committed
Commit b10a453 · 1 Parent(s): 5a53a88
fix(stt): handle whisper-large model name as alias for whisper provider
- Allow "whisper-large" to be treated as an alias for the whisper provider
- Remove the special-case mapping that was added in the previous commit
- Update provider-factory to accept model names as aliases for their respective providers
- Remove unused mapping helper and associated logging

The change keeps the CLI/API contract intact while eliminating the need to maintain a hard-coded list of model-to-provider mappings.
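For reference, the resolution order this commit introduces is: exact provider key first, then the "whisper-large" alias, then a model-name-to-provider mapping, and only then an error. Below is a minimal standalone sketch of that order; resolve_provider_name is a hypothetical stand-in, since the enclosing factory method and its callers are not shown in this commit.

# Hypothetical stand-in for the lookup order added in provider_factory.py below.
# `providers` plays the role of cls._providers and `map_model_to_provider` the
# role of cls._map_model_to_provider; neither implementation is in this commit.
def resolve_provider_name(provider_name, providers, map_model_to_provider):
    if provider_name in providers:
        return provider_name                # already a registered provider key
    if provider_name == "whisper-large":
        return "whisper"                    # explicit alias handled first
    mapped = map_model_to_provider(provider_name)
    if mapped:
        return mapped                       # model name mapped to its provider
    # the real code raises SpeechRecognitionException here
    raise ValueError(f"Unknown STT provider: {provider_name}")

For example, resolve_provider_name("whisper-large", {"whisper": object()}, lambda name: None) returns "whisper".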
src/infrastructure/stt/provider_factory.py
CHANGED

@@ -40,14 +40,19 @@ class STTProviderFactory:
         logger.info(f"Available providers: {list(cls._providers.keys())}")
 
         if provider_name not in cls._providers:
-            #
-
-
-
-                provider_name = mapped_provider
+            # Simple handling for whisper-large - just use whisper provider
+            if provider_name == "whisper-large":
+                logger.info("whisper-large requested, using whisper provider")
+                provider_name = "whisper"
             else:
-
-
+                # Check if this is a model name that should be mapped to a provider
+                mapped_provider = cls._map_model_to_provider(provider_name)
+                if mapped_provider:
+                    logger.info(f"Mapped model '{provider_name}' to provider '{mapped_provider}'")
+                    provider_name = mapped_provider
+                else:
+                    logger.error(f"Unknown STT provider: {provider_name}. Available: {list(cls._providers.keys())}")
+                    raise SpeechRecognitionException(f"Unknown STT provider: {provider_name}")
 
         provider_class = cls._providers[provider_name]
 
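The new branch relies on cls._map_model_to_provider, whose implementation is not part of this diff; all the hunk shows is that it returns a registered provider key or a falsy value. The sketch below illustrates that assumed contract only, not the real helper, and the prefix-matching rule is purely an assumption.

# Assumed contract of the mapping helper: given a model name such as
# "whisper-large", return the key of a registered provider it belongs to,
# or None when nothing matches. The actual matching rule is unknown.
def map_model_to_provider(model_name, registered_providers):
    for provider_key in registered_providers:
        if model_name.startswith(provider_key):
            return provider_key
    return None

# map_model_to_provider("whisper-large", {"whisper": object(), "google": object()}) -> "whisper"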
src/infrastructure/stt/whisper_provider.py
CHANGED

@@ -46,18 +46,10 @@ class WhisperSTTProvider(STTProviderBase):
 
         Args:
             audio_path: Path to the preprocessed audio file
-            model: The Whisper model to use (e.g., 'large-v3', 'medium', 'small')
-
         Returns:
             str: The transcribed text
         """
         try:
-            # Load model if not already loaded or if model changed
-            if self.model is None or getattr(self.model, 'model_size_or_path', None) != model:
-                self._load_model(model)
-
-            logger.info(f"Starting Whisper transcription with model {model}")
-
             # Perform transcription
             segments, info = self.model.transcribe(
                 str(audio_path),

@@ -81,33 +73,27 @@
         except Exception as e:
             self._handle_provider_error(e, "transcription")
 
-    def _load_model(self, model_name):
+    def _load_model(self):
         """
         Load the Whisper model.
-
-        Args:
-            model_name: Name of the model to load
         """
         try:
             from faster_whisper import WhisperModel as FasterWhisperModel
-
-            logger.info(f"Loading Whisper model: {model_name}")
+
             logger.info(f"Using device: {self._device}, compute_type: {self._compute_type}")
 
             self.model = FasterWhisperModel(
-                model_name,
+                'large-v3',
                 device=self._device,
                 compute_type=self._compute_type
             )
 
-            logger.info(f"Whisper model {model_name} loaded successfully")
-
         except ImportError as e:
             raise SpeechRecognitionException(
                 "faster-whisper not available. Please install with: pip install faster-whisper"
             ) from e
         except Exception as e:
-            raise SpeechRecognitionException(f"Failed to load Whisper model '{model_name}'") from e
+            raise SpeechRecognitionException(f"Failed to load Whisper model 'large-v3'") from e
 
     def is_available(self) -> bool:
         """