Michael Hu committed on
Commit
5a53a88
·
1 Parent(s): 619b266

fix(stt): improve provider selection and error handling in STT service

Browse files

- Add detailed logging for provider creation and fallback behavior
- Add proper error handling when a requested provider is unavailable
- Add fallback mechanism when the requested provider is unavailable
- Add mapping for legacy model names (e.g. "whisper-large" -> "whisper")

src/infrastructure/config/dependency_container.py CHANGED
@@ -340,12 +340,25 @@ class DependencyContainer:
340
  Returns:
341
  ISpeechRecognitionService: STT provider instance
342
  """
 
343
  factory = self.resolve(STTProviderFactory)
344
 
345
  if provider_name:
346
- return factory.create_provider(provider_name)
 
 
 
 
 
 
 
 
 
 
 
347
  else:
348
  preferred_provider = self._config.stt.default_model
 
349
  return factory.create_provider_with_fallback(preferred_provider)
350
 
351
  def get_translation_provider(
 
340
  Returns:
341
  ISpeechRecognitionService: STT provider instance
342
  """
343
+ logger.info(f"🎯 Requesting STT provider: {provider_name or 'default'}")
344
  factory = self.resolve(STTProviderFactory)
345
 
346
  if provider_name:
347
+ logger.info(f"🔧 Attempting to create specific STT provider: {provider_name}")
348
+ try:
349
+ provider = factory.create_provider(provider_name)
350
+ logger.info(f"✅ Successfully created STT provider: {provider_name}")
351
+ return provider
352
+ except Exception as e:
353
+ logger.warning(f"❌ Failed to create specific STT provider {provider_name}: {e}")
354
+ logger.info("🔄 Falling back to default provider selection")
355
+ # Fall back to default provider selection
356
+ preferred_providers = self._config.stt.preferred_providers
357
+ logger.info(f"📋 Preferred providers for fallback: {preferred_providers}")
358
+ return factory.create_provider_with_fallback(preferred_providers[0] if preferred_providers else "whisper")
359
  else:
360
  preferred_provider = self._config.stt.default_model
361
+ logger.info(f"📋 Using default provider: {preferred_provider}")
362
  return factory.create_provider_with_fallback(preferred_provider)
363
 
364
  def get_translation_provider(
src/infrastructure/stt/provider_factory.py CHANGED
@@ -34,9 +34,20 @@ class STTProviderFactory:
34
  SpeechRecognitionException: If provider is not available or creation fails
35
  """
36
  provider_name = provider_name.lower()
 
 
 
 
37
 
38
  if provider_name not in cls._providers:
39
- raise SpeechRecognitionException(f"Unknown STT provider: {provider_name}")
 
 
 
 
 
 
 
40
 
41
  provider_class = cls._providers[provider_name]
42
 
@@ -154,6 +165,49 @@ class STTProviderFactory:
154
  cls._providers[name.lower()] = provider_class
155
  logger.info(f"Registered STT provider: {name}")
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
  # Legacy compatibility - create an ASRFactory alias
159
  class ASRFactory:
 
34
  SpeechRecognitionException: If provider is not available or creation fails
35
  """
36
  provider_name = provider_name.lower()
37
+
38
+ # Debug logging
39
+ logger.info(f"Attempting to create STT provider: '{provider_name}'")
40
+ logger.info(f"Available providers: {list(cls._providers.keys())}")
41
 
42
  if provider_name not in cls._providers:
43
+ # Check if this is a model name that should be mapped to a provider
44
+ mapped_provider = cls._map_model_to_provider(provider_name)
45
+ if mapped_provider:
46
+ logger.info(f"Mapped model '{provider_name}' to provider '{mapped_provider}'")
47
+ provider_name = mapped_provider
48
+ else:
49
+ logger.error(f"Unknown STT provider: {provider_name}. Available: {list(cls._providers.keys())}")
50
+ raise SpeechRecognitionException(f"Unknown STT provider: {provider_name}")
51
 
52
  provider_class = cls._providers[provider_name]
53
 
 
165
  cls._providers[name.lower()] = provider_class
166
  logger.info(f"Registered STT provider: {name}")
167
 
168
+ @classmethod
169
+ def _map_model_to_provider(cls, model_name: str) -> Optional[str]:
170
+ """
171
+ Map a specific model name to a provider name.
172
+
173
+ Args:
174
+ model_name: The model name to map
175
+
176
+ Returns:
177
+ Optional[str]: The provider name if mapping exists, None otherwise
178
+ """
179
+ # Define model-to-provider mappings
180
+ model_mapping = {
181
+ # Whisper model variants -> whisper provider
182
+ 'whisper-large': 'whisper',
183
+ 'whisper-large-v1': 'whisper',
184
+ 'whisper-large-v2': 'whisper',
185
+ 'whisper-large-v3': 'whisper',
186
+ 'whisper-medium': 'whisper',
187
+ 'whisper-medium.en': 'whisper',
188
+ 'whisper-small': 'whisper',
189
+ 'whisper-small.en': 'whisper',
190
+ 'whisper-base': 'whisper',
191
+ 'whisper-base.en': 'whisper',
192
+ 'whisper-tiny': 'whisper',
193
+ 'whisper-tiny.en': 'whisper',
194
+ # Legacy model names
195
+ 'faster-whisper': 'whisper',
196
+ 'openai-whisper': 'whisper',
197
+ }
198
+
199
+ # Try exact match first
200
+ if model_name.lower() in model_mapping:
201
+ return model_mapping[model_name.lower()]
202
+
203
+ # Try prefix matching (e.g., "whisper-large-v3-turbo" starts with the known key "whisper-large")
204
+ for model_prefix, provider in model_mapping.items():
205
+ if model_name.lower().startswith(model_prefix.lower()):
206
+ logger.info(f"Prefix match: '{model_name}' -> '{provider}' (matched '{model_prefix}')")
207
+ return provider
208
+
209
+ return None
210
+
211
 
212
  # Legacy compatibility - create an ASRFactory alias
213
  class ASRFactory:
uv.lock CHANGED
@@ -87,6 +87,7 @@ dependencies = [
87
  { name = "nltk" },
88
  { name = "numpy" },
89
  { name = "ordered-set" },
 
90
  { name = "phonemizer-fork" },
91
  { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
92
  { name = "scipy", version = "1.16.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
@@ -121,6 +122,7 @@ requires-dist = [
121
  { name = "nltk", specifier = ">=3.8" },
122
  { name = "numpy", specifier = ">=1.26.0" },
123
  { name = "ordered-set", specifier = ">=4.1.0" },
 
124
  { name = "phonemizer-fork", specifier = ">=3.3.2" },
125
  { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" },
126
  { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" },
@@ -948,8 +950,7 @@ dependencies = [
948
  { name = "numpy" },
949
  { name = "orjson" },
950
  { name = "packaging" },
951
- { name = "pandas", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" },
952
- { name = "pandas", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" },
953
  { name = "pillow" },
954
  { name = "pydantic" },
955
  { name = "pydub" },
@@ -1861,53 +1862,15 @@ wheels = [
1861
  { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 },
1862
  ]
1863
 
1864
- [[package]]
1865
- name = "pandas"
1866
- version = "2.1.0"
1867
- source = { registry = "https://pypi.org/simple" }
1868
- resolution-markers = [
1869
- "python_full_version >= '3.13' and sys_platform == 'linux'",
1870
- "python_full_version >= '3.13' and sys_platform != 'linux'",
1871
- "python_full_version == '3.12.*' and sys_platform == 'linux'",
1872
- "python_full_version == '3.12.*' and sys_platform != 'linux'",
1873
- ]
1874
- dependencies = [
1875
- { name = "numpy", marker = "python_full_version >= '3.12'" },
1876
- { name = "python-dateutil", marker = "python_full_version >= '3.12'" },
1877
- { name = "pytz", marker = "python_full_version >= '3.12'" },
1878
- { name = "tzdata", marker = "python_full_version >= '3.12'" },
1879
- ]
1880
- sdist = { url = "https://files.pythonhosted.org/packages/6f/31/a4a8e7367856d9584d0332793edfe631182a9cca885f12dbe2dd77c10c4a/pandas-2.1.0.tar.gz", hash = "sha256:62c24c7fc59e42b775ce0679cfa7b14a5f9bfb7643cfbe708c960699e05fb918", size = 4263970 }
1881
- wheels = [
1882
- { url = "https://files.pythonhosted.org/packages/cf/ba/be69b6fa37c74699d333dbcbf0fc799eb31c35ce465651cdc4baf6a2e30d/pandas-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:40dd20439ff94f1b2ed55b393ecee9cb6f3b08104c2c40b0cb7186a2f0046242", size = 12118394 },
1883
- { url = "https://files.pythonhosted.org/packages/8d/08/1cf87814dcd87604807971abc743b12e635de36d820be7b50e2b6aa9e1b5/pandas-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d4f38e4fedeba580285eaac7ede4f686c6701a9e618d8a857b138a126d067f2f", size = 11306908 },
1884
- { url = "https://files.pythonhosted.org/packages/f3/21/8ea83d6990457c5253d9e6c40a3d2c8a3d383dfabb937b0a36a71ae43bde/pandas-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e6a0fe052cf27ceb29be9429428b4918f3740e37ff185658f40d8702f0b3e09", size = 15167237 },
1885
- { url = "https://files.pythonhosted.org/packages/fb/4f/4a4372b2e24439f559b73318683486831d75e59544ae02bf8dec8dd6f48b/pandas-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d81e1813191070440d4c7a413cb673052b3b4a984ffd86b8dd468c45742d3cc", size = 12662125 },
1886
- { url = "https://files.pythonhosted.org/packages/4c/a8/8ac4fa3970e64d7f62ebdcd47e507c2443d49090a3f402fa01f0e6e30b13/pandas-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eb20252720b1cc1b7d0b2879ffc7e0542dd568f24d7c4b2347cb035206936421", size = 13465263 },
1887
- { url = "https://files.pythonhosted.org/packages/c5/89/ce1c7dc497f9a20644f6a7d2dd5bce6378a48321955178197fa3b55d6fe3/pandas-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:38f74ef7ebc0ffb43b3d633e23d74882bce7e27bfa09607f3c5d3e03ffd9a4a5", size = 11097660 },
1888
- { url = "https://files.pythonhosted.org/packages/c3/05/c5c73d54ceb7d5e4b8c046d39a1bb7f38ee76ea556a002cf3317514f0196/pandas-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cda72cc8c4761c8f1d97b169661f23a86b16fdb240bdc341173aee17e4d6cedd", size = 12015015 },
1889
- { url = "https://files.pythonhosted.org/packages/e5/cd/c941b51e95992968e3e8abc7180f33b952478abd6943062051517a808db7/pandas-2.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d97daeac0db8c993420b10da4f5f5b39b01fc9ca689a17844e07c0a35ac96b4b", size = 11173830 },
1890
- { url = "https://files.pythonhosted.org/packages/e2/25/bfb5c7573e2b884b18e5ea993ee7aeb5a6915ea687174349fdc5f979ceec/pandas-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8c58b1113892e0c8078f006a167cc210a92bdae23322bb4614f2f0b7a4b510f", size = 15176284 },
1891
- { url = "https://files.pythonhosted.org/packages/d9/26/895a49ebddb4211f2d777150f38ef9e538deff6df7e179a3624c663efc98/pandas-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:629124923bcf798965b054a540f9ccdfd60f71361255c81fa1ecd94a904b9dd3", size = 12630840 },
1892
- { url = "https://files.pythonhosted.org/packages/bc/ad/d1f0a867064f62ffde917876cc09cfd53352af2b1f147c140fd1943a0c7a/pandas-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:70cf866af3ab346a10debba8ea78077cf3a8cd14bd5e4bed3d41555a3280041c", size = 13463414 },
1893
- { url = "https://files.pythonhosted.org/packages/b7/f8/32d6b5aa4c4bc045fa2c4c58f88c325facc54721956c6313f0afea8ea853/pandas-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d53c8c1001f6a192ff1de1efe03b31a423d0eee2e9e855e69d004308e046e694", size = 11043589 },
1894
- ]
1895
-
1896
  [[package]]
1897
  name = "pandas"
1898
  version = "2.3.1"
1899
  source = { registry = "https://pypi.org/simple" }
1900
- resolution-markers = [
1901
- "python_full_version == '3.11.*' and sys_platform == 'linux'",
1902
- "python_full_version == '3.11.*' and sys_platform != 'linux'",
1903
- "python_full_version < '3.11' and sys_platform == 'linux'",
1904
- "python_full_version < '3.11' and sys_platform != 'linux'",
1905
- ]
1906
  dependencies = [
1907
- { name = "numpy", marker = "python_full_version < '3.12'" },
1908
- { name = "python-dateutil", marker = "python_full_version < '3.12'" },
1909
- { name = "pytz", marker = "python_full_version < '3.12'" },
1910
- { name = "tzdata", marker = "python_full_version < '3.12'" },
1911
  ]
1912
  sdist = { url = "https://files.pythonhosted.org/packages/d1/6f/75aa71f8a14267117adeeed5d21b204770189c0a0025acbdc03c337b28fc/pandas-2.3.1.tar.gz", hash = "sha256:0a95b9ac964fe83ce317827f80304d37388ea77616b1425f0ae41c9d2d0d7bb2", size = 4487493 }
1913
  wheels = [
 
87
  { name = "nltk" },
88
  { name = "numpy" },
89
  { name = "ordered-set" },
90
+ { name = "pandas" },
91
  { name = "phonemizer-fork" },
92
  { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
93
  { name = "scipy", version = "1.16.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
 
122
  { name = "nltk", specifier = ">=3.8" },
123
  { name = "numpy", specifier = ">=1.26.0" },
124
  { name = "ordered-set", specifier = ">=4.1.0" },
125
+ { name = "pandas", specifier = ">=2.2.0" },
126
  { name = "phonemizer-fork", specifier = ">=3.3.2" },
127
  { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" },
128
  { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" },
 
950
  { name = "numpy" },
951
  { name = "orjson" },
952
  { name = "packaging" },
953
+ { name = "pandas" },
 
954
  { name = "pillow" },
955
  { name = "pydantic" },
956
  { name = "pydub" },
 
1862
  { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 },
1863
  ]
1864
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1865
  [[package]]
1866
  name = "pandas"
1867
  version = "2.3.1"
1868
  source = { registry = "https://pypi.org/simple" }
 
 
 
 
 
 
1869
  dependencies = [
1870
+ { name = "numpy" },
1871
+ { name = "python-dateutil" },
1872
+ { name = "pytz" },
1873
+ { name = "tzdata" },
1874
  ]
1875
  sdist = { url = "https://files.pythonhosted.org/packages/d1/6f/75aa71f8a14267117adeeed5d21b204770189c0a0025acbdc03c337b28fc/pandas-2.3.1.tar.gz", hash = "sha256:0a95b9ac964fe83ce317827f80304d37388ea77616b1425f0ae41c9d2d0d7bb2", size = 4487493 }
1876
  wheels = [