Spaces:
Paused
Paused
Update audio_routes.py
Browse files- audio_routes.py +103 -7
audio_routes.py
CHANGED
|
@@ -141,15 +141,111 @@ async def get_tts_status():
|
|
| 141 |
}
|
| 142 |
}
|
| 143 |
|
| 144 |
-
# ===================== STT Endpoints
|
| 145 |
@router.post("/stt/transcribe")
|
| 146 |
async def transcribe_audio(request: STTRequest):
|
| 147 |
-
"""Transcribe audio to text
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
@router.get("/stt/status")
|
| 155 |
async def get_stt_status():
|
|
|
|
| 141 |
}
|
| 142 |
}
|
| 143 |
|
| 144 |
+
# ===================== STT Endpoints =====================
|
| 145 |
@router.post("/stt/transcribe")
async def transcribe_audio(request: STTRequest):
    """Transcribe a base64-encoded audio clip to text.

    Creates an STT provider through ``STTFactory``, opens one streaming
    session, feeds it the decoded audio and returns the first result the
    provider marks as final.

    Args:
        request: Payload carrying ``audio_data`` (base64 text) plus the
            optional ``language`` and ``format`` overrides.

    Returns:
        A dict with ``text``, ``confidence``, ``language`` and ``provider``.
        ``text`` is empty and ``confidence`` is 0.0 when the provider never
        yields a final result. ``language`` is the language actually used
        for recognition (request override, else configured value, else
        ``"tr-TR"``).

    Raises:
        HTTPException: 503 when no provider could be created or it does not
            report realtime support, 400 when ``audio_data`` cannot be
            base64-decoded, 500 for any other failure.
    """
    try:
        # Imported lazily inside the handler, as in the rest of this module.
        import base64

        from stt_factory import STTFactory
        from stt_interface import STTConfig

        # Create STT provider
        stt_provider = STTFactory.create_provider()

        if not stt_provider or not stt_provider.supports_realtime():
            log_warning("🎵 STT disabled or doesn't support transcription")
            raise HTTPException(
                status_code=503,
                detail="STT service not available"
            )

        # Get config
        cfg = ConfigProvider.get()
        stt_config = cfg.global_config.stt_provider.settings

        # Decode audio data. A malformed payload is the caller's fault, so it
        # is reported as 400 instead of falling through to the generic 500.
        try:
            audio_bytes = base64.b64decode(request.audio_data)
        except (ValueError, TypeError) as decode_error:
            log_warning("🎵 STT request rejected: audio_data is not valid base64")
            raise HTTPException(
                status_code=400,
                detail="Invalid base64 audio data"
            ) from decode_error

        # Resolve the language once so the response reports the value that
        # was really used, not a possibly-empty request field.
        language = request.language or stt_config.get("language", "tr-TR")

        # Create STT config
        config = STTConfig(
            language=language,
            sample_rate=16000,
            encoding=request.format.upper() if request.format else "WEBM_OPUS",
            enable_punctuation=stt_config.get("enable_punctuation", True),
            enable_word_timestamps=False,
            model=stt_config.get("model", "latest_long"),
            use_enhanced=stt_config.get("use_enhanced", True),
            single_utterance=True,
            interim_results=False
        )

        # Start streaming session
        await stt_provider.start_streaming(config)

        # Process audio
        transcription = ""
        confidence = 0.0

        try:
            async for result in stt_provider.stream_audio(audio_bytes):
                # Only the first final result is of interest.
                if result.is_final:
                    transcription = result.text
                    confidence = result.confidence
                    break
        finally:
            # Stop streaming even when stream_audio raises.
            await stt_provider.stop_streaming()

        log_info(f"✅ STT transcription completed: '{transcription[:50]}...'")

        return {
            "text": transcription,
            "confidence": confidence,
            "language": language,
            "provider": stt_provider.get_provider_name()
        }

    except HTTPException:
        # Deliberate HTTP errors (503/400 above) pass through unchanged.
        raise
    except Exception as e:
        log_error("❌ STT transcription error", e)
        raise HTTPException(
            status_code=500,
            detail=f"Transcription failed: {str(e)}"
        ) from e
|
| 217 |
+
|
| 218 |
+
@router.get("/stt/languages")
async def get_stt_languages():
    """Report the languages supported by the configured STT provider.

    Returns:
        A dict with ``languages``, ``provider`` and ``enabled``. When the
        factory yields no provider, ``provider`` is ``"none"`` and
        ``enabled`` is False. When anything raises, ``provider`` is
        ``"error"``, ``enabled`` is False and an ``error`` key carries the
        exception text. This endpoint never raises; failures are reported
        in the payload.
    """
    try:
        # Imported lazily inside the handler, as in the rest of this module.
        from stt_factory import STTFactory

        stt_provider = STTFactory.create_provider()

        if not stt_provider:
            return {
                "languages": [],
                "provider": "none",
                "enabled": False
            }

        languages = stt_provider.get_supported_languages()

        return {
            "languages": languages,
            "provider": stt_provider.get_provider_name(),
            "enabled": True
        }

    except Exception as e:
        # Best-effort status endpoint: log and describe the failure in the
        # response body rather than turning it into an HTTP error.
        log_error("❌ Error getting STT languages", e)
        return {
            "languages": [],
            "provider": "error",
            "enabled": False,
            "error": str(e)
        }
|
| 249 |
|
| 250 |
@router.get("/stt/status")
|
| 251 |
async def get_stt_status():
|