Spaces:
				
			
			
	
			
			
		Paused
		
	
	
	
			
			
	
	
	
	
		
		
		Paused
		
	| """Audio API endpoints for Flare | |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| Provides text-to-speech (TTS) and speech-to-text (STT) endpoints. | |
| """ | |
| from fastapi import APIRouter, HTTPException, Response, Body | |
| from pydantic import BaseModel | |
| from typing import Optional | |
| from datetime import datetime | |
| import sys | |
| from logger import log_info, log_error, log_warning, log_debug | |
| from tts_factory import TTSFactory | |
| from tts_preprocessor import TTSPreprocessor | |
| from config_provider import ConfigProvider | |
| router = APIRouter(tags=["audio"]) | |
| # ===================== Models ===================== | |
class TTSRequest(BaseModel):
    """Request body for the text-to-speech endpoint."""

    # Text to be synthesized.
    text: str
    # Voice identifier forwarded to the provider's synthesize() call; optional.
    voice_id: Optional[str] = None
    # Language tag handed to the TTS preprocessor and echoed back in the
    # X-TTS-Language response header.
    language: Optional[str] = "tr-TR"
class STTRequest(BaseModel):
    """Request body for the speech-to-text endpoint."""

    # Base64 encoded audio
    audio_data: str
    # Language tag of the audio; defaults to "tr-TR".
    language: Optional[str] = "tr-TR"
    # Audio container format: webm, wav, mp3
    format: Optional[str] = "webm"
| # ===================== Helpers ===================== | |
def log(message: str):
    """Print *message* to stdout behind a ``[HH:MM:SS.mmm]`` timestamp.

    Stdout is flushed right away so the line is not held back by buffering.
    """
    # %f renders six microsecond digits; trimming the last three leaves
    # millisecond precision.
    stamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
    print(f"[{stamp}] {message}", flush=True)
| # ===================== TTS Endpoints ===================== | |
async def generate_tts(request: TTSRequest):
    """Generate TTS audio from text - public endpoint for chat.

    Args:
        request: Text to synthesize plus an optional voice id and language.

    Returns:
        A ``Response`` with media type ``audio/mpeg``. When the factory yields
        no provider, the body is empty and the ``X-TTS-Status: disabled``
        header is set. Otherwise the body is the synthesized audio and the
        headers carry the provider name and language.

    Raises:
        HTTPException: status 500 when provider creation, preprocessing or
            synthesis fails; the detail contains the underlying error text.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file - confirm it is registered on `router`.
    try:
        # Create TTS provider
        tts_provider = TTSFactory.create_provider()

        if not tts_provider:
            # Return empty response for no TTS
            log_info("π΅ TTS disabled - returning empty response")
            return Response(
                content=b"",
                media_type="audio/mpeg",
                headers={"X-TTS-Status": "disabled"},
            )

        provider_name = tts_provider.get_provider_name()

        # `language` is Optional on the model, so a client may send null.
        # Header values must be strings, so fall back to the model default
        # instead of letting a None value fail the whole request.
        language = request.language or "tr-TR"

        log_info(f"π€ TTS request: '{request.text[:50]}...' with provider: {provider_name}")

        # Preprocess text if needed
        preprocessor = TTSPreprocessor(language=language)
        processed_text = preprocessor.preprocess(
            request.text,
            tts_provider.get_preprocessing_flags(),
        )
        log_debug(f"π Preprocessed text: {processed_text[:100]}...")

        # Generate audio
        audio_data = await tts_provider.synthesize(
            text=processed_text,
            voice_id=request.voice_id,
        )
        log_info(f"β TTS generated {len(audio_data)} bytes of audio")

        # Return audio as binary response
        return Response(
            content=audio_data,
            media_type="audio/mpeg",
            headers={
                "Content-Disposition": 'inline; filename="tts_output.mp3"',
                "X-TTS-Provider": provider_name,
                "X-TTS-Language": language,
                "Cache-Control": "no-cache",
            },
        )
    except Exception as e:
        log_error("β TTS generation error", e)
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(
            status_code=500,
            detail=f"TTS generation failed: {str(e)}",
        ) from e
async def get_tts_voices():
    """List the voices offered by the active TTS provider - public endpoint.

    Returns:
        A dict with ``voices`` (list of ``{"id", "name"}`` entries),
        ``provider`` and ``enabled``. With no provider the list is empty and
        ``provider`` is ``"none"``. On any failure the list is empty,
        ``provider`` is ``"error"`` and an ``error`` message is included;
        the exception is logged rather than raised.
    """
    try:
        provider = TTSFactory.create_provider()

        if provider:
            catalog = provider.get_supported_voices()
            return {
                # Flatten the id -> name mapping into a list of records.
                "voices": [
                    {"id": key, "name": label}
                    for key, label in catalog.items()
                ],
                "provider": provider.get_provider_name(),
                "enabled": True,
            }

        return {"voices": [], "provider": "none", "enabled": False}
    except Exception as e:
        log_error("β Error getting TTS voices", e)
        return {
            "voices": [],
            "provider": "error",
            "enabled": False,
            "error": str(e),
        }
async def get_tts_status():
    """Report the configured TTS provider.

    Returns:
        A dict with ``enabled`` (False when the provider name is
        ``"no_tts"``), ``provider`` and a ``provider_config`` summary that
        states whether an API key is set without revealing it.
    """
    provider_cfg = ConfigProvider.get().global_config.tts_provider
    provider_name = provider_cfg.name

    summary = {
        "name": provider_name,
        "has_api_key": bool(provider_cfg.api_key),
        "endpoint": provider_cfg.endpoint,
    }
    return {
        "enabled": provider_name != "no_tts",
        "provider": provider_name,
        "provider_config": summary,
    }
| # ===================== STT Endpoints (Future) ===================== | |
async def transcribe_audio(request: STTRequest):
    """Transcribe audio to text - placeholder, not implemented yet.

    Raises:
        HTTPException: always, with status 501.
    """
    # TODO: Implement when STT factory is ready
    not_implemented = HTTPException(
        status_code=501,
        detail="STT transcription not yet implemented",
    )
    raise not_implemented
async def get_stt_status():
    """Report the configured STT provider.

    Returns:
        A dict with ``enabled`` (False when the provider name is
        ``"no_stt"``), ``provider`` and a ``provider_config`` summary that
        states whether an API key is set without revealing it.
    """
    provider_cfg = ConfigProvider.get().global_config.stt_provider
    provider_name = provider_cfg.name

    summary = {
        "name": provider_name,
        "has_api_key": bool(provider_cfg.api_key),
        "endpoint": provider_cfg.endpoint,
    }
    return {
        "enabled": provider_name != "no_stt",
        "provider": provider_name,
        "provider_config": summary,
    }

