File size: 5,389 Bytes
bea4a17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167

"""Audio API endpoints for Flare
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Provides text-to-speech (TTS) and speech-to-text (STT) endpoints.
"""

from fastapi import APIRouter, HTTPException, Response, Body
from pydantic import BaseModel
from typing import Optional
from datetime import datetime
import sys

from logger import log_info, log_error, log_warning, log_debug
from tts_factory import TTSFactory
from tts_preprocessor import TTSPreprocessor
from config_provider import ConfigProvider

router = APIRouter(tags=["audio"])

# ===================== Models =====================
# Request body accepted by POST /tts/generate.
class TTSRequest(BaseModel):
    # Text to speak; the handler runs it through TTSPreprocessor before synthesis.
    text: str
    # Voice identifier forwarded unchanged to the provider's synthesize() call;
    # None when the client does not pick a voice.
    voice_id: Optional[str] = None
    # Language tag handed to TTSPreprocessor and echoed back in the
    # X-TTS-Language response header.
    # NOTE(review): Optional means an explicit JSON null is accepted here.
    language: Optional[str] = "tr-TR"

# Request body accepted by POST /stt/transcribe.
# NOTE(review): the handler currently raises 501 before reading any field.
class STTRequest(BaseModel):
    audio_data: str  # Base64 encoded audio
    language: Optional[str] = "tr-TR"  # Language tag of the spoken audio
    format: Optional[str] = "webm"  # Container/codec of audio_data: webm, wav, mp3

# ===================== Helpers =====================
def log(message: str):
    """Print *message* to stdout prefixed with a ``[HH:MM:SS.mmm]`` stamp.

    The stamp is the local wall-clock time truncated (not rounded) to
    milliseconds, and stdout is flushed right away so the line shows up
    immediately even when output is buffered.
    """
    clock = datetime.now().time().isoformat(timespec="milliseconds")
    print(f"[{clock}] {message}", flush=True)

# ===================== TTS Endpoints =====================
@router.post("/tts/generate")
async def generate_tts(request: TTSRequest):
    """Generate TTS audio from text - public endpoint for chat.

    Args:
        request: Text to speak plus an optional voice id and language tag.

    Returns:
        A ``Response`` with media type ``audio/mpeg``. When the factory
        yields no provider the body is empty and the ``X-TTS-Status``
        header is ``disabled``; otherwise the body is the synthesized audio
        and the headers name the provider and language used.

    Raises:
        HTTPException: status 500 when any step of the generation fails.
    """
    try:
        # Create TTS provider
        tts_provider = TTSFactory.create_provider()

        if not tts_provider:
            # Return empty response for no TTS
            log_info("πŸ“΅ TTS disabled - returning empty response")
            return Response(
                content=b"",
                media_type="audio/mpeg",
                headers={"X-TTS-Status": "disabled"}
            )

        # `language` is Optional, so an explicit JSON null arrives as None.
        # A None header value cannot be encoded when the response is built,
        # which used to fail the request *after* the audio had already been
        # synthesized. Fall back to the model's default tag instead.
        language = request.language if request.language is not None else "tr-TR"

        log_info(f"🎀 TTS request: '{request.text[:50]}...' with provider: {tts_provider.get_provider_name()}")

        # Preprocess text if needed
        preprocessor = TTSPreprocessor(language=language)
        processed_text = preprocessor.preprocess(
            request.text,
            tts_provider.get_preprocessing_flags()
        )

        log_debug(f"πŸ“ Preprocessed text: {processed_text[:100]}...")

        # Generate audio
        audio_data = await tts_provider.synthesize(
            text=processed_text,
            voice_id=request.voice_id
        )

        log_info(f"βœ… TTS generated {len(audio_data)} bytes of audio")

        # Return audio as binary response
        return Response(
            content=audio_data,
            media_type="audio/mpeg",
            headers={
                "Content-Disposition": 'inline; filename="tts_output.mp3"',
                "X-TTS-Provider": tts_provider.get_provider_name(),
                "X-TTS-Language": language,
                "Cache-Control": "no-cache"
            }
        )

    except Exception as e:
        log_error("❌ TTS generation error", e)
        # Chain the cause so the original traceback is kept for debugging.
        raise HTTPException(
            status_code=500,
            detail=f"TTS generation failed: {str(e)}"
        ) from e

@router.get("/tts/voices")
async def get_tts_voices():
    """List the voices offered by the active TTS provider - public endpoint.

    Returns:
        A dict with ``voices`` (list of ``{"id", "name"}`` entries),
        ``provider`` and ``enabled``. With no provider the list is empty and
        ``provider`` is ``"none"``. On any failure the list is empty,
        ``provider`` is ``"error"`` and an extra ``error`` key carries the
        exception text; the failure is logged, not raised.
    """
    try:
        provider = TTSFactory.create_provider()

        if provider:
            # The provider reports voices as an id -> display-name mapping;
            # flatten it into a list of records for the client.
            catalog = provider.get_supported_voices()
            entries = []
            for key, label in catalog.items():
                entries.append({"id": key, "name": label})

            payload = {
                "voices": entries,
                "provider": provider.get_provider_name(),
                "enabled": True
            }
        else:
            payload = {"voices": [], "provider": "none", "enabled": False}

        return payload

    except Exception as e:
        log_error("❌ Error getting TTS voices", e)
        failure = {"voices": [], "provider": "error", "enabled": False}
        failure["error"] = str(e)
        return failure

@router.get("/tts/status")
async def get_tts_status():
    """Report the configured TTS provider and whether TTS is switched on.

    Returns:
        A dict with ``enabled`` (False only when the provider name is
        ``"no_tts"``), ``provider`` (the provider name) and
        ``provider_config`` holding the name, whether an API key is set
        (the key itself is never returned), and the endpoint.
    """
    provider = ConfigProvider.get().global_config.tts_provider
    provider_name = provider.name

    details = {
        "name": provider_name,
        "has_api_key": bool(provider.api_key),
        "endpoint": provider.endpoint
    }

    return {
        "enabled": provider_name != "no_tts",
        "provider": provider_name,
        "provider_config": details
    }

# ===================== STT Endpoints (Future) =====================
@router.post("/stt/transcribe")
async def transcribe_audio(request: STTRequest):
    """Placeholder for speech-to-text transcription.

    Raises:
        HTTPException: always, with status 501; the request body is
            validated but not otherwise used.
    """
    # TODO: Implement when STT factory is ready
    not_implemented = HTTPException(
        status_code=501,
        detail="STT transcription not yet implemented"
    )
    raise not_implemented

@router.get("/stt/status")
async def get_stt_status():
    """Report the configured STT provider and whether STT is switched on.

    Returns:
        A dict with ``enabled`` (False only when the provider name is
        ``"no_stt"``), ``provider`` (the provider name) and
        ``provider_config`` holding the name, whether an API key is set
        (the key itself is never returned), and the endpoint.
    """
    provider = ConfigProvider.get().global_config.stt_provider
    provider_name = provider.name

    details = {
        "name": provider_name,
        "has_api_key": bool(provider.api_key),
        "endpoint": provider.endpoint
    }

    return {
        "enabled": provider_name != "no_stt",
        "provider": provider_name,
        "provider_config": details
    }