File size: 3,966 Bytes
5eaef53
7d3c0d1
405a073
 
5011794
7d3c0d1
515f8f3
5eaef53
405a073
7d3c0d1
 
 
 
515f8f3
 
5eaef53
 
 
515f8f3
 
 
 
5eaef53
405a073
 
515f8f3
 
 
 
 
405a073
5eaef53
 
 
 
 
 
 
515f8f3
5eaef53
 
 
 
 
 
 
 
 
 
 
515f8f3
7d3c0d1
5eaef53
7d3c0d1
 
 
 
 
 
 
515f8f3
 
 
 
5eaef53
515f8f3
 
 
5eaef53
515f8f3
5eaef53
515f8f3
 
 
 
 
 
 
 
 
405a073
515f8f3
405a073
 
515f8f3
5eaef53
515f8f3
5eaef53
 
515f8f3
5eaef53
7d3c0d1
515f8f3
 
 
 
 
5eaef53
7d3c0d1
 
 
5eaef53
7d3c0d1
5eaef53
515f8f3
 
 
 
405a073
7b02fdc
515f8f3
 
 
7b02fdc
7d3c0d1
 
 
7b02fdc
 
515f8f3
5eaef53
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, StreamingResponse
from google import genai
from google.genai import types
import wave
import io
import os
from typing import Optional, List
from pydantic import BaseModel
from dotenv import load_dotenv

# Load environment variables (python-dotenv); this runs at import time so that
# GEMINI_API_KEYS, read later via os.getenv, can be supplied through a .env file.
load_dotenv()

# The FastAPI application object that all route decorators below attach to.
# Interactive docs are served at /docs; passing redoc_url=None turns off the
# ReDoc page.
app = FastAPI(
    title="Google GenAI TTS API with Multiple API Keys",
    description="Text-to-Speech API using Google GenAI with multiple API keys fallback.",
    version="1.2.0",
    docs_url="/docs",
    redoc_url=None
)

# Pydantic model for the JSON request body of POST /api/generate-tts/.
# Only `text` is required; every other field has a default.
class TTSRequest(BaseModel):
    # The text to be spoken (sent to the model as the prompt contents).
    text: str
    # Name of the prebuilt voice passed to PrebuiltVoiceConfig.
    voice_name: Optional[str] = "Kore"
    # When truthy, the prompt is prefixed with "Say cheerfully: ".
    cheerful: Optional[bool] = True
    # The three values below are only written into the WAV header by
    # generate_wave_bytes; the audio returned by the model is not converted.
    # NOTE(review): they presumably must match the PCM format the model
    # actually returns -- confirm against the GenAI TTS documentation.
    sample_rate: Optional[int] = 24000
    channels: Optional[int] = 1
    sample_width: Optional[int] = 2

def get_api_keys() -> List[str]:
    """Retrieve the list of API keys from the ``GEMINI_API_KEYS`` environment variable.

    The variable holds a comma-separated list. Surrounding whitespace is
    stripped from each entry and empty entries are dropped.

    Returns:
        The non-empty API keys, in the order they were configured.

    Raises:
        ValueError: If the variable is unset, empty, or contains only
            separators/whitespace (i.e. no usable key remains).
    """
    raw_value = os.getenv("GEMINI_API_KEYS") or ""
    stripped_entries = (entry.strip() for entry in raw_value.split(","))
    keys = [entry for entry in stripped_entries if entry]
    # A value such as " , ," is truthy but yields no keys; fail here with a
    # clear message instead of handing callers an empty list.
    if not keys:
        raise ValueError("No API keys found in GEMINI_API_KEYS environment variable.")
    return keys

def initialize_genai_client():
    """Return a GenAI client built from the first configured key that constructs cleanly.

    Keys come from ``get_api_keys()`` and are tried in order. A key whose
    client construction raises is reported on stdout and skipped.

    Raises:
        ValueError: If no key produced a client (or, propagated from
            ``get_api_keys()``, if no keys are configured).
    """
    for key in get_api_keys():
        try:
            print(f"Trying API key: {key[:5]}...")  # Only show part for safety
            genai_client = genai.Client(api_key=key)
        except Exception as e:
            print(f"Failed with key {key[:5]}... : {e}")
        else:
            return genai_client

    raise ValueError("No valid API key could initialize the GenAI client.")

def generate_wave_bytes(pcm_data: bytes, channels: int, rate: int, sample_width: int) -> bytes:
    """Wrap raw PCM frames in a WAV container and return the file as bytes.

    Args:
        pcm_data: Raw PCM audio frames, written unchanged.
        channels: Channel count recorded in the WAV header.
        rate: Frame rate in Hz recorded in the WAV header.
        sample_width: Bytes per sample recorded in the WAV header.

    Returns:
        The complete WAV file contents (header followed by ``pcm_data``).
    """
    sink = io.BytesIO()
    try:
        with wave.open(sink, "wb") as writer:
            writer.setnchannels(channels)
            writer.setsampwidth(sample_width)
            writer.setframerate(rate)
            writer.writeframes(pcm_data)
        # Read the buffer only after the writer has closed, so the header it
        # finalises on close is included.
        return sink.getvalue()
    finally:
        sink.close()

def _extract_audio_bytes(response) -> Optional[bytes]:
    """Return the inline audio payload of the first candidate's first part, or None if absent."""
    candidates = getattr(response, "candidates", None)
    if not candidates:
        return None
    content = candidates[0].content
    parts = content.parts if content is not None else None
    if not parts:
        return None
    inline_data = parts[0].inline_data
    if inline_data is None or not inline_data.data:
        return None
    return inline_data.data


@app.post("/api/generate-tts/")
async def generate_tts(request: TTSRequest):
    """
    Convert text to speech audio using Google GenAI.

    The request text (optionally prefixed with "Say cheerfully: ") is sent to
    the TTS model; the returned audio bytes are wrapped in a WAV container
    using the sample rate, channel count and sample width from the request.

    Returns a WAV file download on success. If the model returns no audio, an
    HTTP 500 error with detail "No audio data received from GenAI." is raised.
    Any other failure yields a 500 JSON body of the form
    {"status": "error", "message": "..."}.
    """
    try:
        client = initialize_genai_client()

        text_to_speak = f"Say cheerfully: {request.text}" if request.cheerful else request.text

        # Use the SDK's async client so the event loop is not blocked while
        # the model generates audio (the sync call would stall every other
        # request served by this worker).
        response = await client.aio.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text_to_speak,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=request.voice_name,
                        )
                    )
                ),
            )
        )

        # Guards against missing candidates/content/parts/inline data, any of
        # which would otherwise surface as an opaque AttributeError.
        audio_data = _extract_audio_bytes(response)
        if audio_data is None:
            raise HTTPException(status_code=500, detail="No audio data received from GenAI.")

        wav_bytes = generate_wave_bytes(
            audio_data,
            channels=request.channels,
            rate=request.sample_rate,
            sample_width=request.sample_width
        )

        return StreamingResponse(
            io.BytesIO(wav_bytes),
            media_type="audio/wav",
            headers={"Content-Disposition": "attachment; filename=generated_audio.wav"}
        )

    except HTTPException:
        # HTTPException is a subclass of Exception; re-raise it so FastAPI
        # renders it as intended instead of the generic handler swallowing it.
        raise
    except Exception as e:
        return JSONResponse(
            {"status": "error", "message": str(e)},
            status_code=500
        )

@app.get("/")
async def root():
    # Landing endpoint: returns a fixed message confirming the API is up.
    payload = {"message": "Google GenAI TTS API is running"}
    return payload

@app.get("/health")
async def health_check():
    # Liveness probe: always reports a fixed "healthy" status and performs
    # no checks of its own.
    status_payload = {"status": "healthy"}
    return status_payload

# Script entry point: when this module is executed directly, serve the app
# with uvicorn on all network interfaces (0.0.0.0), port 8080.
if __name__ == "__main__":
    # Imported here so uvicorn is only required when running as a script.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8080)