Spaces:
Running
Running
from fastapi import FastAPI, HTTPException | |
from fastapi.responses import JSONResponse, StreamingResponse | |
from google import genai | |
from google.genai import types | |
import wave | |
import io | |
import os | |
from typing import Optional, List | |
from pydantic import BaseModel | |
from dotenv import load_dotenv | |
# Load variables from a .env file (if one exists) into the process
# environment before anything below reads them.
load_dotenv()

# ASGI application object. Interactive docs are served at /docs; passing
# None for redoc_url switches the ReDoc page off.
app = FastAPI(
    version="1.2.0",
    title="Google GenAI TTS API with Multiple API Keys",
    description="Text-to-Speech API using Google GenAI with multiple API keys fallback.",
    redoc_url=None,
    docs_url="/docs",
)
# Pydantic model for request body | |
class TTSRequest(BaseModel):
    """Request body for a text-to-speech call.

    ``voice_name`` and ``cheerful`` may still be sent as ``null``. The three
    WAV header fields may not: ``None`` cannot be written into a WAV header,
    so accepting it at validation time only postponed the failure to the
    encoding step.
    """

    # Text to be spoken.
    text: str
    # Name of the prebuilt voice forwarded to the speech configuration.
    voice_name: Optional[str] = "Kore"
    # When truthy, the text is prefixed with "Say cheerfully: " before synthesis.
    cheerful: Optional[bool] = True
    # Values written into the WAV header that wraps the returned PCM data.
    # NOTE(review): nothing in this file resamples the audio, so these only
    # label it; presumably they must match what the model emits - confirm.
    sample_rate: int = 24000
    channels: int = 1
    sample_width: int = 2
def get_api_keys() -> List[str]:
    """Return the API keys listed in the ``GEMINI_API_KEYS`` environment variable.

    The variable holds a comma-separated list. Whitespace around each entry
    is stripped and empty entries are dropped.

    Returns:
        The non-empty keys, in the order they appear in the variable.

    Raises:
        ValueError: If the variable is unset, empty, or contains nothing but
            separators and whitespace.
    """
    raw_value = os.getenv("GEMINI_API_KEYS") or ""
    keys = [entry.strip() for entry in raw_value.split(",") if entry.strip()]
    # Check after filtering: a value such as " , ," is set but holds no key.
    if not keys:
        raise ValueError("No API keys found in GEMINI_API_KEYS environment variable.")
    return keys
def initialize_genai_client():
    """Return a GenAI client built from the first configured key that works.

    Keys from ``get_api_keys()`` are tried in order and the first one for
    which ``genai.Client`` can be constructed is used.

    NOTE(review): only client construction is attempted here. Whether
    construction rejects an invalid or exhausted key is not visible from this
    file; if it does not, the fallback never moves past the first key.
    Confirm against the SDK.

    Returns:
        The constructed ``genai.Client``.

    Raises:
        ValueError: If no key is configured, or if construction fails for
            every key. The last construction error is chained as the cause.
    """
    api_keys = get_api_keys()
    total = len(api_keys)
    last_error = None
    for position, key in enumerate(api_keys, start=1):
        # Identify keys by position only, so no part of a secret is printed.
        print(f"Trying API key {position}/{total}...")
        try:
            return genai.Client(api_key=key)
        except Exception as exc:
            last_error = exc
            print(f"Failed with key {position}/{total}: {exc}")
    raise ValueError("No valid API key could initialize the GenAI client.") from last_error
def generate_wave_bytes(pcm_data: bytes, channels: int, rate: int, sample_width: int) -> bytes:
    """Wrap raw PCM frames in a WAV container and return the file contents.

    Args:
        pcm_data: Raw audio frames, written unchanged after the header.
        channels: Channel count recorded in the header.
        rate: Frame rate recorded in the header.
        sample_width: Sample width in bytes recorded in the header.

    Returns:
        The complete WAV file as bytes, built entirely in memory.
    """
    buffer = io.BytesIO()
    # Leaving the ``with`` block closes the writer, which finalizes the
    # header; the buffer contents are read only after that.
    with wave.open(buffer, "wb") as writer:
        writer.setnchannels(channels)
        writer.setsampwidth(sample_width)
        writer.setframerate(rate)
        writer.writeframes(pcm_data)
    return buffer.getvalue()
async def generate_tts(request: TTSRequest):
    """Synthesize speech for ``request.text`` and return it as a WAV download.

    The text (prefixed with "Say cheerfully: " when ``request.cheerful`` is
    truthy) is sent to the ``gemini-2.5-flash-preview-tts`` model with the
    requested prebuilt voice. The audio bytes from the response are wrapped
    in a WAV header built from ``request.channels``, ``request.sample_rate``
    and ``request.sample_width``.

    NOTE(review): no route decorator is visible on this function in this
    view. It is presumably registered on ``app`` elsewhere, or the decorator
    was lost; confirm.

    Args:
        request: Validated request body.

    Returns:
        A ``StreamingResponse`` carrying ``audio/wav`` data on success, or a
        ``JSONResponse`` with status 500 and a
        ``{"status": "error", "message": ...}`` body on any failure.
    """
    try:
        client = initialize_genai_client()
        text_to_speak = f"Say cheerfully: {request.text}" if request.cheerful else request.text

        # Use the SDK's async client: a synchronous network call inside an
        # ``async def`` handler would block the event loop for every other
        # request until it finished.
        response = await client.aio.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text_to_speak,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=request.voice_name,
                        )
                    )
                ),
            ),
        )

        # Walk the response defensively: candidates, content, parts and
        # inline_data may each be missing or empty.
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content else None) or []
        audio_data = next(
            (
                part.inline_data.data
                for part in parts
                if part.inline_data and part.inline_data.data
            ),
            None,
        )
        if not audio_data:
            # Returned directly instead of raising HTTPException: the broad
            # handler below would catch that exception and stringify it,
            # garbling the message.
            return JSONResponse(
                {"status": "error", "message": "No audio data received from GenAI."},
                status_code=500,
            )

        wav_bytes = generate_wave_bytes(
            audio_data,
            channels=request.channels,
            rate=request.sample_rate,
            sample_width=request.sample_width,
        )
        return StreamingResponse(
            io.BytesIO(wav_bytes),
            media_type="audio/wav",
            headers={"Content-Disposition": "attachment; filename=generated_audio.wav"},
        )
    except Exception as e:
        # Boundary handler: every failure is reported to the client in the
        # same JSON shape.
        return JSONResponse(
            {"status": "error", "message": str(e)},
            status_code=500,
        )
async def root():
    """Return a fixed message saying the API is running."""
    # NOTE(review): no route decorator is visible on this function in this
    # view; confirm how it is registered on ``app``.
    payload = {"message": "Google GenAI TTS API is running"}
    return payload
async def health_check():
    """Return a fixed payload reporting the service as healthy."""
    # NOTE(review): no route decorator is visible on this function in this
    # view; confirm how it is registered on ``app``.
    payload = {"status": "healthy"}
    return payload
if __name__ == "__main__":
    # Script entry point: serve the app on every interface, port 8080.
    # uvicorn is imported here so that merely importing this module does not
    # require it.
    from uvicorn import run as run_server

    run_server(app, port=8080, host="0.0.0.0")