Spaces:

Athspi-aitools
/

Aittsg

Running

App Files Community

Aittsg / app.py

Athspi

Update app.py

5eaef53 verified 20 days ago

raw

history blame

3.97 kB

	from fastapi import FastAPI, HTTPException
	from fastapi.responses import JSONResponse, StreamingResponse
	from google import genai
	from google.genai import types
	import wave
	import io
	import os
	from typing import Optional, List
	from pydantic import BaseModel
	from dotenv import load_dotenv

	# Read a local .env file (if any) into the process environment before
	# anything below looks up configuration.
	load_dotenv()

	# The FastAPI application object; route decorators further down register
	# their handlers on it. Interactive docs are served at /docs, and passing
	# redoc_url=None turns the ReDoc page off.
	app = FastAPI(
	    title="Google GenAI TTS API with Multiple API Keys",
	    description="Text-to-Speech API using Google GenAI with multiple API keys fallback.",
	    version="1.2.0",
	    redoc_url=None,
	    docs_url="/docs",
	)

	# Request body accepted by the TTS endpoint (validated by pydantic).
	# Only `text` is required; every other field has a default.
	class TTSRequest(BaseModel):
	    # Text to be spoken.
	    text: str
	    # Name of the prebuilt voice passed to the speech configuration.
	    voice_name: Optional[str] = "Kore"
	    # When true, the text is prefixed with "Say cheerfully: " before synthesis.
	    cheerful: Optional[bool] = True

	    # The three values below are written into the WAV header that wraps the
	    # returned PCM data.
	    sample_rate: Optional[int] = 24000  # frames per second
	    channels: Optional[int] = 1  # number of audio channels
	    sample_width: Optional[int] = 2  # bytes per sample

	def get_api_keys() -> List[str]:
	    """Return the API keys configured in the GEMINI_API_KEYS environment variable.

	    The variable holds a comma-separated list. Whitespace around each entry
	    is stripped and empty entries are dropped.

	    Returns:
	        The non-empty keys, in the order they appear in the variable.

	    Raises:
	        ValueError: If the variable is unset, empty, or contains no usable
	            key (for example only commas and/or whitespace).
	    """
	    raw_value = os.getenv("GEMINI_API_KEYS", "")
	    keys = [entry.strip() for entry in raw_value.split(",") if entry.strip()]
	    # Validate the parsed list rather than the raw string, so a value such
	    # as " , ," is reported as missing keys instead of returning [].
	    if not keys:
	        raise ValueError("No API keys found in GEMINI_API_KEYS environment variable.")
	    return keys

	def initialize_genai_client():
	    """Build a GenAI client from the first configured API key that works.

	    Keys come from get_api_keys() and are tried in order; the first one for
	    which genai.Client(...) is constructed without raising is returned.
	    Progress and failures are printed, showing only the first five
	    characters of each key.

	    NOTE(review): this only detects keys that make the constructor itself
	    raise. Whether an invalid or exhausted key is rejected at construction
	    time (rather than on the first request) should be confirmed against
	    the SDK.

	    Raises:
	        ValueError: If no key yields a client (get_api_keys() may also raise
	            ValueError when no keys are configured).
	    """
	    for key in get_api_keys():
	        try:
	            print(f"Trying API key: {key[:5]}...")  # Only show part for safety
	            return genai.Client(api_key=key)
	        except Exception as e:
	            # Report the failure and move on to the next candidate key.
	            print(f"Failed with key {key[:5]}... : {e}")

	    raise ValueError("No valid API key could initialize the GenAI client.")

	def generate_wave_bytes(pcm_data: bytes, channels: int, rate: int, sample_width: int) -> bytes:
	    """Wrap raw PCM frames in a WAV container and return the file as bytes.

	    Args:
	        pcm_data: Raw PCM audio frames to store.
	        channels: Number of audio channels recorded in the header.
	        rate: Frame rate (frames per second) recorded in the header.
	        sample_width: Bytes per sample recorded in the header.

	    Returns:
	        The complete WAV file contents (header followed by ``pcm_data``).
	    """
	    buffer = io.BytesIO()
	    try:
	        with wave.open(buffer, "wb") as writer:
	            # The header parameters have to be set before any frames are written.
	            writer.setnchannels(channels)
	            writer.setsampwidth(sample_width)
	            writer.setframerate(rate)
	            writer.writeframes(pcm_data)
	        # The wave writer does not close a file object it did not open, so
	        # the buffer can still be read here.
	        return buffer.getvalue()
	    finally:
	        buffer.close()

	@app.post("/api/generate-tts/")
	async def generate_tts(request: TTSRequest):
	    """
	    Convert text to speech audio using Google GenAI.

	    Args:
	        request: The validated request body. ``text`` is what gets spoken
	            (prefixed with "Say cheerfully: " when ``cheerful`` is true),
	            ``voice_name`` selects the prebuilt voice, and ``channels`` /
	            ``sample_rate`` / ``sample_width`` are written into the WAV
	            header; the audio itself is not resampled here.

	    Returns:
	        A StreamingResponse carrying the WAV file as an attachment, or a
	        JSONResponse with status 500 and ``{"status": "error", "message": ...}``
	        when an unexpected error occurs.

	    Raises:
	        HTTPException: Status 500 when the model response contains no audio data.
	    """
	    try:
	        client = initialize_genai_client()

	        text_to_speak = f"Say cheerfully: {request.text}" if request.cheerful else request.text

	        # Use the SDK's async surface: the synchronous call would block the
	        # event loop (and every other request) for the whole generation.
	        response = await client.aio.models.generate_content(
	            model="gemini-2.5-flash-preview-tts",
	            contents=text_to_speak,
	            config=types.GenerateContentConfig(
	                response_modalities=["AUDIO"],
	                speech_config=types.SpeechConfig(
	                    voice_config=types.VoiceConfig(
	                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
	                            voice_name=request.voice_name,
	                        )
	                    )
	                ),
	            ),
	        )

	        # Walk the response defensively: any level (candidates, content,
	        # parts, inline_data) may be missing, and a missing level should be
	        # reported as "no audio" rather than as an AttributeError.
	        candidates = response.candidates or []
	        content = candidates[0].content if candidates else None
	        parts = (content.parts or []) if content is not None else []
	        audio_data = next(
	            (
	                part.inline_data.data
	                for part in parts
	                if part.inline_data is not None and part.inline_data.data
	            ),
	            None,
	        )
	        if audio_data is None:
	            raise HTTPException(status_code=500, detail="No audio data received from GenAI.")

	        wav_bytes = generate_wave_bytes(
	            audio_data,
	            channels=request.channels,
	            rate=request.sample_rate,
	            sample_width=request.sample_width,
	        )

	        return StreamingResponse(
	            io.BytesIO(wav_bytes),
	            media_type="audio/wav",
	            headers={"Content-Disposition": "attachment; filename=generated_audio.wav"},
	        )

	    except HTTPException:
	        # Let deliberate HTTP errors reach FastAPI's own handler instead of
	        # being re-wrapped by the catch-all below.
	        raise
	    except Exception as e:
	        return JSONResponse(
	            {"status": "error", "message": str(e)},
	            status_code=500,
	        )

	@app.get("/")
	async def root():
	    """Landing endpoint: return a short message saying the API is running."""
	    banner = {"message": "Google GenAI TTS API is running"}
	    return banner

	@app.get("/health")
	async def health_check():
	    """Health endpoint: always report a static "healthy" status."""
	    status_report = {"status": "healthy"}
	    return status_report

	if __name__ == "__main__":
	    # Only needed when the file is executed directly, so import it here.
	    import uvicorn

	    # Serve the app on every network interface, port 8080.
	    uvicorn.run(app, port=8080, host="0.0.0.0")