Aittsg / app.py
Athspi's picture
Update app.py
a09df36 verified
raw
history blame
2.95 kB
import io
import os
import re
import wave

import google.generativeai as genai
from fastapi import FastAPI, Response, HTTPException
from google.generativeai import types
from pydantic import BaseModel
# --- Configuration and API Key ---
# The key is read from the GOOGLE_API_KEY environment variable. On a Hugging
# Face Space, define it as a secret named 'GOOGLE_API_KEY' in the Space settings.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Configuration is best-effort: a missing key or a failing configure() call is
# reported on stdout and the module keeps loading, so the problem is visible
# in the logs instead of preventing startup.
try:
    if GOOGLE_API_KEY:
        genai.configure(api_key=GOOGLE_API_KEY)
    else:
        raise ValueError("Google API key not found. Please set it in your Hugging Face Space secrets.")
except Exception as config_error:
    print(f"Error during API key configuration: {config_error}")
# --- Pydantic Model for Request Body ---
# JSON body accepted by the /generate-audio/ endpoint. Every field carries a
# default, so an empty JSON object is a valid request.
class TextToSpeechRequest(BaseModel):
    # Prompt sent to the model as the text to be spoken.
    text: str = "Say cheerfully: Have a wonderful day!"

    # Name passed through as the prebuilt voice for the speech configuration.
    voice_name: str = "Kore"

    # File name advertised to the client in the Content-Disposition header.
    output_filename: str = "output.wav"
# --- FastAPI App Initialization ---
# Single application instance; the route decorators in this module register
# their handlers on it, and it is the object served as `app:app` by uvicorn.
app = FastAPI()
@app.get("/")
def read_root():
    # Landing route: returns a static greeting that points callers at the
    # audio-generation endpoint.
    welcome = {
        "message": "Welcome to the Text-to-Speech API using Gemini. Use the /generate-audio/ endpoint to create audio."
    }
    return welcome
# Parameters of the WAV container wrapped around the audio bytes returned by
# the model.
# NOTE(review): mono / 16-bit / 24 kHz are the values this endpoint has always
# hard-coded; confirm they match the model's documented PCM output format.
_WAV_CHANNELS = 1
_WAV_SAMPLE_WIDTH_BYTES = 2
_WAV_FRAME_RATE_HZ = 24000

# Anything outside this conservative set is replaced before the name is placed
# in a response header.
_UNSAFE_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9._-]")


def _extract_audio_bytes(response):
    """Return the inline audio payload of the first candidate, or None if absent."""
    try:
        first_part = response.candidates[0].content.parts[0]
    except (AttributeError, IndexError, TypeError):
        # Missing/empty candidates, content or parts: nothing usable came back.
        return None
    inline_data = getattr(first_part, "inline_data", None)
    return getattr(inline_data, "data", None) or None


def _pcm_to_wav_bytes(
    pcm,
    channels=_WAV_CHANNELS,
    sample_width=_WAV_SAMPLE_WIDTH_BYTES,
    frame_rate=_WAV_FRAME_RATE_HZ,
):
    """Wrap raw PCM frames in an in-memory WAV container and return its bytes."""
    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(frame_rate)
        wav_file.writeframes(pcm)
    # The wave writer finalises the header on close but leaves a caller-supplied
    # file object open, so the buffer is still readable here.
    return buffer.getvalue()


def _safe_download_name(filename, fallback="output.wav"):
    """Reduce a client-supplied file name to characters that are safe in a header.

    Directory components are dropped, every character outside
    ``[A-Za-z0-9._-]`` becomes ``_``, leading dots are stripped, and
    ``fallback`` is used when nothing is left.
    """
    leaf = re.split(r"[\\/]", filename)[-1]
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", leaf).lstrip(".")
    return cleaned or fallback


@app.post("/generate-audio/")
async def generate_audio(request: TextToSpeechRequest):
    """
    Generate speech for the provided text with Google's Gemini model and
    return it as a downloadable WAV file.

    The response has media type ``audio/wav`` and a ``Content-Disposition``
    header built from a sanitised ``output_filename``. Any failure is reported
    as HTTP 500; when the model returns no audio the detail is
    "Audio data could not be generated.".
    """
    try:
        # --- Text-to-Speech Generation ---
        # NOTE(review): `response_modalities` / `speech_config` look like the
        # request shape of the newer `google-genai` SDK (`generate_content`
        # with a generation config) rather than
        # `google.generativeai.generate_text`; verify this call against the
        # installed SDK. It is left unchanged here because switching SDKs
        # means changing the project's dependencies.
        # NOTE(review): this is a synchronous call inside an async handler; if
        # it blocks on network I/O it will stall the event loop. Consider
        # running it in a worker thread.
        response = genai.generate_text(
            model="gemini-2.5-flash-preview-tts",
            prompt=request.text,
            options=types.GenerationOptions(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=request.voice_name,
                        )
                    )
                ),
            ),
        )

        # --- Extract Audio Data ---
        audio_data = _extract_audio_bytes(response)
        if not audio_data:
            raise HTTPException(status_code=500, detail="Audio data could not be generated.")

        # --- Build the WAV file in memory ---
        wav_bytes = _pcm_to_wav_bytes(audio_data)
    except HTTPException:
        # Deliberate HTTP errors must reach the client untouched; without this
        # clause the generic handler below would re-wrap them and replace the
        # intended detail with the exception's string form.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # --- Return Audio File as Response ---
    # The file name is client-controlled, so it is sanitised before being
    # interpolated into the header to prevent parameter/header injection and
    # header-encoding errors.
    download_name = _safe_download_name(request.output_filename)
    return Response(
        content=wav_bytes,
        media_type="audio/wav",
        headers={"Content-Disposition": f"attachment; filename={download_name}"},
    )
# To run this locally, use the command: uvicorn app:app --reload