# Hugging Face Space (status: Running)
import os | |
import wave | |
from fastapi import FastAPI, Response, HTTPException | |
from pydantic import BaseModel | |
import google.generativeai as genai | |
from google.generativeai import types | |
# --- Configuration and API Key ---
# It is recommended to store your Google API key as a secret in your Hugging Face Space settings.
# The secret must be named 'GOOGLE_API_KEY'.
# Look the key up once, then configure the Gemini client with it.  A missing
# key and a failing configure() call are both reported through the same
# printed message instead of aborting the import of this module.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
try:
    if GOOGLE_API_KEY:
        genai.configure(api_key=GOOGLE_API_KEY)
    else:
        raise ValueError("Google API key not found. Please set it in your Hugging Face Space secrets.")
except Exception as config_error:
    # Printing the reason helps with debugging when the key is not set.
    print(f"Error during API key configuration: {config_error}")
# --- Pydantic Model for Request Body ---
class TextToSpeechRequest(BaseModel):
    """Request body accepted by the audio-generation endpoint.

    Every field carries a default, so a request may omit any of them.
    """

    # Prompt handed to the model as the text to be spoken.
    text: str = "Say cheerfully: Have a wonderful day!"
    # Name of the prebuilt voice requested from the model.
    voice_name: str = "Kore"
    # File name advertised in the response's Content-Disposition header.
    output_filename: str = "output.wav"
# --- FastAPI App Initialization ---
app = FastAPI()


# Register the handler with the application; without a route decorator the
# function is defined but never reachable over HTTP.
@app.get("/")
def read_root():
    """Return a welcome message that points clients at the audio endpoint."""
    return {"message": "Welcome to the Text-to-Speech API using Gemini. Use the /generate-audio/ endpoint to create audio."}
def _safe_download_name(filename: str, fallback: str = "output.wav") -> str:
    """Reduce a client-supplied file name to characters safe inside a quoted header value."""
    # Drop any directory component, whichever separator style was used.
    leaf = filename.replace("\\", "/").rsplit("/", 1)[-1]
    # Keep a conservative ASCII subset: no quotes, control characters or
    # non-latin-1 text can reach the response header.
    cleaned = "".join(
        ch for ch in leaf if ch.isascii() and (ch.isalnum() or ch in "._- ")
    ).strip()
    return cleaned or fallback


def _pcm_to_wav(pcm: bytes, channels: int = 1, sample_width: int = 2, frame_rate: int = 24000) -> bytes:
    """Wrap raw PCM frames in a WAV container and return the encoded bytes."""
    import io

    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(frame_rate)
        wf.writeframes(pcm)
    return buffer.getvalue()


# The root endpoint's welcome message advertises this path; the handler takes
# a request body, hence POST.
@app.post("/generate-audio/")
async def generate_audio(request: TextToSpeechRequest):
    """Generate speech audio for ``request.text`` and return it as a WAV download.

    Args:
        request: Text to speak, the prebuilt voice name, and the file name to
            advertise in the ``Content-Disposition`` header.

    Returns:
        A ``Response`` with media type ``audio/wav`` whose body is the WAV file.

    Raises:
        HTTPException: Status 500 when the model returns no audio data, or
            when any other error occurs while generating or encoding it (the
            detail then carries the underlying error text).
    """
    try:
        # --- Text-to-Speech Generation ---
        # NOTE(review): SpeechConfig / VoiceConfig / PrebuiltVoiceConfig and
        # response_modalities appear to belong to the newer `google-genai` SDK
        # rather than `google.generativeai`; confirm that this call is valid
        # for the installed package before relying on it.
        response = genai.generate_text(
            model="gemini-2.5-flash-preview-tts",
            prompt=request.text,
            options=types.GenerationOptions(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=request.voice_name,
                        )
                    )
                ),
            )
        )

        # --- Extract Audio Data ---
        candidates = response.candidates
        if (
            not candidates
            or not candidates[0].content.parts
            or not candidates[0].content.parts[0].inline_data.data
        ):
            raise HTTPException(status_code=500, detail="Audio data could not be generated.")
        audio_data = candidates[0].content.parts[0].inline_data.data

        # --- Encode as a WAV file in memory ---
        # Assumes the model returns 16-bit mono PCM at 24 kHz -- TODO confirm
        # against the API documentation.
        wav_bytes = _pcm_to_wav(audio_data, channels=1, sample_width=2, frame_rate=24000)

        # --- Return Audio File as Response ---
        # The file name comes from the client, so it is sanitised and quoted
        # before being placed in a header.
        download_name = _safe_download_name(request.output_filename)
        return Response(
            content=wav_bytes,
            media_type="audio/wav",
            headers={"Content-Disposition": f'attachment; filename="{download_name}"'},
        )
    except HTTPException:
        # Already a well-formed HTTP error; re-wrapping it below would mangle
        # its detail message.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# To run this locally (assuming this file is saved as app.py), use the command: uvicorn app:app --reload