Spaces:
Running
Running
File size: 2,952 Bytes
a09df36 94de3c6 a09df36 94de3c6 a09df36 6dde081 a09df36 22004d7 a09df36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import os
import wave
from fastapi import FastAPI, Response, HTTPException
from pydantic import BaseModel
import google.generativeai as genai
from google.generativeai import types
# --- Configuration and API Key ---
# It is recommended to set your Google API key as a secret in your Hugging Face Space settings.
# The key for the secret should be 'GOOGLE_API_KEY'.
# Read the key once at import time; a missing or rejected key is reported on
# stdout rather than aborting start-up, so the app can still be served.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
try:
    if GOOGLE_API_KEY:
        genai.configure(api_key=GOOGLE_API_KEY)
    else:
        raise ValueError("Google API key not found. Please set it in your Hugging Face Space secrets.")
except Exception as config_error:
    # Printed so the cause is visible in the logs when the key is not set.
    print(f"Error during API key configuration: {config_error}")
# --- Pydantic Model for Request Body ---
class TextToSpeechRequest(BaseModel):
    # Request body accepted by the /generate-audio/ endpoint; every field has
    # a default, so an empty JSON object is a valid request.

    # Prompt forwarded to the model as the text to speak.
    text: str = "Say cheerfully: Have a wonderful day!"

    # Name of the prebuilt voice handed to the speech configuration.
    voice_name: str = "Kore"

    # File name advertised in the response's Content-Disposition header.
    output_filename: str = "output.wav"
# --- FastAPI App Initialization ---
# Application instance on which the route decorators in this file register
# their handlers; it is also the object the local uvicorn command serves.
app = FastAPI()
@app.get("/")
def read_root():
    # Landing route: returns a fixed greeting that points callers at the
    # audio-generation endpoint.
    welcome_text = "Welcome to the Text-to-Speech API using Gemini. Use the /generate-audio/ endpoint to create audio."
    return {"message": welcome_text}
def _extract_audio_bytes(response):
    """Return the first inline audio payload in *response*, or None if absent."""
    try:
        return response.candidates[0].content.parts[0].inline_data.data or None
    except (AttributeError, IndexError, TypeError):
        # A missing level anywhere along the path (no candidates, no parts,
        # no inline data) simply means no audio came back.
        return None


def _attachment_disposition(filename, fallback="output.wav"):
    """Build a Content-Disposition value that is safe to emit as a header."""
    # The file name comes straight from the request body. Drop control
    # characters (CR/LF included) plus the quote and backslash that could
    # terminate the quoted-string early.
    cleaned = "".join(
        ch for ch in str(filename) if ch.isprintable() and ch not in '"\\'
    )
    return f'attachment; filename="{cleaned or fallback}"'


@app.post("/generate-audio/")
async def generate_audio(request: TextToSpeechRequest):
    """
    Generate speech for the provided text using Google's Gemini model.

    The request's ``text`` is sent as the prompt and ``voice_name`` selects
    the prebuilt voice. The returned audio bytes are wrapped in a WAV
    container and sent back as an ``audio/wav`` attachment named after
    ``output_filename``.

    Raises:
        HTTPException: status 500 with detail "Audio data could not be
            generated." when the model response carries no audio, or
            status 500 with the underlying error text for any other failure.
    """
    import io  # local import: keeps this block self-contained

    try:
        # --- Text-to-Speech Generation ---
        # NOTE(review): confirm that the installed google.generativeai
        # release exposes generate_text() with an `options` argument and
        # these speech-config types; the call shape resembles the newer
        # google-genai SDK's generate_content API.
        response = genai.generate_text(
            model="gemini-2.5-flash-preview-tts",
            prompt=request.text,
            options=types.GenerationOptions(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=request.voice_name,
                        )
                    )
                ),
            ),
        )

        # --- Extract Audio Data ---
        audio_data = _extract_audio_bytes(response)
        if not audio_data:
            raise HTTPException(status_code=500, detail="Audio data could not be generated.")

        # --- Save to a WAV file in memory ---
        # Header written here: mono, 2-byte samples, 24000 Hz.
        # NOTE(review): assumes the model returns raw PCM in that format.
        buffer = io.BytesIO()
        with wave.open(buffer, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(24000)
            wf.writeframes(audio_data)

        # --- Return Audio File as Response ---
        return Response(
            content=buffer.getvalue(),
            media_type="audio/wav",
            headers={"Content-Disposition": _attachment_disposition(request.output_filename)},
        )
    except HTTPException:
        # Deliberate HTTP errors raised above must reach the client unchanged
        # instead of being re-wrapped by the generic handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# To run this locally, use the command: uvicorn app:app --reload