Spaces:

Athspi-aitools
/

Aittsg

Running

App Files Files Community

Aittsg / app.py

Athspi

Update app.py

a09df36 verified 3 months ago

raw

history blame

2.95 kB

	import os
	import wave
	from fastapi import FastAPI, Response, HTTPException
	from pydantic import BaseModel
	import google.generativeai as genai
	from google.generativeai import types

	# --- Configuration and API Key ---
	# It is recommended to set your Google API key as a secret in your Hugging Face Space settings.
	# The key for the secret should be 'GOOGLE_API_KEY'.
	#
	# Configuration is best-effort: a missing key or a failing `genai.configure`
	# call is reported on stdout and the module keeps loading, so the app still
	# starts and the problem shows up in the Space logs.
	GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
	try:
	    if GOOGLE_API_KEY:
	        genai.configure(api_key=GOOGLE_API_KEY)
	    else:
	        raise ValueError("Google API key not found. Please set it in your Hugging Face Space secrets.")
	except Exception as config_error:
	    # This will help in debugging if the key is not set.
	    print(f"Error during API key configuration: {config_error}")


	# --- Pydantic Model for Request Body ---
	class TextToSpeechRequest(BaseModel):
	    """Request body for the ``/generate-audio/`` endpoint.

	    Every field has a default, so an empty JSON object is a valid request.
	    """

	    # Prompt handed to the model as the text to speak.
	    text: str = "Say cheerfully: Have a wonderful day!"
	    # Name of the prebuilt voice placed in the speech configuration.
	    voice_name: str = "Kore"
	    # Filename advertised in the response's Content-Disposition header.
	    output_filename: str = "output.wav"


	# --- FastAPI App Initialization ---
	# Module-level application object: the route decorators below register
	# their handlers on it, and `uvicorn app:app` serves it.
	app = FastAPI()


	@app.get("/")
	def read_root():
	    """Return a static welcome message that points clients at ``/generate-audio/``."""
	    return {"message": "Welcome to the Text-to-Speech API using Gemini. Use the /generate-audio/ endpoint to create audio."}


	def _safe_attachment_name(filename: str, fallback: str = "output.wav") -> str:
	    """Reduce a client-supplied filename to a value safe for Content-Disposition.

	    Only the last path component is kept, and every character other than
	    ASCII letters, digits, ``.``, ``_``, ``-`` and space is dropped, so quotes,
	    control characters and CR/LF can never reach the response header.

	    Args:
	        filename: The name requested by the client.
	        fallback: Name to use when nothing usable is left after cleaning.

	    Returns:
	        The cleaned filename, or ``fallback`` when the cleaned result is empty.
	    """
	    last_component = filename.replace("\\", "/").rsplit("/", 1)[-1]
	    cleaned = "".join(
	        ch for ch in last_component
	        if ch.isascii() and (ch.isalnum() or ch in "._- ")
	    ).strip(" .")
	    return cleaned or fallback


	@app.post("/generate-audio/")
	async def generate_audio(request: TextToSpeechRequest):
	    """Generate speech for ``request.text`` and return it as a WAV attachment.

	    The bytes returned by the model are wrapped in an in-memory WAV container
	    (1 channel, 2-byte samples, 24000 Hz) and sent back with media type
	    ``audio/wav``.

	    Args:
	        request: Text to speak, the prebuilt voice name and the download
	            filename.

	    Returns:
	        A ``Response`` whose body is the WAV file and whose
	        ``Content-Disposition`` header carries the sanitized filename.

	    Raises:
	        HTTPException: 400 when the text is empty or whitespace only; 500 when
	            the model returns no audio or any other error occurs while
	            generating or packaging it.
	    """
	    import io  # local import: only needed to build the WAV in memory

	    if not request.text.strip():
	        raise HTTPException(status_code=400, detail="Text must not be empty.")

	    try:
	        # --- Text-to-Speech Generation ---
	        # NOTE(review): the speech_config / response_modalities shape appears to
	        # follow the google-genai SDK's TTS sample (client.models.generate_content
	        # with types.GenerateContentConfig). Confirm that google.generativeai
	        # really exposes generate_text, types.GenerationOptions and
	        # types.SpeechConfig; if it does not, every request ends in the 500
	        # handler below.
	        # NOTE: this call is not awaited, so the event loop is blocked while
	        # it runs.
	        response = genai.generate_text(
	            model="gemini-2.5-flash-preview-tts",
	            prompt=request.text,
	            options=types.GenerationOptions(
	                response_modalities=["AUDIO"],
	                speech_config=types.SpeechConfig(
	                    voice_config=types.VoiceConfig(
	                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
	                            voice_name=request.voice_name,
	                        )
	                    )
	                ),
	            ),
	        )

	        # --- Extract Audio Data ---
	        # Walk the response defensively so a missing inline_data produces the
	        # explicit "could not be generated" error instead of an AttributeError.
	        candidates = response.candidates
	        parts = candidates[0].content.parts if candidates else None
	        inline_data = getattr(parts[0], "inline_data", None) if parts else None
	        audio_data = getattr(inline_data, "data", None)
	        if not audio_data:
	            raise HTTPException(status_code=500, detail="Audio data could not be generated.")

	        # --- Save to a WAV file in memory ---
	        # Assumes the model returns raw 16-bit mono PCM at 24 kHz, matching the
	        # header written here -- TODO confirm against the model documentation.
	        buffer = io.BytesIO()
	        with wave.open(buffer, "wb") as wf:
	            wf.setnchannels(1)
	            wf.setsampwidth(2)
	            wf.setframerate(24000)
	            wf.writeframes(audio_data)
	        wav_bytes = buffer.getvalue()
	    except HTTPException:
	        # Already a well-formed API error; re-wrapping it would mangle its detail.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=str(e)) from e

	    # --- Return Audio File as Response ---
	    download_name = _safe_attachment_name(request.output_filename)
	    return Response(
	        content=wav_bytes,
	        media_type="audio/wav",
	        headers={"Content-Disposition": f'attachment; filename="{download_name}"'},
	    )

	# To run this locally, use the command: uvicorn app:app --reload