Spaces:

Athspi-aitools
/

Aittsg

Running

File size: 1,080 Bytes

4a54590
de5b9f0
6acc004
4a54590
 
a09df36
4a54590
a09df36
4a54590
a09df36
4a54590
de5b9f0
a09df36
6acc004
a09df36
6acc004
4a54590
 
 
 
 
 
 
a09df36
de5b9f0
 
 
 
 
 
 
a09df36
de5b9f0
a09df36
de5b9f0
 
a09df36
de5b9f0

import os
import tempfile
import wave

import google.generativeai as genai
from fastapi import BackgroundTasks, FastAPI, Form, HTTPException
from fastapi.responses import FileResponse

# The API key is read from the environment; the module refuses to load
# without it.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Set GOOGLE_API_KEY environment variable.")

# ASGI application object that the route decorators below attach to.
app = FastAPI()

# Register the key with the SDK, then build the shared model handle used by
# the request handlers.
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")

def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM frames to *filename* as an uncompressed WAV file.

    Args:
        filename: Destination handed to ``wave.open`` in write mode.
        pcm: Bytes-like audio frames, written as-is after the header.
        channels: Number of audio channels recorded in the header.
        rate: Frame rate (samples per second) recorded in the header.
        sample_width: Bytes per sample recorded in the header.
    """
    # Field order expected by setparams:
    # (nchannels, sampwidth, framerate, nframes, comptype, compname).
    # nframes stays 0 so the writer derives it from the data it is given.
    header = (channels, sample_width, rate, 0, "NONE", "not compressed")
    with wave.open(filename, "wb") as writer:
        writer.setparams(header)
        writer.writeframes(pcm)

@app.post("/tts")
def tts(text: str = Form(...)):
    """Synthesize speech for *text* and return it as a WAV download.

    Args:
        text: Required form field holding the text to speak.

    Returns:
        A ``FileResponse`` with media type ``audio/wav`` and download name
        ``output.wav``.

    Raises:
        HTTPException: 502 when the model response carries no audio payload.
    """
    # NOTE(review): `response_modality` and the "audio/wav" mime type are
    # passed through unchanged -- confirm the installed google-generativeai
    # version accepts them for this model.
    response = model.generate_content(
        text,
        generation_config={"response_mime_type": "audio/wav"},
        response_modality="AUDIO",
    )

    # The payload is expected in the first part of the first candidate; turn
    # a missing or empty payload into an explicit gateway error instead of an
    # unhandled IndexError/AttributeError or an empty WAV file.
    try:
        audio_data = response.candidates[0].content.parts[0].inline_data.data
    except (IndexError, AttributeError) as err:
        raise HTTPException(
            status_code=502,
            detail="Speech model returned no audio data.",
        ) from err
    if not audio_data:
        raise HTTPException(
            status_code=502,
            detail="Speech model returned no audio data.",
        )

    # Use a unique file per request: a shared fixed path would let concurrent
    # requests overwrite each other's audio while it is still being streamed.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        wave_file(output_path, audio_data)
    except Exception:
        # Do not leave a partial file behind if writing fails.
        os.remove(output_path)
        raise

    # Delete the temporary file once the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, output_path)

    return FileResponse(
        output_path,
        media_type="audio/wav",
        filename="output.wav",
        background=cleanup,
    )