File size: 1,080 Bytes
4a54590
de5b9f0
6acc004
4a54590
 
a09df36
4a54590
a09df36
4a54590
a09df36
4a54590
de5b9f0
a09df36
6acc004
a09df36
6acc004
4a54590
 
 
 
 
 
 
a09df36
de5b9f0
 
 
 
 
 
 
a09df36
de5b9f0
a09df36
de5b9f0
 
a09df36
de5b9f0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
import tempfile
import wave

import google.generativeai as genai
from fastapi import BackgroundTasks, FastAPI, Form, HTTPException
from fastapi.responses import FileResponse

# ASGI application object; the route below registers itself on it.
app = FastAPI()

# The API key is taken from the environment. A missing or empty value aborts
# the import so the service never starts without credentials.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
    raise ValueError("Set GOOGLE_API_KEY environment variable.")

genai.configure(api_key=GOOGLE_API_KEY)

# Single model handle created at import time and used by the /tts endpoint.
model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")

def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write ``pcm`` frames into a WAV container at ``filename``.

    Args:
        filename: Destination passed straight to ``wave.open`` (a path or a
            writable binary file object).
        pcm: Bytes-like object holding the audio frames to write.
        channels: Number of audio channels recorded in the header.
        rate: Frame rate in Hz recorded in the header.
        sample_width: Sample width in bytes recorded in the header.
    """
    writer = wave.open(filename, "wb")
    try:
        # (nchannels, sampwidth, framerate, nframes, comptype, compname);
        # the last three are the writer's own defaults for uncompressed audio.
        writer.setparams(
            (channels, sample_width, rate, 0, "NONE", "not compressed")
        )
        writer.writeframes(pcm)
    finally:
        # Closing finalizes the header, mirroring what the ``with`` form does.
        writer.close()

@app.post("/tts")
def tts(text: str = Form(...)):
    """Synthesize speech for ``text`` and return it as a WAV download.

    The required form field ``text`` is sent to the module-level ``model``.
    The inline data of the first part of the first candidate is wrapped in a
    WAV container by ``wave_file`` and returned as ``output.wav``.

    Each request writes to its own temporary file, which is removed after the
    response has been sent. A single shared output path would let concurrent
    requests overwrite a file that another response is still sending, and
    would leave the file on disk indefinitely.

    Args:
        text: Text to synthesize, read from the request's form data.

    Returns:
        A ``FileResponse`` with media type ``audio/wav``.

    Raises:
        HTTPException: 502 when the model response carries no audio payload.
    """
    # NOTE(review): `response_modality` does not appear to be a documented
    # keyword of this SDK's `generate_content`, and "audio/wav" may not be an
    # accepted `response_mime_type` -- confirm against the installed SDK.
    response = model.generate_content(
        text,
        generation_config={"response_mime_type": "audio/wav"},
        response_modality="AUDIO",
    )

    try:
        audio_data = response.candidates[0].content.parts[0].inline_data.data
    except (IndexError, AttributeError) as err:
        # No candidates / no parts / no inline payload: report an upstream
        # failure instead of leaking a bare IndexError as a 500.
        raise HTTPException(
            status_code=502,
            detail="Model response contained no audio data.",
        ) from err
    if not audio_data:
        raise HTTPException(
            status_code=502,
            detail="Model response contained no audio data.",
        )

    # Unique per-request path so parallel requests never share a file.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # wave_file reopens the path itself
    try:
        # NOTE(review): wave_file's defaults presume raw 16-bit mono PCM at
        # 24 kHz -- confirm that matches what the model actually returns.
        wave_file(output_path, audio_data)
    except Exception:
        os.remove(output_path)
        raise

    # Delete the temporary file once the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, output_path)

    return FileResponse(
        output_path,
        media_type="audio/wav",
        filename="output.wav",
        background=cleanup,
    )