Spaces:
Running
Running
import os | |
import requests | |
import wave | |
import base64 | |
from fastapi import FastAPI, Form | |
from fastapi.responses import FileResponse, JSONResponse | |
from dotenv import load_dotenv | |
# Read variables from a local .env file (if present) into the process
# environment, then fetch the Gemini API key from there.
load_dotenv()
API_KEY = os.environ.get("GEMINI_API_KEY")
if API_KEY is None or API_KEY == "":
    # Fail at import time: nothing below can work without a key.
    raise ValueError("Missing GEMINI_API_KEY in .env")

# generateContent REST endpoint for the TTS model. The key travels as a
# query parameter, so BASE_URL itself is a secret: do not log or echo it.
BASE_URL = "".join(
    (
        "https://generativelanguage.googleapis.com/",
        "v1beta/models/gemini-2.5-flash-preview-tts:",
        "generateContent",
        f"?key={API_KEY}",
    )
)

# ASGI application object the request handlers in this module belong to.
app = FastAPI(title="Gemini TTS Space")
def save_wav(path: str, pcm: bytes, channels=1, rate=24000, width=2):
    """Write raw PCM audio to ``path`` inside a WAV container.

    Args:
        path: Destination file; created or overwritten.
        pcm: Raw PCM frame bytes, written unchanged.
        channels: Channel count stored in the header (default 1).
        rate: Frame rate in Hz stored in the header (default 24000).
        width: Sample width in bytes stored in the header (default 2).
    """
    with wave.open(path, "wb") as writer:
        # The header fields are independent of one another; all of them must
        # be set before the first frame is written.
        writer.setframerate(rate)
        writer.setsampwidth(width)
        writer.setnchannels(channels)
        writer.writeframes(pcm)
# NOTE(review): no route decorator is visible on this handler in this view;
# presumably it is registered on `app` elsewhere or the decorator line was
# lost. Confirm before deploying.
def health():
    """Return a static liveness payload for the service."""
    status_payload = {"status": "Gemini TTS Space is live!"}
    return status_payload
# NOTE(review): no route decorator is visible on this handler in this view;
# presumably it is registered on `app` elsewhere or the decorator line was
# lost. Confirm before deploying.
def single_tts(prompt: str = Form(...), voice_name: str = Form(...)):
    """Synthesize ``prompt`` with a single prebuilt voice and return a WAV file.

    Args:
        prompt: Text to be spoken (required form field).
        voice_name: Prebuilt voice name, forwarded as ``voiceName``
            (required form field).

    Returns:
        ``FileResponse`` serving ``single_output.wav`` on success.
        ``JSONResponse`` otherwise: the upstream status code and body when
        the API answers with a non-200 status, or status 502 when the API
        cannot be reached or its reply carries no decodable audio.
    """
    payload = {
        "model": "gemini-2.5-flash-preview-tts",
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            # NOTE(review): confirm against the Gemini speech-generation docs
            # that "audio/wav" is an accepted responseMimeType for this model.
            "responseMimeType": "audio/wav",
            "responseModalities": ["AUDIO"],
            "speechConfig": {
                "voiceConfig": {
                    "prebuiltVoiceConfig": {"voiceName": voice_name}
                }
            },
        },
    }

    try:
        # Without a timeout a stalled upstream would block this worker forever.
        resp = requests.post(BASE_URL, json=payload, timeout=120)
    except requests.RequestException as exc:
        # Do not echo str(exc): requests' messages can include the request
        # URL, and BASE_URL contains the API key.
        return JSONResponse(
            status_code=502,
            content={
                "error": "Request to Gemini API failed",
                "reason": type(exc).__name__,
            },
        )

    try:
        body = resp.json()
    except ValueError:
        # Upstream sent something that is not JSON (e.g. a proxy error page).
        body = None

    if resp.status_code != 200:
        if body is None:
            body = {"error": "Gemini API returned a non-JSON error response"}
        return JSONResponse(status_code=resp.status_code, content=body)

    try:
        data_b64 = body["candidates"][0]["content"]["parts"][0]["inlineData"]["data"]
        pcm = base64.b64decode(data_b64)
    except (KeyError, IndexError, TypeError, ValueError):
        # A 200 reply without an inline audio part, or with undecodable data.
        return JSONResponse(
            status_code=502,
            content={"error": "Gemini API response did not contain audio data"},
        )

    # NOTE(review): this fixed path is shared by every request, so concurrent
    # calls can overwrite each other's output before it is served.
    out_path = "single_output.wav"
    save_wav(out_path, pcm)
    return FileResponse(out_path, media_type="audio/wav", filename=out_path)
# NOTE(review): no route decorator is visible on this handler in this view;
# presumably it is registered on `app` elsewhere or the decorator line was
# lost. Confirm before deploying.
def multi_tts(
    prompt: str = Form(...),
    speaker1: str = Form(...), voice1: str = Form(...),
    speaker2: str = Form(...), voice2: str = Form(...)
):
    """Synthesize ``prompt`` as a two-speaker dialogue and return a WAV file.

    Args:
        prompt: Text to be spoken (required form field).
        speaker1: Label of the first speaker, forwarded as ``speaker``.
            Presumably it should match a speaker label used inside
            ``prompt``; confirm against the API docs.
        voice1: Prebuilt voice name assigned to ``speaker1``.
        speaker2: Label of the second speaker, forwarded as ``speaker``.
        voice2: Prebuilt voice name assigned to ``speaker2``.

    Returns:
        ``FileResponse`` serving ``multi_output.wav`` on success.
        ``JSONResponse`` otherwise: the upstream status code and body when
        the API answers with a non-200 status, or status 502 when the API
        cannot be reached or its reply carries no decodable audio.
    """
    payload = {
        "model": "gemini-2.5-flash-preview-tts",
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            # NOTE(review): confirm against the Gemini speech-generation docs
            # that "audio/wav" is an accepted responseMimeType for this model.
            "responseMimeType": "audio/wav",
            "responseModalities": ["AUDIO"],
            "speechConfig": {
                "multiSpeakerVoiceConfig": {
                    "speakerVoiceConfigs": [
                        {
                            "speaker": speaker1,
                            "voiceConfig": {
                                "prebuiltVoiceConfig": {"voiceName": voice1}
                            },
                        },
                        {
                            "speaker": speaker2,
                            "voiceConfig": {
                                "prebuiltVoiceConfig": {"voiceName": voice2}
                            },
                        },
                    ]
                }
            },
        },
    }

    try:
        # Without a timeout a stalled upstream would block this worker forever.
        resp = requests.post(BASE_URL, json=payload, timeout=120)
    except requests.RequestException as exc:
        # Do not echo str(exc): requests' messages can include the request
        # URL, and BASE_URL contains the API key.
        return JSONResponse(
            status_code=502,
            content={
                "error": "Request to Gemini API failed",
                "reason": type(exc).__name__,
            },
        )

    try:
        body = resp.json()
    except ValueError:
        # Upstream sent something that is not JSON (e.g. a proxy error page).
        body = None

    if resp.status_code != 200:
        if body is None:
            body = {"error": "Gemini API returned a non-JSON error response"}
        return JSONResponse(status_code=resp.status_code, content=body)

    try:
        data_b64 = body["candidates"][0]["content"]["parts"][0]["inlineData"]["data"]
        pcm = base64.b64decode(data_b64)
    except (KeyError, IndexError, TypeError, ValueError):
        # A 200 reply without an inline audio part, or with undecodable data.
        return JSONResponse(
            status_code=502,
            content={"error": "Gemini API response did not contain audio data"},
        )

    # NOTE(review): this fixed path is shared by every request, so concurrent
    # calls can overwrite each other's output before it is served.
    out_path = "multi_output.wav"
    save_wav(out_path, pcm)
    return FileResponse(out_path, media_type="audio/wav", filename=out_path)