Spaces:
Running
Running
File size: 3,131 Bytes
4a54590 5011794 6aa8d7a 5011794 a09df36 01919eb 5011794 01919eb 6aa8d7a a09df36 01919eb a09df36 6aa8d7a a09df36 01919eb 4a54590 01919eb 4a54590 01919eb 5011794 6aa8d7a 5875fa2 6aa8d7a 5875fa2 7f0d329 6aa8d7a 7f0d329 01919eb 6aa8d7a a71d68c 5875fa2 6aa8d7a 5875fa2 7f0d329 01919eb 6aa8d7a 7f0d329 6aa8d7a a71d68c 7f0d329 6aa8d7a 5011794 01919eb 6aa8d7a a71d68c 5875fa2 6aa8d7a 5875fa2 6aa8d7a 5875fa2 5011794 01919eb 6aa8d7a a09df36 6aa8d7a a71d68c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import base64
import io
import os
import wave
from typing import BinaryIO, Union

import requests
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, Response
from pydantic import BaseModel
# Load API key
load_dotenv()
API_KEY = os.getenv("GEMINI_API_KEY")
if not API_KEY:
raise RuntimeError("Missing GEMINI_API_KEY in environment")
BASE_URL = (
"https://generativelanguage.googleapis.com/"
"v1beta/models/gemini-2.5-flash-preview-tts:"
"generateContent"
f"?key={API_KEY}"
)
app = FastAPI(title="Gemini TTS JSON API")
def save_wav(path: str, pcm: bytes, channels=1, rate=24000, width=2):
with wave.open(path, "wb") as wf:
wf.setnchannels(channels)
wf.setsampwidth(width)
wf.setframerate(rate)
wf.writeframes(pcm)
class SingleTTSRequest(BaseModel):
    """JSON body accepted by the ``POST /single_tts`` endpoint."""

    # Text to synthesize; forwarded to Gemini as the only content part.
    prompt: str
    # Prebuilt voice to use; forwarded as ``voiceName`` in the speech config.
    voice_name: str
class MultiTTSRequest(BaseModel):
    """JSON body accepted by the ``POST /multi_tts`` endpoint (two speakers)."""

    # Text to synthesize; forwarded to Gemini as the only content part.
    prompt: str
    # Label of the first speaker; forwarded as ``speaker`` in the first
    # speaker voice config.
    speaker1: str
    # Prebuilt voice assigned to ``speaker1`` (forwarded as ``voiceName``).
    voice1: str
    # Label of the second speaker; forwarded as ``speaker`` in the second
    # speaker voice config.
    speaker2: str
    # Prebuilt voice assigned to ``speaker2`` (forwarded as ``voiceName``).
    voice2: str
@app.get("/")
def health():
    """Health-check endpoint: return a fixed status message."""
    status_body = {"status": "Gemini TTS JSON API up and running!"}
    return status_body
# requests applies no timeout unless told to, so a stalled upstream call would
# block a worker forever. Values are (connect, read) seconds.
_GEMINI_TIMEOUT = (10, 120)


def _synthesize_wav(payload: dict, download_name: str) -> Response:
    """Send *payload* to the Gemini TTS endpoint and return the audio as WAV.

    Args:
        payload: JSON body for the ``generateContent`` REST call.
        download_name: Filename advertised to the client in the
            ``Content-Disposition`` header.

    Returns:
        An ``audio/wav`` response holding the synthesized speech.

    Raises:
        HTTPException: 504 if the upstream call times out, 502 if Gemini
            cannot be reached or its reply contains no audio, or the upstream
            status code (with its error body) if Gemini answers non-200.
    """
    try:
        upstream = requests.post(BASE_URL, json=payload, timeout=_GEMINI_TIMEOUT)
    except requests.Timeout:
        raise HTTPException(
            status_code=504, detail="Gemini API request timed out"
        ) from None
    except requests.RequestException:
        # BASE_URL embeds the API key and requests includes the URL in its
        # error text, so the original exception is deliberately not forwarded.
        raise HTTPException(
            status_code=502, detail="Could not reach the Gemini API"
        ) from None

    if upstream.status_code != 200:
        # Error bodies are normally JSON, but fall back to raw text (e.g. a
        # proxy error page) instead of failing while reporting the failure.
        try:
            detail = upstream.json()
        except ValueError:
            detail = upstream.text
        raise HTTPException(status_code=upstream.status_code, detail=detail)

    try:
        first_part = upstream.json()["candidates"][0]["content"]["parts"][0]
        pcm = base64.b64decode(first_part["inlineData"]["data"])
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        raise HTTPException(
            status_code=502,
            detail="Gemini API response did not contain audio data",
        ) from exc

    # Build the WAV in memory: a shared on-disk filename would let concurrent
    # requests overwrite each other's audio while it is still being sent.
    # wave.open() (used by save_wav) accepts a file object as well as a path.
    buffer = io.BytesIO()
    save_wav(buffer, pcm)
    return Response(
        content=buffer.getvalue(),
        media_type="audio/wav",
        headers={
            "Content-Disposition": f'attachment; filename="{download_name}"'
        },
    )


@app.post("/single_tts")
def single_tts(req: SingleTTSRequest):
    """Synthesize ``req.prompt`` with one prebuilt voice and return a WAV file.

    Raises:
        HTTPException: See ``_synthesize_wav``.
    """
    payload = {
        "model": "gemini-2.5-flash-preview-tts",
        "contents": [{"parts": [{"text": req.prompt}]}],
        # The REST API names this field "generationConfig"; "config" is the
        # keyword used by the Python SDK, not a REST field.
        "generationConfig": {
            "responseModalities": ["AUDIO"],
            "speechConfig": {
                "voiceConfig": {
                    "prebuiltVoiceConfig": {"voiceName": req.voice_name}
                }
            },
        },
    }
    return _synthesize_wav(payload, "single_output.wav")


@app.post("/multi_tts")
def multi_tts(req: MultiTTSRequest):
    """Synthesize ``req.prompt`` as a two-speaker dialogue and return a WAV file.

    Each speaker label in the request is mapped to its own prebuilt voice.

    Raises:
        HTTPException: See ``_synthesize_wav``.
    """
    speaker_voices = [
        {
            "speaker": speaker,
            "voiceConfig": {"prebuiltVoiceConfig": {"voiceName": voice}},
        }
        for speaker, voice in (
            (req.speaker1, req.voice1),
            (req.speaker2, req.voice2),
        )
    ]
    payload = {
        "model": "gemini-2.5-flash-preview-tts",
        "contents": [{"parts": [{"text": req.prompt}]}],
        # The REST API names this field "generationConfig"; "config" is the
        # keyword used by the Python SDK, not a REST field.
        "generationConfig": {
            "responseModalities": ["AUDIO"],
            "speechConfig": {
                "multiSpeakerVoiceConfig": {
                    "speakerVoiceConfigs": speaker_voices
                }
            },
        },
    }
    return _synthesize_wav(payload, "multi_output.wav")
|