Update app.py

app.py CHANGED
@@ -1,11 +1,11 @@
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse, StreamingResponse
 from google import genai
 from google.genai import types
 import wave
 import io
 import os
-from typing import Optional
+from typing import Optional, List
 from pydantic import BaseModel
 from dotenv import load_dotenv
 
@@ -13,13 +13,14 @@ from dotenv import load_dotenv
 load_dotenv()
 
 app = FastAPI(
-    title="Google GenAI TTS API",
-    description="
-    version="1.
+    title="Google GenAI TTS API with Multiple API Keys",
+    description="Text-to-Speech API using Google GenAI with multiple API keys fallback.",
+    version="1.2.0",
     docs_url="/docs",
     redoc_url=None
 )
 
+# Pydantic model for request body
 class TTSRequest(BaseModel):
     text: str
     voice_name: Optional[str] = "Kore"
@@ -28,15 +29,28 @@ class TTSRequest(BaseModel):
     channels: Optional[int] = 1
     sample_width: Optional[int] = 2
 
+def get_api_keys() -> List[str]:
+    """Retrieve list of API keys from environment variable"""
+    api_keys = os.getenv("GEMINI_API_KEYS")
+    if not api_keys:
+        raise ValueError("No API keys found in GEMINI_API_KEYS environment variable.")
+    return [key.strip() for key in api_keys.split(",") if key.strip()]
+
 def initialize_genai_client():
-    """Initialize the GenAI client
-
-
-
-
+    """Initialize the GenAI client by trying multiple API keys"""
+    api_keys = get_api_keys()
+    for key in api_keys:
+        try:
+            print(f"Trying API key: {key[:5]}...")  # Only show part for safety
+            client = genai.Client(api_key=key)
+            return client
+        except Exception as e:
+            print(f"Failed with key {key[:5]}... : {e}")
+
+    raise ValueError("No valid API key could initialize the GenAI client.")
 
 def generate_wave_bytes(pcm_data: bytes, channels: int, rate: int, sample_width: int) -> bytes:
-    """
+    """Convert PCM audio data into WAV bytes."""
     with io.BytesIO() as wav_buffer:
         with wave.open(wav_buffer, "wb") as wf:
             wf.setnchannels(channels)
@@ -48,24 +62,13 @@ def generate_wave_bytes(pcm_data: bytes, channels: int, rate: int, sample_width:
 @app.post("/api/generate-tts/")
 async def generate_tts(request: TTSRequest):
     """
-
-
-    Parameters:
-    - text: The text to convert to speech
-    - voice_name: Voice to use (default: 'Kore')
-    - cheerful: Whether to speak cheerfully (default: True)
-    - sample_rate: Audio sample rate (default: 24000)
-    - channels: Number of audio channels (default: 1)
-    - sample_width: Sample width in bytes (default: 2)
-
-    Returns:
-    - StreamingResponse with the WAV audio file
+    Convert text to speech audio using Google GenAI.
     """
     try:
         client = initialize_genai_client()
-
+
         text_to_speak = f"Say cheerfully: {request.text}" if request.cheerful else request.text
-
+
         response = client.models.generate_content(
             model="gemini-2.5-flash-preview-tts",
             contents=text_to_speak,
@@ -80,27 +83,25 @@ async def generate_tts(request: TTSRequest):
                 ),
             )
         )
-
+
         if not response.candidates or not response.candidates[0].content.parts:
-            raise HTTPException(status_code=500, detail="No audio data received from GenAI")
-
+            raise HTTPException(status_code=500, detail="No audio data received from GenAI.")
+
         audio_data = response.candidates[0].content.parts[0].inline_data.data
-
+
         wav_bytes = generate_wave_bytes(
             audio_data,
             channels=request.channels,
             rate=request.sample_rate,
             sample_width=request.sample_width
         )
-
+
         return StreamingResponse(
             io.BytesIO(wav_bytes),
             media_type="audio/wav",
-            headers={
-                "Content-Disposition": f"attachment; filename=generated_audio.wav"
-            }
+            headers={"Content-Disposition": "attachment; filename=generated_audio.wav"}
         )
-
+
     except Exception as e:
         return JSONResponse(
             {"status": "error", "message": str(e)},
@@ -117,4 +118,4 @@ async def health_check():
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8080)
+    uvicorn.run(app, host="0.0.0.0", port=8080)
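The new get_api_keys helper reads a single comma-separated GEMINI_API_KEYS variable, which load_dotenv() can pick up from a .env file in the Space. A hypothetical .env entry would look like the line below; the key values are placeholders, not real keys:

# .env (placeholder values)
GEMINI_API_KEYS=your_first_key,your_second_key,your_third_key

initialize_genai_client() then walks this list in order and returns a genai.Client for the first key that initializes without raising.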
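With the app running (the __main__ block starts uvicorn on port 8080), the endpoint can be exercised with a short client script. This is only a sketch, not part of the Space itself: the localhost URL and the requests dependency are assumptions, and the payload fields simply mirror the TTSRequest model shown in the diff.

import requests  # assumed HTTP client; any other client works the same way

payload = {
    "text": "Hello from the TTS Space",  # required field
    "voice_name": "Kore",                # optional, defaults match TTSRequest
    "cheerful": True,
    "sample_rate": 24000,
    "channels": 1,
    "sample_width": 2,
}

# POST to the route defined by @app.post("/api/generate-tts/")
resp = requests.post("http://localhost:8080/api/generate-tts/", json=payload, timeout=120)
resp.raise_for_status()

# The endpoint streams back a WAV attachment; write it to disk.
with open("generated_audio.wav", "wb") as f:
    f.write(resp.content)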