Athspi committed on
Commit
5eaef53
·
verified ·
1 Parent(s): 7d3c0d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -35
app.py CHANGED
@@ -1,11 +1,11 @@
1
- from fastapi import FastAPI, HTTPException, Request
2
  from fastapi.responses import JSONResponse, StreamingResponse
3
  from google import genai
4
  from google.genai import types
5
  import wave
6
  import io
7
  import os
8
- from typing import Optional
9
  from pydantic import BaseModel
10
  from dotenv import load_dotenv
11
 
@@ -13,13 +13,14 @@ from dotenv import load_dotenv
13
  load_dotenv()
14
 
15
  app = FastAPI(
16
- title="Google GenAI TTS API",
17
- description="API for text-to-speech conversion using Google GenAI",
18
- version="1.0.0",
19
  docs_url="/docs",
20
  redoc_url=None
21
  )
22
 
 
23
  class TTSRequest(BaseModel):
24
  text: str
25
  voice_name: Optional[str] = "Kore"
@@ -28,15 +29,28 @@ class TTSRequest(BaseModel):
28
  channels: Optional[int] = 1
29
  sample_width: Optional[int] = 2
30
 
 
 
 
 
 
 
 
31
  def initialize_genai_client():
32
- """Initialize the GenAI client with API key from environment"""
33
- api_key = os.getenv("GEMINI_API_KEY")
34
- if not api_key:
35
- raise ValueError("GEMINI_API_KEY environment variable not set")
36
- return genai.Client(api_key=api_key)
 
 
 
 
 
 
37
 
38
  def generate_wave_bytes(pcm_data: bytes, channels: int, rate: int, sample_width: int) -> bytes:
39
- """Generate WAV file bytes from PCM data"""
40
  with io.BytesIO() as wav_buffer:
41
  with wave.open(wav_buffer, "wb") as wf:
42
  wf.setnchannels(channels)
@@ -48,24 +62,13 @@ def generate_wave_bytes(pcm_data: bytes, channels: int, rate: int, sample_width:
48
  @app.post("/api/generate-tts/")
49
  async def generate_tts(request: TTSRequest):
50
  """
51
- Generate speech from text using Google GenAI TTS
52
-
53
- Parameters:
54
- - text: The text to convert to speech
55
- - voice_name: Voice to use (default: 'Kore')
56
- - cheerful: Whether to speak cheerfully (default: True)
57
- - sample_rate: Audio sample rate (default: 24000)
58
- - channels: Number of audio channels (default: 1)
59
- - sample_width: Sample width in bytes (default: 2)
60
-
61
- Returns:
62
- - StreamingResponse with the WAV audio file
63
  """
64
  try:
65
  client = initialize_genai_client()
66
-
67
  text_to_speak = f"Say cheerfully: {request.text}" if request.cheerful else request.text
68
-
69
  response = client.models.generate_content(
70
  model="gemini-2.5-flash-preview-tts",
71
  contents=text_to_speak,
@@ -80,27 +83,25 @@ async def generate_tts(request: TTSRequest):
80
  ),
81
  )
82
  )
83
-
84
  if not response.candidates or not response.candidates[0].content.parts:
85
- raise HTTPException(status_code=500, detail="No audio data received from GenAI")
86
-
87
  audio_data = response.candidates[0].content.parts[0].inline_data.data
88
-
89
  wav_bytes = generate_wave_bytes(
90
  audio_data,
91
  channels=request.channels,
92
  rate=request.sample_rate,
93
  sample_width=request.sample_width
94
  )
95
-
96
  return StreamingResponse(
97
  io.BytesIO(wav_bytes),
98
  media_type="audio/wav",
99
- headers={
100
- "Content-Disposition": f"attachment; filename=generated_audio.wav"
101
- }
102
  )
103
-
104
  except Exception as e:
105
  return JSONResponse(
106
  {"status": "error", "message": str(e)},
@@ -117,4 +118,4 @@ async def health_check():
117
 
118
  if __name__ == "__main__":
119
  import uvicorn
120
- uvicorn.run(app, host="0.0.0.0", port=8080)
 
1
+ from fastapi import FastAPI, HTTPException
2
  from fastapi.responses import JSONResponse, StreamingResponse
3
  from google import genai
4
  from google.genai import types
5
  import wave
6
  import io
7
  import os
8
+ from typing import Optional, List
9
  from pydantic import BaseModel
10
  from dotenv import load_dotenv
11
 
 
13
  load_dotenv()
14
 
15
  app = FastAPI(
16
+ title="Google GenAI TTS API with Multiple API Keys",
17
+ description="Text-to-Speech API using Google GenAI with multiple API keys fallback.",
18
+ version="1.2.0",
19
  docs_url="/docs",
20
  redoc_url=None
21
  )
22
 
23
+ # Pydantic model for request body
24
  class TTSRequest(BaseModel):
25
  text: str
26
  voice_name: Optional[str] = "Kore"
 
29
  channels: Optional[int] = 1
30
  sample_width: Optional[int] = 2
31
 
32
def get_api_keys() -> List[str]:
    """Return the API keys configured in the GEMINI_API_KEYS environment variable.

    The variable is read as a comma-separated list. Whitespace around each
    entry is stripped and empty entries are dropped.

    Returns:
        The non-empty keys, in the order they appear in the variable.

    Raises:
        ValueError: If the variable is unset, empty, or contains nothing but
            separators and whitespace.
    """
    raw_value = os.getenv("GEMINI_API_KEYS") or ""
    keys = [entry.strip() for entry in raw_value.split(",") if entry.strip()]
    if not keys:
        # Check after parsing: a value like " , ," is truthy as a string but
        # yields no usable keys, and must not be returned as an empty list.
        raise ValueError("No API keys found in GEMINI_API_KEYS environment variable.")
    return keys
38
+
39
  def initialize_genai_client():
40
+ """Initialize the GenAI client by trying multiple API keys"""
41
+ api_keys = get_api_keys()
42
+ for key in api_keys:
43
+ try:
44
+ print(f"Trying API key: {key[:5]}...") # Only show part for safety
45
+ client = genai.Client(api_key=key)
46
+ return client
47
+ except Exception as e:
48
+ print(f"Failed with key {key[:5]}... : {e}")
49
+
50
+ raise ValueError("No valid API key could initialize the GenAI client.")
51
 
52
  def generate_wave_bytes(pcm_data: bytes, channels: int, rate: int, sample_width: int) -> bytes:
53
+ """Convert PCM audio data into WAV bytes."""
54
  with io.BytesIO() as wav_buffer:
55
  with wave.open(wav_buffer, "wb") as wf:
56
  wf.setnchannels(channels)
 
62
  @app.post("/api/generate-tts/")
63
  async def generate_tts(request: TTSRequest):
64
  """
65
+ Convert text to speech audio using Google GenAI.
 
 
 
 
 
 
 
 
 
 
 
66
  """
67
  try:
68
  client = initialize_genai_client()
69
+
70
  text_to_speak = f"Say cheerfully: {request.text}" if request.cheerful else request.text
71
+
72
  response = client.models.generate_content(
73
  model="gemini-2.5-flash-preview-tts",
74
  contents=text_to_speak,
 
83
  ),
84
  )
85
  )
86
+
87
  if not response.candidates or not response.candidates[0].content.parts:
88
+ raise HTTPException(status_code=500, detail="No audio data received from GenAI.")
89
+
90
  audio_data = response.candidates[0].content.parts[0].inline_data.data
91
+
92
  wav_bytes = generate_wave_bytes(
93
  audio_data,
94
  channels=request.channels,
95
  rate=request.sample_rate,
96
  sample_width=request.sample_width
97
  )
98
+
99
  return StreamingResponse(
100
  io.BytesIO(wav_bytes),
101
  media_type="audio/wav",
102
+ headers={"Content-Disposition": "attachment; filename=generated_audio.wav"}
 
 
103
  )
104
+
105
  except Exception as e:
106
  return JSONResponse(
107
  {"status": "error", "message": str(e)},
 
118
 
119
  # When this file is executed directly, serve `app` with uvicorn on all interfaces (0.0.0.0), port 8080.
  if __name__ == "__main__":
120
  import uvicorn
121
+ uvicorn.run(app, host="0.0.0.0", port=8080)