Athspi committed on
Commit
de5b9f0
·
verified ·
1 Parent(s): d5d02b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -40
app.py CHANGED
@@ -1,25 +1,20 @@
1
  from fastapi import FastAPI, Form
2
- from fastapi.responses import FileResponse, JSONResponse
3
  import google.generativeai as genai
4
- from google.generativeai.types import GenerationConfig, SpeechConfig, VoiceConfig, PrebuiltVoiceConfig
5
  import wave
6
  import os
7
 
8
  app = FastAPI()
9
 
10
- # Set your Google API key via environment variable
11
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
12
 
13
  if not GOOGLE_API_KEY:
14
- raise ValueError("GOOGLE_API_KEY environment variable not set.")
15
 
16
- # Configure the GenAI client
17
  genai.configure(api_key=GOOGLE_API_KEY)
18
 
19
- # Load the TTS model
20
  model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
21
 
22
- # Function to write PCM audio data to WAV file
23
  def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
24
  with wave.open(filename, "wb") as wf:
25
  wf.setnchannels(channels)
@@ -27,39 +22,17 @@ def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
27
  wf.setframerate(rate)
28
  wf.writeframes(pcm)
29
 
30
- # Root endpoint to confirm API is running
31
- @app.get("/")
32
- def read_root():
33
- return {"message": "✅ Gemini TTS FastAPI running on Hugging Face Spaces!"}
 
 
 
34
 
35
- # POST endpoint to generate TTS audio from text
36
- @app.post("/generate-audio")
37
- def generate_audio(text: str = Form(...)):
38
- try:
39
- response = model.generate_content(
40
- contents=text,
41
- generation_config=GenerationConfig(
42
- response_mime_type="audio/wav"
43
- ),
44
- response_modality="AUDIO",
45
- speech_config=SpeechConfig(
46
- voice_config=VoiceConfig(
47
- prebuilt_voice=PrebuiltVoiceConfig(
48
- voice_name="Kore"
49
- )
50
- )
51
- )
52
- )
53
 
54
- # Get raw audio data
55
- data = response.candidates[0].content.parts[0].inline_data.data
56
 
57
- # Save to file
58
- file_name = "output.wav"
59
- wave_file(file_name, data)
60
-
61
- # Return the file as response
62
- return FileResponse(file_name, media_type="audio/wav", filename="output.wav")
63
-
64
- except Exception as e:
65
- return JSONResponse(content={"error": str(e)}, status_code=500)
 
1
  from fastapi import FastAPI, Form
2
+ from fastapi.responses import FileResponse
3
  import google.generativeai as genai
 
4
  import wave
5
  import os
6
 
7
  app = FastAPI()
8
 
 
9
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
10
 
11
  if not GOOGLE_API_KEY:
12
+ raise ValueError("Set GOOGLE_API_KEY environment variable.")
13
 
 
14
  genai.configure(api_key=GOOGLE_API_KEY)
15
 
 
16
  model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
17
 
 
18
  def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
19
  with wave.open(filename, "wb") as wf:
20
  wf.setnchannels(channels)
 
22
  wf.setframerate(rate)
23
  wf.writeframes(pcm)
24
 
25
@app.post("/tts")
def tts(text: str = Form(...)):
    """Convert the submitted form text to speech and return it as a WAV download.

    The audio is assembled in memory instead of being written to a fixed
    ``out.wav`` path: a shared file name lets concurrent requests overwrite
    (or serve) each other's audio and leaves a stale file on disk.

    Args:
        text: Required form field holding the text to synthesize.

    Returns:
        An ``audio/wav`` response offered to the client as ``output.wav``.

    Raises:
        HTTPException: 502 when the model response carries no inline audio
            payload.
    """
    # Function-scope imports keep this block self-contained; only the standard
    # library and the already-imported fastapi package are used.
    import io

    from fastapi import HTTPException
    from fastapi.responses import Response

    # NOTE(review): `response_modality` does not appear among the keyword
    # arguments documented for `GenerativeModel.generate_content`; confirm that
    # the installed google-generativeai release accepts it.
    response = model.generate_content(
        text,
        generation_config={"response_mime_type": "audio/wav"},
        response_modality="AUDIO",
    )

    # A blocked or empty generation has no candidates/parts; report that
    # explicitly rather than failing with an opaque IndexError/AttributeError.
    try:
        audio_data = response.candidates[0].content.parts[0].inline_data.data
    except (AttributeError, IndexError) as exc:
        raise HTTPException(
            status_code=502,
            detail="The TTS model returned no audio data.",
        ) from exc
    if not audio_data:
        raise HTTPException(
            status_code=502,
            detail="The TTS model returned no audio data.",
        )

    # wave_file hands its first argument straight to wave.open, which accepts
    # a writable, seekable file-like object as well as a path.
    buffer = io.BytesIO()
    wave_file(buffer, audio_data)

    return Response(
        content=buffer.getvalue(),
        media_type="audio/wav",
        headers={"Content-Disposition": 'attachment; filename="output.wav"'},
    )