Athspi committed on
Commit
6acc004
·
verified ·
1 Parent(s): 4a54590

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -18
app.py CHANGED
@@ -1,22 +1,26 @@
1
  from fastapi import FastAPI, Form
2
  from fastapi.responses import FileResponse, JSONResponse
3
  from pydantic import BaseModel
4
- from google import generativeai as genai
5
- from google.generativeai import types
 
 
6
  import wave
7
  import os
8
 
9
  app = FastAPI()
10
 
11
- # Set your Google API key via environment variable for Spaces safety
12
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
13
 
14
  if not GOOGLE_API_KEY:
15
  raise ValueError("GOOGLE_API_KEY environment variable not set.")
16
 
17
- # Initialize the GenAI client
18
- client = genai.Client(api_key=GOOGLE_API_KEY)
19
 
 
 
20
 
21
  # Function to write PCM audio data to WAV file
22
  def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
@@ -27,32 +31,36 @@ def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
27
  wf.writeframes(pcm)
28
 
29
 
30
- # Root endpoint to check API is running
31
  @app.get("/")
32
  def read_root():
33
- return {"message": "🦜 Gemini TTS FastAPI running on Hugging Face Spaces!"}
34
 
35
 
36
  # POST endpoint to generate TTS audio from text
37
  @app.post("/generate-audio")
38
  def generate_audio(text: str = Form(...)):
39
  try:
40
- response = client.models.generate_content(
41
- model="gemini-2.5-flash-preview-tts",
42
  contents=text,
43
- config=types.GenerateContentConfig(
44
- response_modalities=["AUDIO"],
45
- speech_config=types.SpeechConfig(
46
- voice_config=types.VoiceConfig(
47
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
48
- voice_name="Kore"
49
- )
 
 
 
 
 
50
  )
51
- ),
52
  )
53
  )
54
 
55
- # Get audio data
56
  data = response.candidates[0].content.parts[0].inline_data.data
57
 
58
  # Save to file
 
1
  from fastapi import FastAPI, Form
2
  from fastapi.responses import FileResponse, JSONResponse
3
  from pydantic import BaseModel
4
+ import google.generativeai as genai
5
+ from google.generativeai.types import GenerateContentRequest, GenerateContentResponse, Content, Part
6
+ from google.generativeai.types import GenerateContentResponse, GenerateContentRequest, GenerationConfig
7
+ from google.generativeai.types import SafetySetting, HarmBlockThreshold, HarmCategory, GenerateContentRequest, Content
8
  import wave
9
  import os
10
 
11
  app = FastAPI()
12
 
13
+ # Set your Google API key via environment variable
14
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
15
 
16
  if not GOOGLE_API_KEY:
17
  raise ValueError("GOOGLE_API_KEY environment variable not set.")
18
 
19
+ # Configure the GenAI client
20
+ genai.configure(api_key=GOOGLE_API_KEY)
21
 
22
+ # Load the TTS model
23
+ model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
24
 
25
  # Function to write PCM audio data to WAV file
26
  def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
 
31
  wf.writeframes(pcm)
32
 
33
 
34
+ # Root endpoint to check API status
35
  @app.get("/")
36
  def read_root():
37
+ return {"message": " Gemini TTS FastAPI running on Hugging Face Spaces!"}
38
 
39
 
40
  # POST endpoint to generate TTS audio from text
41
  @app.post("/generate-audio")
42
  def generate_audio(text: str = Form(...)):
43
  try:
44
+ response = model.generate_content(
 
45
  contents=text,
46
+ generation_config=genai.types.GenerationConfig(
47
+ response_mime_type="audio/wav"
48
+ ),
49
+ safety_settings=[
50
+ SafetySetting(category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold=HarmBlockThreshold.BLOCK_NONE)
51
+ ],
52
+ tools=None,
53
+ response_modality="AUDIO",
54
+ speech_config=genai.types.SpeechConfig(
55
+ voice_config=genai.types.VoiceConfig(
56
+ prebuilt_voice_config=genai.types.PrebuiltVoiceConfig(
57
+ voice_name="Kore"
58
  )
59
+ )
60
  )
61
  )
62
 
63
+ # Get raw audio data
64
  data = response.candidates[0].content.parts[0].inline_data.data
65
 
66
  # Save to file