Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,25 +1,20 @@
|
|
1 |
from fastapi import FastAPI, Form
|
2 |
-
from fastapi.responses import FileResponse
|
3 |
import google.generativeai as genai
|
4 |
-
from google.generativeai.types import GenerationConfig, SpeechConfig, VoiceConfig, PrebuiltVoiceConfig
|
5 |
import wave
|
6 |
import os
|
7 |
|
8 |
app = FastAPI()
|
9 |
|
10 |
-
# Set your Google API key via environment variable
|
11 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
12 |
|
13 |
if not GOOGLE_API_KEY:
|
14 |
-
raise ValueError("GOOGLE_API_KEY environment variable
|
15 |
|
16 |
-
# Configure the GenAI client
|
17 |
genai.configure(api_key=GOOGLE_API_KEY)
|
18 |
|
19 |
-
# Load the TTS model
|
20 |
model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
|
21 |
|
22 |
-
# Function to write PCM audio data to WAV file
|
23 |
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
|
24 |
with wave.open(filename, "wb") as wf:
|
25 |
wf.setnchannels(channels)
|
@@ -27,39 +22,17 @@ def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
|
|
27 |
wf.setframerate(rate)
|
28 |
wf.writeframes(pcm)
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
34 |
|
35 |
-
|
36 |
-
@app.post("/generate-audio")
|
37 |
-
def generate_audio(text: str = Form(...)):
|
38 |
-
try:
|
39 |
-
response = model.generate_content(
|
40 |
-
contents=text,
|
41 |
-
generation_config=GenerationConfig(
|
42 |
-
response_mime_type="audio/wav"
|
43 |
-
),
|
44 |
-
response_modality="AUDIO",
|
45 |
-
speech_config=SpeechConfig(
|
46 |
-
voice_config=VoiceConfig(
|
47 |
-
prebuilt_voice=PrebuiltVoiceConfig(
|
48 |
-
voice_name="Kore"
|
49 |
-
)
|
50 |
-
)
|
51 |
-
)
|
52 |
-
)
|
53 |
|
54 |
-
|
55 |
-
|
56 |
|
57 |
-
|
58 |
-
file_name = "output.wav"
|
59 |
-
wave_file(file_name, data)
|
60 |
-
|
61 |
-
# Return the file as response
|
62 |
-
return FileResponse(file_name, media_type="audio/wav", filename="output.wav")
|
63 |
-
|
64 |
-
except Exception as e:
|
65 |
-
return JSONResponse(content={"error": str(e)}, status_code=500)
|
|
|
1 |
from fastapi import FastAPI, Form
|
2 |
+
from fastapi.responses import FileResponse
|
3 |
import google.generativeai as genai
|
|
|
4 |
import wave
|
5 |
import os
|
6 |
|
7 |
app = FastAPI()
|
8 |
|
|
|
9 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
10 |
|
11 |
if not GOOGLE_API_KEY:
|
12 |
+
raise ValueError("Set GOOGLE_API_KEY environment variable.")
|
13 |
|
|
|
14 |
genai.configure(api_key=GOOGLE_API_KEY)
|
15 |
|
|
|
16 |
model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
|
17 |
|
|
|
18 |
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM audio frames into a WAV container.

    Args:
        filename: Destination path, or a writable binary file object
            (``wave.open`` accepts either).
        pcm: Raw PCM frame data as bytes.
        channels: Number of audio channels.
        rate: Sample rate in Hz.
        sample_width: Size of one sample in bytes.

    Raises:
        wave.Error: If the WAV parameters are rejected by the ``wave`` module.
    """
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        # The wave module refuses to write frames until the sample width is
        # set, so this call must come before writeframes().
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm)
|
24 |
|
25 |
+
@app.post("/tts")
def tts(text: str = Form(...)):
    """Synthesize speech for ``text`` and return it as a WAV download.

    Args:
        text: Text to speak, read from the ``text`` form field.

    Returns:
        An ``audio/wav`` response offered to the client as ``output.wav``.

    Raises:
        HTTPException: 502 when the model response carries no audio payload.
    """
    import io

    from fastapi import HTTPException
    from fastapi.responses import Response

    # NOTE(review): `response_modality` does not appear among the documented
    # keyword arguments of `GenerativeModel.generate_content`, and "audio/wav"
    # is not among the documented `response_mime_type` values - confirm this
    # call works with the installed google-generativeai version.
    response = model.generate_content(
        text,
        generation_config={"response_mime_type": "audio/wav"},
        response_modality="AUDIO",
    )

    # The payload is several attribute/index hops deep; turn a missing hop
    # into a clear gateway error instead of an unhandled server crash.
    try:
        audio_data = response.candidates[0].content.parts[0].inline_data.data
    except (AttributeError, IndexError) as err:
        raise HTTPException(
            status_code=502,
            detail="TTS model returned no audio data.",
        ) from err
    if not audio_data:
        raise HTTPException(
            status_code=502,
            detail="TTS model returned no audio data.",
        )

    # Build the WAV in a per-request buffer: a fixed on-disk path would be
    # shared by concurrent requests, letting one overwrite another's audio.
    buffer = io.BytesIO()
    wave_file(buffer, audio_data)

    return Response(
        content=buffer.getvalue(),
        media_type="audio/wav",
        headers={"Content-Disposition": 'attachment; filename="output.wav"'},
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|