Spaces:

Athspi-aitools
/

Aittsg

Running

App Files Files Community

Athspi committed on Jun 26

Commit

4a54590

verified ·

1 Parent(s): 78bb362

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -52

app.py CHANGED Viewed

@@ -1,81 +1,66 @@
-import os
-import wave
-from fastapi import FastAPI, Response, HTTPException
 from pydantic import BaseModel
-import google.generativeai as genai
 from google.generativeai import types
-# --- Configuration and API Key ---
-# It is recommended to set your Google API key as a secret in your Hugging Face Space settings.
-# The key for the secret should be 'GOOGLE_API_KEY'.
-try:
-    GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
-    if not GOOGLE_API_KEY:
-        raise ValueError("Google API key not found. Please set it in your Hugging Face Space secrets.")
-    genai.configure(api_key=GOOGLE_API_KEY)
-except Exception as e:
-    # This will help in debugging if the key is not set.
-    print(f"Error during API key configuration: {e}")
-# --- Pydantic Model for Request Body ---
-class TextToSpeechRequest(BaseModel):
-    text: str = "Say cheerfully: Have a wonderful day!"
-    voice_name: str = "Kore"
-    output_filename: str = "output.wav"
-# --- FastAPI App Initialization ---
-app = FastAPI()
 @app.get("/")
 def read_root():
-    return {"message": "Welcome to the Text-to-Speech API using Gemini. Use the /generate-audio/ endpoint to create audio."}
-@app.post("/generate-audio/")
-async def generate_audio(request: TextToSpeechRequest):
-    """
-    This endpoint generates audio from the provided text using Google's Gemini model.
-    """
     try:
-        # --- Text-to-Speech Generation ---
-        response = genai.generate_text(
             model="gemini-2.5-flash-preview-tts",
-            prompt=request.text,
-            options=types.GenerationOptions(
                 response_modalities=["AUDIO"],
                 speech_config=types.SpeechConfig(
                     voice_config=types.VoiceConfig(
                         prebuilt_voice_config=types.PrebuiltVoiceConfig(
-                            voice_name=request.voice_name,
                         )
                     )
                 ),
             )
         )
-        # --- Extract Audio Data ---
-        if not response.candidates or not response.candidates[0].content.parts or not response.candidates[0].content.parts[0].inline_data.data:
-            raise HTTPException(status_code=500, detail="Audio data could not be generated.")
-        audio_data = response.candidates[0].content.parts[0].inline_data.data
-        # --- Save to a WAV file in memory ---
-        import io
-        buffer = io.BytesIO()
-        with wave.open(buffer, "wb") as wf:
-            wf.setnchannels(1)
-            wf.setsampwidth(2)
-            wf.setframerate(24000)
-            wf.writeframes(audio_data)
-        buffer.seek(0)
-        # --- Return Audio File as Response ---
-        return Response(content=buffer.getvalue(), media_type="audio/wav", headers={"Content-Disposition": f"attachment; filename={request.output_filename}"})
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-# To run this locally, use the command: uvicorn app:app --reload

+from fastapi import FastAPI, Form
+from fastapi.responses import FileResponse, JSONResponse
 from pydantic import BaseModel
+from google import generativeai as genai
 from google.generativeai import types
+import wave
+import os
+app = FastAPI()
+# Read the Google API key from the GOOGLE_API_KEY environment variable (configure it as a secret in the Space settings) so it is never hard-coded
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+if not GOOGLE_API_KEY:
+    raise ValueError("GOOGLE_API_KEY environment variable not set.")
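+# Initialize the GenAI client. NOTE(review): `Client` is the entry point of the `google-genai` SDK (`from google import genai`) — confirm that the `google.generativeai` module imported above actually exposes it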
+client = genai.Client(api_key=GOOGLE_API_KEY)
+# Write raw PCM audio bytes to a WAV file (defaults: 1 channel, 24000 Hz, 2-byte samples)
+def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
+    with wave.open(filename, "wb") as wf:
+        wf.setnchannels(channels)
+        wf.setsampwidth(sample_width)
+        wf.setframerate(rate)
+        wf.writeframes(pcm)
+# Root endpoint: returns a status message confirming that the API is running
 @app.get("/")
 def read_root():
+    return {"message": "🦜 Gemini TTS FastAPI running on Hugging Face Spaces!"}
+# POST endpoint: generates TTS audio for the submitted form field `text` and returns it as a WAV file
+@app.post("/generate-audio")
+def generate_audio(text: str = Form(...)):
     try:
+        response = client.models.generate_content(
             model="gemini-2.5-flash-preview-tts",
+            contents=text,
+            config=types.GenerateContentConfig(
                 response_modalities=["AUDIO"],
                 speech_config=types.SpeechConfig(
                     voice_config=types.VoiceConfig(
                         prebuilt_voice_config=types.PrebuiltVoiceConfig(
+                            voice_name="Kore"
                         )
                     )
                 ),
             )
         )
+        # Get the audio bytes from the first part of the first candidate (no check that either is present)
+        data = response.candidates[0].content.parts[0].inline_data.data
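+        # Save to a fixed path. NOTE(review): every request writes the same output.wav, so overlapping requests may overwrite each other — consider a per-request temporary file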
+        file_name = "output.wav"
+        wave_file(file_name, data)
+        # Return the saved file as an audio/wav response named output.wav
+        return FileResponse(file_name, media_type="audio/wav", filename="output.wav")
     except Exception as e:
+        return JSONResponse(content={"error": str(e)}, status_code=500)