Spaces:

Athspi-aitools
/

Aittsg

Running

App Files Files Community

Athspi committed on Jun 26

Commit

405a073

verified ·

1 Parent(s): c9eaebb

Update app.py

Browse files

Files changed (1) hide show

app.py +186 -61

app.py CHANGED Viewed

@@ -1,71 +1,196 @@
 import os
 import wave
 import gradio as gr
-import google.generativeai as genai
-# Set your API Key (or via Hugging Face Secrets / os.environ)
-GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
-if not GOOGLE_API_KEY:
-    raise ValueError("Please set your GOOGLE_API_KEY environment variable.")
-# Configure Generative AI
-genai.configure(api_key=GOOGLE_API_KEY)
-# Initialize Gemini TTS model
-model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
-# Function to save raw PCM data to WAV file
-def save_wave(filename, pcm_data, channels=1, rate=24000, sample_width=2):
-    with wave.open(filename, 'wb') as wf:
-        wf.setnchannels(channels)
-        wf.setsampwidth(sample_width)
-        wf.setframerate(rate)
-        wf.writeframes(pcm_data)
-# Function to handle TTS generation
-def generate_tts(text):
-    if not text.strip():
-        return None, "Please enter some text."
     try:
-        response = model.generate_content(
-            text,
-            generation_config={"response_mime_type": "audio/wav"},
-            response_modality="AUDIO"
-        )
-        # Extract audio data from response
-        audio_data = response.candidates[0].content.parts[0].inline_data.data
-        output_filename = "output.wav"
-        save_wave(output_filename, audio_data)
-        return output_filename, "Audio generated successfully!"
     except Exception as e:
-        return None, f"Error: {str(e)}"
-# Gradio Interface
-with gr.Blocks() as demo:
-    gr.Markdown("## 🎙️ Gemini 2.5 Text-to-Speech Demo")
-    with gr.Row():
-        text_input = gr.Textbox(label="Enter text to convert to speech")
-    with gr.Row():
-        submit_button = gr.Button("Generate Speech")
-    with gr.Row():
-        audio_output = gr.Audio(label="Generated Audio", type="filepath")
-        status_output = gr.Textbox(label="Status")
-    submit_button.click(
-        fn=generate_tts,
-        inputs=[text_input],
-        outputs=[audio_output, status_output]
-    )
-# Launch Gradio app
 if __name__ == "__main__":
-    demo.launch()

+from fastapi import FastAPI, HTTPException
+from fastapi.responses import FileResponse
+from google import genai
+from google.genai import types
 import os
 import wave
+import tempfile
 import gradio as gr
+import uvicorn
+from typing import Optional
+from pydantic import BaseModel
+# ─── Configuration ────────────────────────────────────────────────────────────
+class Config:
+    """Static settings shared by the REST endpoint and the Gradio UI."""
+    # Server-side Gemini API keys, read from the environment when the class
+    # body runs; a slot is None when its variable is unset.
+    API_KEYS = [
+        os.getenv(env_name)
+        for env_name in ("GOOGLE_API_KEY_1", "GOOGLE_API_KEY_2")
+    ]
+    # TTS model identifiers offered to callers; index 0 is used as the default.
+    MODEL_CHOICES = [
+        "gemini-2.5-flash-preview-tts",
+        "gemini-2.5-pro-preview-tts",
+    ]
+    # Prebuilt voice names offered to callers; index 0 is used as the default.
+    VOICE_NAMES = [
+        "Achernar", "Achird", "Algenib", "Algieba", "Alnilam", "Aoede",
+        "Autonoe", "Callirrhoe", "Charon", "Despina", "Enceladus", "Erinome",
+        "Fenrir", "Gacrux", "Iapetus", "Kore", "Laomedeia", "Leda",
+        "Orus", "Puck", "Pulcherrima", "Rasalgethi", "Sadachbia", "Sadaltager",
+        "Schedar", "Sulafat", "Umbriel", "Vindemiatrix", "Zephyr", "Zubenelgenubi",
+    ]
+# ─── API Models ───────────────────────────────────────────────────────────────
+class TTSRequest(BaseModel):
+    """Request payload for one text-to-speech generation."""
+    # Text to synthesize.
+    text: str
+    # Model identifier; defaults to the first entry of Config.MODEL_CHOICES.
+    model: str = Config.MODEL_CHOICES[0]
+    # Prebuilt voice name; defaults to the first entry of Config.VOICE_NAMES.
+    voice_name: str = Config.VOICE_NAMES[0]
+    # Optional caller-supplied API key; when empty/None the engine uses the
+    # clients built from the configured keys instead.
+    api_key: Optional[str] = None
+# ─── Core TTS Engine ──────────────────────────────────────────────────────────
+class TTSEngine:
+    """Generate speech with Gemini TTS models and store it as WAV files.
+    Keeps one ``genai.Client`` per configured server-side API key and can also
+    use a key supplied with an individual request.
+    """
+    def __init__(self):
+        """Create the engine and build clients for every configured key."""
+        self.clients = {}
+        self.init_clients()
+    def init_clients(self):
+        """Populate ``self.clients`` from ``Config.API_KEYS``.
+        Unset keys are skipped. A key whose client cannot be constructed is
+        reported on stdout and skipped, so one bad key does not stop startup.
+        """
+        for idx, key in enumerate(Config.API_KEYS):
+            if not key:
+                continue
+            try:
+                self.clients[f"client_{idx}"] = genai.Client(api_key=key)
+            except Exception as e:
+                print(f"Failed to initialize client with key {idx}: {e}")
+    @staticmethod
+    def _request_speech(client, request: TTSRequest):
+        """Send one audio-generation request through *client* and return the response."""
+        return client.models.generate_content(
+            model=request.model,
+            contents=request.text,
+            config=types.GenerateContentConfig(
+                response_modalities=["AUDIO"],
+                speech_config=types.SpeechConfig(
+                    voice_config=types.VoiceConfig(
+                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
+                            voice_name=request.voice_name
+                        )
+                    )
+                ),
+            ),
+        )
+    def generate_audio(self, request: TTSRequest) -> str:
+        """Synthesize ``request.text`` and return the path of a WAV file.
+        When ``request.api_key`` is set and a client can be built from it, only
+        that client is used. Otherwise the pre-configured clients are tried in
+        insertion order and the first response obtained is used. Each client
+        is called at most once per request.
+        Returns:
+            Path of a temporary ``.wav`` file. This class never deletes it.
+        Raises:
+            HTTPException: status 500 when no configured client produces a
+                response, when the call with the user key fails, when the
+                response carries no audio data, or when the file cannot be
+                written.
+        """
+        client = None
+        if request.api_key:
+            try:
+                client = genai.Client(api_key=request.api_key)
+            except Exception as e:
+                # Best effort: an unusable user key falls back to the
+                # configured clients below instead of failing the request.
+                print(f"Failed to initialize client with user-provided key: {e}")
+        if client is not None:
+            try:
+                response = self._request_speech(client, request)
+            except Exception as e:
+                raise HTTPException(status_code=500, detail=str(e)) from e
+        else:
+            response = None
+            for client_name, existing_client in self.clients.items():
+                try:
+                    # The first successful response is kept; no second call is
+                    # made with the same client afterwards.
+                    response = self._request_speech(existing_client, request)
+                    break
+                except Exception as e:
+                    print(f"Request via {client_name} failed: {e}")
+            if response is None:
+                raise HTTPException(status_code=500, detail="No working API keys available")
+        try:
+            pcm_data = response.candidates[0].content.parts[0].inline_data.data
+        except (AttributeError, IndexError, TypeError) as e:
+            # Missing candidates/content/parts/inline_data all mean the same
+            # thing to the caller: there is no audio in this response.
+            raise HTTPException(status_code=500, detail="Invalid response format from API") from e
+        if not pcm_data:
+            raise HTTPException(status_code=500, detail="Invalid response format from API")
+        try:
+            return self.create_wave_file(pcm_data)
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e)) from e
+    @staticmethod
+    def create_wave_file(pcm_bytes: bytes, channels: int = 1, sample_width: int = 2, rate: int = 24000) -> str:
+        """Write raw PCM audio to a new temporary WAV file and return its path.
+        Args:
+            pcm_bytes: Raw PCM frames to store.
+            channels: Number of audio channels written to the header.
+            sample_width: Sample width in bytes written to the header.
+            rate: Frame rate in Hz written to the header.
+        Returns:
+            Path of the created ``.wav`` file (created with ``delete=False``,
+            so it persists until someone removes it).
+        """
+        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+        try:
+            # Write through the handle that is already open instead of opening
+            # the same path a second time.
+            with tmp_file, wave.open(tmp_file, "wb") as wf:
+                wf.setnchannels(channels)
+                wf.setsampwidth(sample_width)
+                wf.setframerate(rate)
+                wf.writeframes(pcm_bytes)
+        except Exception:
+            # Do not leave a partially written file behind.
+            os.remove(tmp_file.name)
+            raise
+        return tmp_file.name
+# ─── FastAPI App ──────────────────────────────────────────────────────────────
+app = FastAPI(title="Gemini TTS API", version="1.0")
+engine = TTSEngine()
+@app.post("/generate/")
+def generate_tts(request: TTSRequest):
+    """Synthesize speech for *request* and return it as an ``audio/wav`` file.
+    Declared as a plain ``def`` so FastAPI runs it in its threadpool: the call
+    into ``engine.generate_audio`` is synchronous and would otherwise block
+    the event loop for the whole duration of the Gemini request.
+    Raises:
+        HTTPException: propagated unchanged from the engine; any other error
+            is reported as status 500 with the error text as detail.
+    """
     try:
+        audio_path = engine.generate_audio(request)
+        # NOTE(review): the temporary WAV at audio_path is not removed after
+        # it has been served.
+        return FileResponse(audio_path, media_type="audio/wav")
+    except HTTPException:
+        raise
     except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+# ─── Gradio Interface ─────────────────────────────────────────────────────────
+def create_gradio_interface():
+    """Build and return the Gradio Blocks UI for the TTS engine.
+    The left column selects model, voice and an optional API key; the right
+    column takes the text and shows the generated audio. The error box starts
+    hidden and is revealed only when a generation attempt fails.
+    """
+    with gr.Blocks(title="Gemini TTS", theme=gr.themes.Soft()) as interface:
+        gr.Markdown("## 🎤 Gemini TTS Voice Generator")
+        with gr.Row():
+            with gr.Column():
+                model_choice = gr.Dropdown(
+                    choices=Config.MODEL_CHOICES,
+                    value=Config.MODEL_CHOICES[0],
+                    label="Model"
+                )
+                voice_name = gr.Dropdown(
+                    choices=Config.VOICE_NAMES,
+                    value=Config.VOICE_NAMES[0],
+                    label="Voice"
+                )
+                api_key = gr.Textbox(
+                    label="API Key (optional)",
+                    type="password",
+                    placeholder="Leave empty to use configured keys"
+                )
+            with gr.Column():
+                text_input = gr.Textbox(
+                    label="Text to Convert",
+                    lines=5,
+                    placeholder="Enter text here..."
+                )
+                generate_btn = gr.Button("Generate Speech")
+                audio_output = gr.Audio(label="Generated Audio")
+                error_output = gr.Textbox(label="Error", visible=False)
+        def generate(text, model, voice, key):
+            """Run one generation and return ``(audio_path, error_box_update)``."""
+            if not text or not text.strip():
+                return None, gr.update(value="Please enter some text.", visible=True)
+            try:
+                request = TTSRequest(text=text, model=model, voice_name=voice, api_key=key or None)
+                audio_path = engine.generate_audio(request)
+            except Exception as e:
+                # The engine raises HTTPException, whose readable text lives in
+                # ``detail``; other exceptions fall back to their own message.
+                message = str(getattr(e, "detail", None) or e)
+                # The box is hidden by default, so it must be made visible
+                # for the message to reach the user.
+                return None, gr.update(value=message, visible=True)
+            # Success: clear and hide any error left over from an earlier run.
+            return audio_path, gr.update(value="", visible=False)
+        generate_btn.click(
+            generate,
+            inputs=[text_input, model_choice, voice_name, api_key],
+            outputs=[audio_output, error_output]
+        )
+    return interface
+# ─── Deployment Options ───────────────────────────────────────────────────────
+gradio_app = create_gradio_interface()
+# For Hugging Face Spaces: serve the Gradio UI at "/" from the same FastAPI
+# application that exposes the /generate/ endpoint.
+app = gr.mount_gradio_app(app, gradio_app, path="/")
+# For local development
 if __name__ == "__main__":
+    uvicorn.run(
+        # The import string must name this module: the file is app.py, so
+        # "main:app" could not be imported.
+        "app:app",
+        host="0.0.0.0",
+        port=8000,
+        # Auto-reload runs a single server process; uvicorn ignores
+        # ``workers`` when reload is enabled, so it is not passed.
+        reload=True,
+    )