Athspi committed on
Commit
b4357ba
·
verified ·
1 Parent(s): 8274800

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -31
app.py CHANGED
@@ -3,6 +3,11 @@ import google.generativeai as genai
3
  import time
4
  import os
5
 
 
 
 
 
 
6
  # --- Helper Function ---
7
  def create_unique_wav_file(audio_data):
8
  """Saves audio data to a uniquely named WAV file and returns the path."""
@@ -25,32 +30,30 @@ def create_unique_wav_file(audio_data):
25
 
26
 
27
  # --- Core API Logic ---
28
- def synthesize_speech(api_key, text):
29
  """
30
  Synthesizes speech from text using the Gemini API.
31
- This function takes an API key and text, validates them, configures the
32
- Gemini client, calls the Text-to-Speech API, and saves the resulting audio.
33
  """
34
- # 1. Validate Inputs
35
- if not api_key:
36
- raise gr.Error("API Key is required. Please enter your Google AI API Key.")
37
  if not text or not text.strip():
38
  raise gr.Error("Please enter some text to synthesize.")
39
 
40
  try:
41
- # 2. Configure the Gemini API
42
- # This sets up the API key for all subsequent genai calls.
43
- genai.configure(api_key=api_key)
44
 
45
  # 3. Call the Text-to-Speech Model
46
  # We use the 'tts-1' model which is optimized for this task.
47
- # The prompt itself instructs the model on the desired tone.
48
  model = genai.GenerativeModel(model_name='tts-1')
49
 
50
  # The API can be instructed on tone and style directly in the prompt.
51
  prompt = f"Speak the following text in a cheerful and friendly voice: '{text}'"
52
 
53
- response = model.generate_content(prompt, response_mime_type="audio/wav")
 
54
 
55
  # 4. Process the Response and Save the Audio File
56
  # The audio data is conveniently located in the `audio_content` attribute.
@@ -64,33 +67,24 @@ def synthesize_speech(api_key, text):
64
  except Exception as e:
65
  # Provide a more informative error message in the UI.
66
  print(f"An error occurred: {e}")
67
- raise gr.Error(f"Failed to synthesize speech. Please check your API key and network connection. Error: {e}")
68
 
69
  # --- Gradio User Interface ---
70
  with gr.Blocks(theme=gr.themes.Soft()) as iface:
71
  gr.Markdown(
72
  """
73
  # ✨ Gemini Text-to-Speech Synthesizer
74
- Enter your Google AI API Key and the text you want to convert to speech.
75
- The audio will be generated with a cheerful tone.
76
  """
77
  )
78
 
79
- with gr.Row():
80
- # Input for the user's API key. Type="password" hides the input.
81
- api_key_input = gr.Textbox(
82
- label="Google AI API Key",
83
- type="password",
84
- placeholder="Enter your API key here...",
85
- scale=1
86
- )
87
- # Input for the text to be synthesized.
88
- text_input = gr.Textbox(
89
- label="Text to Synthesize",
90
- placeholder="Hello! Welcome to the text-to-speech demonstration.",
91
- lines=3,
92
- scale=2
93
- )
94
 
95
  # Button to trigger the synthesis process.
96
  submit_btn = gr.Button("Generate Speech", variant="primary")
@@ -99,9 +93,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
99
  audio_output = gr.Audio(label="Generated Audio", type="filepath")
100
 
101
  # Connect the button click event to the core function.
 
102
  submit_btn.click(
103
  fn=synthesize_speech,
104
- inputs=[api_key_input, text_input],
105
  outputs=audio_output
106
  )
107
 
@@ -118,6 +113,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
118
  )
119
 
120
  # --- Main execution block ---
121
- # To run this script, save it as app.py and run `python app.py` in your terminal.
 
122
  if __name__ == "__main__":
123
  iface.launch()
 
3
  import time
4
  import os
5
 
6
+ # --- Load API Key from Hugging Face Secrets ---
7
+ # IMPORTANT: For this to work on Hugging Face Spaces, you must go to your Space's
8
+ # settings and add a secret named "GOOGLE_API_KEY" with your Google AI API key as the value.
9
+ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
10
+
11
  # --- Helper Function ---
12
  def create_unique_wav_file(audio_data):
13
  """Saves audio data to a uniquely named WAV file and returns the path."""
 
30
 
31
 
32
  # --- Core API Logic ---
33
+ def synthesize_speech(text):
34
  """
35
  Synthesizes speech from text using the Gemini API.
36
+ This function uses the API key loaded from Hugging Face secrets.
 
37
  """
38
+ # 1. Validate Inputs (API Key and Text)
39
+ if not GOOGLE_API_KEY:
40
+ raise gr.Error("Google API Key not found. Please ensure you have set the GOOGLE_API_KEY secret in your Hugging Face Space settings.")
41
  if not text or not text.strip():
42
  raise gr.Error("Please enter some text to synthesize.")
43
 
44
  try:
45
+ # 2. Configure the Gemini API with the loaded key
46
+ genai.configure(api_key=GOOGLE_API_KEY)
 
47
 
48
  # 3. Call the Text-to-Speech Model
49
  # We use the 'tts-1' model which is optimized for this task.
 
50
  model = genai.GenerativeModel(model_name='tts-1')
51
 
52
  # The API can be instructed on tone and style directly in the prompt.
53
  prompt = f"Speak the following text in a cheerful and friendly voice: '{text}'"
54
 
55
+ # The tts-1 model implicitly returns audio/wav format.
56
+ response = model.generate_content(prompt)
57
 
58
  # 4. Process the Response and Save the Audio File
59
  # The audio data is conveniently located in the `audio_content` attribute.
 
67
  except Exception as e:
68
  # Provide a more informative error message in the UI.
69
  print(f"An error occurred: {e}")
70
+ raise gr.Error(f"Failed to synthesize speech. Please check your network connection and that your API key is valid. Error: {e}")
71
 
72
  # --- Gradio User Interface ---
73
  with gr.Blocks(theme=gr.themes.Soft()) as iface:
74
  gr.Markdown(
75
  """
76
  # ✨ Gemini Text-to-Speech Synthesizer
77
+ This app uses an API key stored securely in Hugging Face secrets.
78
+ Just enter the text you want to convert to speech!
79
  """
80
  )
81
 
82
+ # Input for the text to be synthesized.
83
+ text_input = gr.Textbox(
84
+ label="Text to Synthesize",
85
+ placeholder="Hello! Welcome to the text-to-speech demonstration.",
86
+ lines=4,
87
+ )
 
 
 
 
 
 
 
 
 
88
 
89
  # Button to trigger the synthesis process.
90
  submit_btn = gr.Button("Generate Speech", variant="primary")
 
93
  audio_output = gr.Audio(label="Generated Audio", type="filepath")
94
 
95
  # Connect the button click event to the core function.
96
+ # The API key is now handled internally and not needed as an input.
97
  submit_btn.click(
98
  fn=synthesize_speech,
99
+ inputs=[text_input],
100
  outputs=audio_output
101
  )
102
 
 
113
  )
114
 
115
  # --- Main execution block ---
116
+ # To deploy, push this file and a requirements.txt to a Hugging Face Space
117
+ # and set the GOOGLE_API_KEY in the repository secrets.
118
  if __name__ == "__main__":
119
  iface.launch()