Spaces:

Athspi
/

Gsgsgsg

Running

App Files Files Community

Gsgsgsg / app.py

Athspi

Update app.py

b4357ba verified 8 days ago

raw

history blame

4.65 kB

	import gradio as gr
	import google.generativeai as genai
	import time
	import os

	# --- API key, supplied through Hugging Face Secrets ---
	# On Hugging Face Spaces, open the Space's settings and add a secret named
	# "GOOGLE_API_KEY" holding your Google AI API key. This module reads it from
	# the process environment; the value is None when the variable is not set.
	GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

	# --- Helper Function ---
	def create_unique_wav_file(audio_data):
	    """Write ``audio_data`` to a uniquely named ``.wav`` file and return its path.

	    The file is placed in an ``audio_outputs`` directory (created on demand,
	    relative to the current working directory). The name combines a
	    second-resolution timestamp with a random suffix, so two calls made
	    within the same second do not overwrite each other's output.

	    Args:
	        audio_data: Bytes written to the file verbatim; no WAV header is
	            added by this function.

	    Returns:
	        The path of the file that was written.

	    Raises:
	        gr.Error: If the file cannot be opened or written.
	    """
	    # Local import: keeps this block self-contained without touching the
	    # file-level import list.
	    import uuid

	    # Create a directory to store audio outputs if it doesn't exist.
	    output_dir = "audio_outputs"
	    os.makedirs(output_dir, exist_ok=True)

	    # A timestamp alone repeats for calls within the same second, so a
	    # random component is appended to guarantee distinct file names.
	    timestamp = int(time.time())
	    unique_suffix = uuid.uuid4().hex
	    file_name = os.path.join(
	        output_dir, f"speech_output_{timestamp}_{unique_suffix}.wav"
	    )

	    # The bytes are written as-is; the caller is expected to pass a complete
	    # audio payload.
	    try:
	        with open(file_name, "wb") as f:
	            f.write(audio_data)
	    except Exception as e:
	        print(f"Error saving wave file: {e}")
	        raise gr.Error(f"Could not save audio file. Error: {e}") from e
	    return file_name


	# --- Core API Logic ---
	def synthesize_speech(text):
	    """Synthesize speech for ``text`` via the Gemini API and return the audio path.

	    The API key is taken from the module-level ``GOOGLE_API_KEY``. On success
	    the audio bytes returned by the API are saved with
	    ``create_unique_wav_file`` and the resulting file path is returned.

	    Args:
	        text: Text to speak; must contain at least one non-whitespace
	            character.

	    Returns:
	        Path of the audio file written by ``create_unique_wav_file``.

	    Raises:
	        gr.Error: If the API key is missing, ``text`` is empty, the API call
	            fails, the response carries no audio, or the audio cannot be
	            saved.
	    """
	    # 1. Validate inputs (API key and text) before touching the network.
	    if not GOOGLE_API_KEY:
	        raise gr.Error("Google API Key not found. Please ensure you have set the GOOGLE_API_KEY secret in your Hugging Face Space settings.")
	    if not text or not text.strip():
	        raise gr.Error("Please enter some text to synthesize.")

	    try:
	        # 2. Configure the Gemini API with the loaded key.
	        genai.configure(api_key=GOOGLE_API_KEY)

	        # 3. Call the text-to-speech model.
	        # NOTE(review): confirm that 'tts-1' is a model id accepted by the
	        # google.generativeai SDK; nothing in this file establishes that.
	        model = genai.GenerativeModel(model_name='tts-1')

	        # Tone and style are requested through the prompt text itself.
	        prompt = f"Speak the following text in a cheerful and friendly voice: '{text}'"
	        response = model.generate_content(prompt)

	        # 4. Process the response and save the audio file.
	        # NOTE(review): this assumes the response exposes an
	        # `audio_content` attribute holding a complete WAV payload; verify
	        # against the SDK's response type. If the attribute is missing, the
	        # AttributeError is reported through the generic handler below.
	        if response.audio_content:
	            return create_unique_wav_file(response.audio_content)

	        # The call succeeded but produced no audio.
	        raise gr.Error("The API did not return audio data. Please check your text or try again.")

	    except gr.Error:
	        # Already a user-facing error with a specific message (missing audio
	        # or a failed save); do not re-wrap it in the generic
	        # network/API-key message below.
	        raise
	    except Exception as e:
	        # Anything else is surfaced in the UI with the underlying cause.
	        print(f"An error occurred: {e}")
	        raise gr.Error(f"Failed to synthesize speech. Please check your network connection and that your API key is valid. Error: {e}") from e

	# --- Gradio User Interface ---
	with gr.Blocks(theme=gr.themes.Soft()) as iface:
	    # Header text rendered at the top of the page.
	    gr.Markdown(
	"""
	# ✨ Gemini Text-to-Speech Synthesizer
	This app uses an API key stored securely in Hugging Face secrets.
	Just enter the text you want to convert to speech!
	"""
	    )

	    # Multi-line box where the user types the text to be spoken.
	    text_input = gr.Textbox(
	        lines=4,
	        label="Text to Synthesize",
	        placeholder="Hello! Welcome to the text-to-speech demonstration.",
	    )

	    # Primary action button that starts synthesis.
	    submit_btn = gr.Button(value="Generate Speech", variant="primary")

	    # Player for the result; the handler returns a file path.
	    audio_output = gr.Audio(type="filepath", label="Generated Audio")

	    # Wire the button to the synthesis function. Only the text is passed in;
	    # the API key is read inside the function.
	    submit_btn.click(fn=synthesize_speech, inputs=[text_input], outputs=audio_output)

	    # Ready-made sample sentences that fill the text box when selected.
	    gr.Examples(
	        label="Example Prompts",
	        inputs=[text_input],
	        examples=[
	            "The weather is wonderful today, perfect for a walk in the park.",
	            "I am so excited to try out this new text-to-speech feature!",
	            "Congratulations on your amazing achievement!",
	            "This is a demonstration of high-quality speech synthesis.",
	        ],
	    )

	# --- Script entry point ---
	# Deployment: push this file together with a requirements.txt to a
	# Hugging Face Space and define GOOGLE_API_KEY in the repository secrets.
	# The UI is launched only when this file is run as a script, not on import.
	if __name__ == "__main__":
	    iface.launch()