Spaces:

Athspi-aitools
/

Aittsg

Running

App Files Files Community

Aittsg / app.py

Athspi

Update app.py

c9eaebb verified 3 months ago

raw

history blame

2.04 kB

	import os
	import wave
	import gradio as gr
	import google.generativeai as genai

	# Read the Gemini API key from the environment (for example a Hugging Face
	# Space secret exposed as the GEMINI_API_KEY environment variable).
	GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")

	if not GOOGLE_API_KEY:
	    # Name the variable that is actually read above so the message is actionable.
	    raise ValueError("Please set your GEMINI_API_KEY environment variable.")

	# Configure the google.generativeai client with the key.
	genai.configure(api_key=GOOGLE_API_KEY)

	# Module-level model handle; generate_tts() calls model.generate_content().
	model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")

	# Write raw PCM frames into a WAV container.
	def save_wave(filename, pcm_data, channels=1, rate=24000, sample_width=2):
	    """Write *pcm_data* to *filename* as a WAV file.

	    Args:
	        filename: Destination path (``str`` or ``os.PathLike``) or a
	            writable binary file object accepted by ``wave.open``.
	        pcm_data: Bytes-like object holding the audio frames to write.
	        channels: Number of audio channels stored in the header.
	        rate: Sampling rate in frames per second stored in the header.
	        sample_width: Sample width in bytes stored in the header.
	    """
	    # wave.open() only opens the file itself when handed a ``str``; convert
	    # path-like objects (e.g. pathlib.Path) so they are accepted as well.
	    if isinstance(filename, os.PathLike):
	        filename = os.fspath(filename)

	    with wave.open(filename, "wb") as wf:
	        wf.setnchannels(channels)
	        wf.setsampwidth(sample_width)
	        wf.setframerate(rate)
	        wf.writeframes(pcm_data)

	# Gradio callback: turn the submitted text into a WAV file.
	def generate_tts(text):
	    """Generate speech for *text* and return ``(audio_path, status_message)``.

	    Args:
	        text: The text to synthesize. ``None``, empty, or whitespace-only
	            input is rejected without calling the model.

	    Returns:
	        A 2-tuple ``(path, status)``. ``path`` is the filename of the
	        written WAV file, or ``None`` when nothing was generated;
	        ``status`` is a human-readable message. Exceptions raised while
	        generating or saving are reported through ``status`` instead of
	        propagating.
	    """
	    import tempfile  # local import keeps this block self-contained

	    # Guard against None as well as blank strings.
	    if not text or not text.strip():
	        return None, "Please enter some text."

	    try:
	        # NOTE(review): confirm that this SDK's generate_content() accepts
	        # `response_modality` and an "audio/wav" response_mime_type; if it
	        # does not, the call raises and the error is surfaced below.
	        response = model.generate_content(
	            text,
	            generation_config={"response_mime_type": "audio/wav"},
	            response_modality="AUDIO",
	        )

	        # Audio payload of the first part of the first candidate.
	        audio_data = response.candidates[0].content.parts[0].inline_data.data
	        if not audio_data:
	            return None, "Error: the model returned no audio data."

	        # Use a unique file per request: a fixed "output.wav" would be
	        # overwritten when several requests are served at the same time.
	        # The file is intentionally kept (delete=False) so the caller can
	        # read it after this function returns.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	            output_filename = tmp.name
	        save_wave(output_filename, audio_data)

	        return output_filename, "Audio generated successfully!"
	    except Exception as e:
	        # UI boundary: report the failure in the status box rather than crash.
	        return None, f"Error: {str(e)}"

	# Build the Gradio UI: a text box, a trigger button, and the two outputs.
	with gr.Blocks() as demo:
	    gr.Markdown("## 🎙️ Gemini 2.5 Text-to-Speech Demo")

	    # Input row.
	    with gr.Row():
	        text_input = gr.Textbox(label="Enter text to convert to speech")

	    # Action row.
	    with gr.Row():
	        submit_button = gr.Button("Generate Speech")

	    # Output row: the audio file path plus a status message.
	    with gr.Row():
	        audio_output = gr.Audio(label="Generated Audio", type="filepath")
	        status_output = gr.Textbox(label="Status")

	    # generate_tts returns (audio_path, status), matching the outputs order.
	    submit_button.click(
	        outputs=[audio_output, status_output],
	        inputs=[text_input],
	        fn=generate_tts,
	    )

	# Start the app only when run as a script.
	if __name__ == "__main__":
	    demo.launch()