Spaces:

Athspi
/

Whshhs

Runtime error

App Files Files Community

Whshhs / app.py

Athspi

Update app.py

b8a34b4 verified 2 months ago

raw

history blame

1.91 kB

	import os
	import asyncio
	from google import genai
	from google.genai import types
	import gradio as gr

	# The Gemini API key is read from the environment; on Hugging Face, store it
	# as a Space secret named GEMINI_API_KEY.
	API_KEY = os.environ.get("GEMINI_API_KEY")

	# Module-level Gemini client, created once at import time.
	client = genai.Client(api_key=API_KEY)

	async def generate_audio(text):
	    """Speak *text* with the Gemini Live API and save the audio to ``output.wav``.

	    Opens a live session configured for audio responses with the prebuilt
	    "Puck" voice, sends *text* as a single completed turn, and collects every
	    audio chunk the session returns. The collected PCM is then written as a
	    mono, 16-bit, 24 kHz WAV file.

	    Args:
	        text: The text to be spoken.

	    Returns:
	        The path of the written file, always ``"output.wav"``.
	    """
	    # Local import: keeps this function self-contained without touching the
	    # file-level import list.
	    import wave

	    config = types.LiveConnectConfig(
	        response_modalities=["audio"],
	        speech_config=types.SpeechConfig(
	            voice_config=types.VoiceConfig(
	                prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Puck")
	            )
	        ),
	        system_instruction=types.Content(
	            # Build the Part by field name: Part.from_text() takes ``text`` as a
	            # keyword-only argument in newer google-genai releases, so the
	            # positional call fails there.
	            parts=[types.Part(text="Repeat user input exactly without explanation")],
	            role="user",
	        ),
	    )

	    # Collect chunks in a list and join once; repeated ``bytes +=`` would copy
	    # the whole buffer on every chunk.
	    chunks = []
	    async with client.aio.live.connect(model="models/gemini-2.0-flash-exp", config=config) as session:
	        await session.send(input=text, end_of_turn=True)
	        async for response in session.receive():
	            if data := response.data:
	                chunks.append(data)

	    # Save as WAV file (16-bit PCM, 24kHz). The ``wave`` module writes a
	    # complete header with correct chunk sizes, unlike a hand-assembled
	    # byte string.
	    with wave.open("output.wav", "wb") as wav_file:
	        wav_file.setnchannels(1)
	        wav_file.setsampwidth(2)
	        wav_file.setframerate(24000)
	        wav_file.writeframes(b"".join(chunks))

	    return "output.wav"

	def tts(text):
	    """Gradio handler: convert *text* to speech and return the audio file path.

	    Args:
	        text: The text entered by the user. ``None``, empty, or
	            whitespace-only input produces no audio.

	    Returns:
	        The path returned by ``generate_audio``, or ``None`` when there is
	        nothing to synthesize.

	    Raises:
	        gr.Error: If audio generation fails. The output component expects a
	            file path, so an error message must not be returned as the value;
	            raising ``gr.Error`` lets Gradio show the message to the user.
	    """
	    # Guard against None as well as blank strings.
	    if not text or not text.strip():
	        return None
	    try:
	        # Use the path reported by the coroutine rather than repeating it here.
	        return asyncio.run(generate_audio(text))
	    except Exception as e:
	        raise gr.Error(f"Error: {e}") from e

	# Gradio UI: one text box in, one audio player out, backed by tts().
	iface = gr.Interface(
	    title="Gemini TTS Demo",
	    description="Convert text to speech using Google's Gemini 2.0 Flash model",
	    fn=tts,
	    inputs=gr.Textbox(placeholder="Type here...", label="Enter Text"),
	    # type="filepath" means the handler's return value is treated as a file path.
	    outputs=gr.Audio(type="filepath", label="TTS Output"),
	    examples=["Hello, this is a test.", "How are you today?"],
	)

	# Start the web server only when run as a script, not when imported.
	if __name__ == "__main__":
	    iface.launch()