Spaces:

shukdevdatta123
/

VocalForge-AI

Running

App Files Files Community

VocalForge-AI / app.py

shukdevdatta123

Update app.py

9414725 verified 10 days ago

raw

history blame contribute delete

1.99 kB

	import gradio as gr
	import torch
	import tempfile
	import soundfile as sf
	from tortoise.api import TextToSpeech
	from tortoise.utils.audio import load_audio

	# 1) Initialize the Tortoise TTS engine once, at import time. The single
	#    module-level instance is what generate_speech() uses for every request,
	#    so the engine is not rebuilt per click.
	#    NOTE(review): the original author states the constructor downloads and
	#    caches the model weights automatically -- confirm against tortoise docs.
	tts = TextToSpeech()  # Downloads and caches models automatically

	# 2) Define a helper to generate speech from a reference clip + text
	def generate_speech(reference_audio_path, text):
	    """Synthesize ``text`` in the voice heard in a reference recording.

	    Args:
	        reference_audio_path: Filesystem path of the reference clip, as
	            delivered by the ``gr.Audio(type="filepath")`` input. It is
	            ``None`` when the user has not uploaded anything.
	            NOTE(review): the clip is loaded via ``load_audio(path, 22050)``;
	            presumably that call resamples other rates -- confirm.
	        text: The text to speak.

	    Returns:
	        Path of a newly created temporary ``.wav`` file, written with a
	        sample rate of 24 000 Hz. The file is created with ``delete=False``
	        so it outlives this call; nothing in this function removes it.

	    Raises:
	        gr.Error: If the reference clip is missing or the text is blank.
	    """
	    # Fail early with a message the UI can display, rather than letting the
	    # audio loader or the model crash on missing input.
	    if not reference_audio_path:
	        raise gr.Error("Please upload a reference voice clip first.")
	    if not text or not text.strip():
	        raise gr.Error("Please enter some text to synthesize.")

	    # The sampling rate is a required argument of load_audio.
	    ref_waveform = load_audio(reference_audio_path, 22050)

	    # Generate speech using 'fast' preset
	    # (alternatives: ultra_fast, standard, high_quality)
	    output_tensor = tts.tts_with_preset(
	        text,
	        voice_samples=[ref_waveform],
	        preset="fast",
	    )

	    # Drop singleton dimensions and move to host memory for soundfile.
	    wav_np = output_tensor.squeeze().cpu().numpy()

	    # Reserve a unique path, then close our own handle before soundfile
	    # opens the file by name. Leaving it open leaks a descriptor per request
	    # and prevents the second open on Windows.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        out_path = tmp.name

	    sf.write(out_path, wav_np, samplerate=24000)
	    return out_path

	# 3) Build the Gradio interface
	with gr.Blocks(title="Tortoise Voice Cloning TTS") as app:
	    # Page heading, then a short usage blurb.
	    gr.Markdown("## 🗣️ Voice Cloning with Tortoise TTS")
	    gr.Markdown(
	        "Upload a ~10 sec WAV clip (22 050 Hz), enter English text, "
	        "and hear it spoken back in your voice!"
	    )

	    # The two inputs share one row: the reference clip (handed to the
	    # callback as a file path) and the text to be spoken.
	    with gr.Row():
	        voice_sample = gr.Audio(
	            label="🎙️ Upload Reference Voice (22 050 Hz WAV)",
	            type="filepath",
	        )
	        text_input = gr.Textbox(
	            placeholder="e.g., Hello, world!",
	            label="💬 Text to Synthesize",
	        )

	    # Trigger button and a read-only player for the result.
	    generate_btn = gr.Button("🔊 Generate Speech")
	    output_audio = gr.Audio(
	        interactive=False,
	        label="📢 Cloned Speech Output (24 kHz)",
	    )

	    # Clicking the button runs generate_speech(clip_path, text) and routes
	    # the returned file path into the output player.
	    generate_btn.click(
	        generate_speech,
	        [voice_sample, text_input],
	        output_audio,
	    )

	# Start the web server only when this file is executed directly.
	if __name__ == "__main__":
	    app.launch()