Spaces:

shukdevdattaEX
/

VocalForge-AI-Stable

Sleeping

App Files Files Community

shukdevdattaEX committed on Jul 12

Commit

29543b1

verified ·

1 Parent(s): 63564a9

Upload 2 files

Browse files

Files changed (2) hide show

app.py +56 -0
requirements.txt +11 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import gradio as gr
+import torch
+import tempfile
+import soundfile as sf
+from tortoise.api import TextToSpeech
+from tortoise.utils.audio import load_audio
+# 1) Initialize the Tortoise TTS engine at startup.
+#    This runs once, at module import, so the single `tts` instance is
+#    shared by every call to generate_speech() in this process.
+tts = TextToSpeech()  # Downloads and caches models automatically
+# 2) Define a helper to generate speech from a reference clip + text
+def generate_speech(reference_audio_path, text):
+    """
+    reference_audio_path: filepath to a WAV sampled at 22 050 Hz
+    text: the string to synthesize
+    returns: path to a 24 kHz WAV file with your cloned voice
+    """
+    # ✅ FIXED: Provide sampling_rate as a required positional argument
+    ref_waveform = load_audio(reference_audio_path, 22050)
+    # Generate speech using 'fast' preset (alternatives: ultra_fast, standard, high_quality)
+    output_tensor = tts.tts_with_preset(
+        text,
+        voice_samples=[ref_waveform],
+        preset="fast"
+    )
+    # Save to temp WAV (float32, 24 kHz)
+    wav_np = output_tensor.squeeze().cpu().numpy()
+    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+    sf.write(tmp.name, wav_np, samplerate=24000)
+    return tmp.name
+# 3) Build the Gradio interface
+with gr.Blocks(title="Tortoise Voice Cloning TTS") as app:
+    gr.Markdown("## 🗣️ Voice Cloning with Tortoise TTS")
+    gr.Markdown(
+        "Upload a ~10 sec WAV clip (22 050 Hz), enter English text, "
+        "and hear it spoken back in **your** voice!"
+    )
+    with gr.Row():
+        voice_sample = gr.Audio(type="filepath", label="🎙️ Upload Reference Voice (22 050 Hz WAV)")
+        text_input   = gr.Textbox(label="💬 Text to Synthesize", placeholder="e.g., Hello, world!")
+    generate_btn = gr.Button("🔊 Generate Speech")
+    output_audio = gr.Audio(label="📢 Cloned Speech Output (24 kHz)", interactive=False)
+    generate_btn.click(
+        fn=generate_speech,
+        inputs=[voice_sample, text_input],
+        outputs=output_audio
+    )
+if __name__ == "__main__":
+    app.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+torch
+torchaudio
+transformers
+datasets
+librosa
+soundfile
+numpy
+sentencepiece
+TTS
+tortoise-tts
+pycryptodome