Spaces:

shukdevdatta123
/

VocalForge-AI

Running

shukdevdatta123 committed on Jul 2

Commit

8d69e71

verified ·

1 Parent(s): 9c4336f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ from tortoise.api import TextToSpeech
 from tortoise.utils.audio import load_audio
 # 1) Initialize the Tortoise TTS engine at startup
-tts = TextToSpeech()  # downloads and caches models automatically
 # 2) Define a helper to generate speech from a reference clip + text
 def generate_speech(reference_audio_path, text):
@@ -15,22 +15,20 @@ def generate_speech(reference_audio_path, text):
     text: the string to synthesize
     returns: path to a 24 kHz WAV file with your cloned voice
     """
-    # Load and resample the reference clip to 22 050 Hz as a torch tensor
-    # (load_audio handles mono conversion)
-    ref_waveform = load_audio(reference_audio_path, sr=22050)
-    # Synthesize: one clip, use the 'fast' preset for decent speed/quality tradeoff
-    # returns a Tensor of shape (1, S) at 24 kHz :contentReference[oaicite:1]{index=1}
     output_tensor = tts.tts_with_preset(
         text,
         voice_samples=[ref_waveform],
         preset="fast"
     )
-    # Convert to NumPy and save to a temporary WAV (float32, 24 kHz)
     wav_np = output_tensor.squeeze().cpu().numpy()
     tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-    sf.write(tmp.name, wav_np, samplerate=24000)  # sample rate is 24 kHz :contentReference[oaicite:2]{index=2}
     return tmp.name
 # 3) Build the Gradio interface
@@ -55,4 +53,4 @@ with gr.Blocks(title="Tortoise Voice Cloning TTS") as app:
     )
 if __name__ == "__main__":
-    app.launch()

 from tortoise.utils.audio import load_audio
 # 1) Initialize the Tortoise TTS engine at startup
+tts = TextToSpeech()  # Downloads and caches models automatically
 # 2) Define a helper to generate speech from a reference clip + text
 def generate_speech(reference_audio_path, text):
     text: the string to synthesize
     returns: path to a 24 kHz WAV file with your cloned voice
     """
+    # Load the reference clip (Tortoise auto-resamples to 22 050 Hz)
+    ref_waveform = load_audio(reference_audio_path)
+    # Generate speech using 'fast' preset
     output_tensor = tts.tts_with_preset(
         text,
         voice_samples=[ref_waveform],
         preset="fast"
     )
+    # Save to temp WAV (float32, 24 kHz)
     wav_np = output_tensor.squeeze().cpu().numpy()
     tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+    sf.write(tmp.name, wav_np, samplerate=24000)
     return tmp.name
 # 3) Build the Gradio interface
     )
 if __name__ == "__main__":
+    app.launch()