Spaces:

Athspi
/

Gggggg

Sleeping

App Files Files Community

Athspi committed on Mar 18

Commit

a8b416b

verified ·

1 Parent(s): 8da735f

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -28

app.py CHANGED Viewed

@@ -2,40 +2,43 @@ import gradio as gr
 import torch
 import torchaudio
-# Load MMS-TTS components
-bundle = torchaudio.pipelines.MMS_TTS_ENG
-text_processor = bundle.get_text_processor()
-tacotron2 = bundle.get_tacotron2()
-waveglow = bundle.get_waveglow()
-# Set up device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-tacotron2 = tacotron2.to(device)
-waveglow = waveglow.to(device)
 def synthesize_speech(text):
     try:
         if not text.strip():
-            raise ValueError("Text input cannot be empty")
         with torch.inference_mode():
-            # Process text input
             processed, lengths = text_processor(text)
             processed = processed.to(device)
             lengths = lengths.to(device)
-            # Generate spectrogram
-            spec, spec_lengths = tacotron2(processed, lengths)
             # Generate waveform
-            waveform, lengths = waveglow(spec, spec_lengths)
-        # Convert to numpy array for Gradio
         waveform = waveform.cpu().squeeze().numpy()
-        return (bundle.sample_rate, waveform)
     except Exception as e:
-        return f"Error: {str(e)}", None
 # Create Gradio interface
 interface = gr.Interface(
@@ -45,19 +48,18 @@ interface = gr.Interface(
         placeholder="Enter text to synthesize...",
         lines=3
     ),
-    outputs=gr.Audio(
-        label="Generated Speech",
-        type="numpy"
-    ),
     title="MMS-TTS English Text-to-Speech",
-    description="Convert text to speech using Facebook's MMS-TTS-ENG model",
     examples=[
-        ["Hello! This is a text-to-speech demonstration."],
         ["The quick brown fox jumps over the lazy dog."],
-        ["Natural language processing is fascinating!"]
     ]
 )
-# Launch the application
 if __name__ == "__main__":
-    interface.launch(server_name="0.0.0.0" if torch.cuda.is_available() else None)

 import torch
 import torchaudio
+# Initialize MMS-TTS pipeline
+def load_models():
+    """Build the TTS components and move the two models to the chosen device.
+
+    Returns:
+        A 4-tuple ``(text_processor, tacotron2, vocoder, device)``. The
+        ``bundle`` object itself stays local to this function and is not
+        returned.
+    """
+    # Use CUDA when torch reports it as available, otherwise fall back to CPU.
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # NOTE(review): confirm that `torchaudio.pipelines.MMS_TTS.get_bundle`
+    # exists in the installed torchaudio release before relying on this call.
+    bundle = torchaudio.pipelines.MMS_TTS.get_bundle("eng")
+    # Load components
+    text_processor = bundle.get_text_processor()
+    tacotron2 = bundle.get_tacotron2().to(device)
+    vocoder = bundle.get_vocoder().to(device)
+    return text_processor, tacotron2, vocoder, device
+# Load once at import time; synthesize_speech() reads these module-level names.
+text_processor, tacotron2, vocoder, device = load_models()
 def synthesize_speech(text):
     try:
         if not text.strip():
+            return None, "Please enter some text to synthesize"
         with torch.inference_mode():
+            # Process text
             processed, lengths = text_processor(text)
             processed = processed.to(device)
             lengths = lengths.to(device)
+            # Generate mel spectrogram
+            mel_spec, mel_lengths = tacotron2(processed, lengths)
             # Generate waveform
+            waveform = vocoder(mel_spec)
+        # Convert to numpy array
         waveform = waveform.cpu().squeeze().numpy()
+        return (bundle.sample_rate, waveform), None
     except Exception as e:
+        return None, f"Error: {str(e)}"
 # Create Gradio interface
 interface = gr.Interface(
         placeholder="Enter text to synthesize...",
         lines=3
     ),
+    outputs=[
+        gr.Audio(label="Generated Speech"),
+        gr.Textbox(label="Error Message", visible=False)
+    ],
     title="MMS-TTS English Text-to-Speech",
+    description="Convert text to speech using Facebook's MMS-TTS model",
     examples=[
+        ["Hello! This is a working text-to-speech demonstration."],
         ["The quick brown fox jumps over the lazy dog."],
+        ["Natural language processing is truly fascinating!"]
     ]
 )
 if __name__ == "__main__":
+    # Launch the Gradio interface only when this file is run as a script,
+    # not when it is imported.
+    interface.launch()