Athspi committed on
Commit 1f5fde3 · verified · 1 Parent(s): a8b416b

Update app.py

Files changed (1)
  1. app.py +30 -39
app.py CHANGED
@@ -1,44 +1,34 @@
 import gradio as gr
 import torch
-import torchaudio
+from transformers import VitsModel, VitsTokenizer
 
-# Initialize MMS-TTS pipeline
-def load_models():
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    bundle = torchaudio.pipelines.MMS_TTS.get_bundle("eng")
-
-    # Load components
-    text_processor = bundle.get_text_processor()
-    tacotron2 = bundle.get_tacotron2().to(device)
-    vocoder = bundle.get_vocoder().to(device)
-
-    return text_processor, tacotron2, vocoder, device
+# Load the MMS-TTS model and tokenizer from Hugging Face
+MODEL_NAME = "facebook/mms-tts-eng"
+tokenizer = VitsTokenizer.from_pretrained(MODEL_NAME)
+model = VitsModel.from_pretrained(MODEL_NAME)
 
-text_processor, tacotron2, vocoder, device = load_models()
+# Set up device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
 
 def synthesize_speech(text):
     try:
         if not text.strip():
-            return None, "Please enter some text to synthesize"
-
-        with torch.inference_mode():
-            # Process text
-            processed, lengths = text_processor(text)
-            processed = processed.to(device)
-            lengths = lengths.to(device)
-
-            # Generate mel spectrogram
-            mel_spec, mel_lengths = tacotron2(processed, lengths)
-
-            # Generate waveform
-            waveform = vocoder(mel_spec)
-
-            # Convert to numpy array
-            waveform = waveform.cpu().squeeze().numpy()
-            return (bundle.sample_rate, waveform), None
+            raise ValueError("Text input cannot be empty")
+
+        # Tokenize input text
+        inputs = tokenizer(text, return_tensors="pt").to(device)
 
+        # Generate speech
+        with torch.no_grad():
+            speech = model(**inputs).waveform.cpu().squeeze().numpy()
+
+        # Return sample rate and waveform
+        sample_rate = model.config.sampling_rate
+        return (sample_rate, speech)
+
     except Exception as e:
-        return None, f"Error: {str(e)}"
+        return f"Error: {str(e)}", None
 
 # Create Gradio interface
 interface = gr.Interface(
@@ -48,18 +38,19 @@ interface = gr.Interface(
         placeholder="Enter text to synthesize...",
         lines=3
     ),
-    outputs=[
-        gr.Audio(label="Generated Speech"),
-        gr.Textbox(label="Error Message", visible=False)
-    ],
+    outputs=gr.Audio(
+        label="Generated Speech",
+        type="numpy"
+    ),
     title="MMS-TTS English Text-to-Speech",
-    description="Convert text to speech using Facebook's MMS-TTS model",
+    description="Convert text to speech using Facebook's MMS-TTS-ENG model",
     examples=[
-        ["Hello! This is a working text-to-speech demonstration."],
+        ["Hello! This is a text-to-speech demonstration."],
         ["The quick brown fox jumps over the lazy dog."],
-        ["Natural language processing is truly fascinating!"]
+        ["Natural language processing is fascinating!"]
     ]
 )
 
+# Launch the application
 if __name__ == "__main__":
-    interface.launch()
+    interface.launch(server_name="0.0.0.0" if torch.cuda.is_available() else None)
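
A quick way to sanity-check the new transformers-based inference path outside of Gradio is to call the model directly and write the waveform to a file. This is a minimal sketch, not part of the commit: it assumes torch, transformers, and scipy are installed, that the facebook/mms-tts-eng checkpoint can be downloaded, and the output filename out.wav is arbitrary.

    # Standalone check of the VITS-based MMS-TTS path used in the updated app.py
    import torch
    import scipy.io.wavfile
    from transformers import VitsModel, VitsTokenizer

    tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
    model = VitsModel.from_pretrained("facebook/mms-tts-eng")

    text = "The quick brown fox jumps over the lazy dog."
    inputs = tokenizer(text, return_tensors="pt")

    with torch.no_grad():
        waveform = model(**inputs).waveform  # shape: (batch, num_samples)

    # The model reports its output rate in model.config.sampling_rate,
    # the same value synthesize_speech() returns alongside the waveform.
    scipy.io.wavfile.write("out.wav", rate=model.config.sampling_rate,
                           data=waveform.squeeze().cpu().numpy())

The (sample_rate, numpy_array) tuple returned by synthesize_speech is the format gr.Audio(type="numpy") expects, which is why the outputs list with the hidden error Textbox was collapsed to a single Audio component in this commit.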