Spaces:

leenag
/

Multilingual_TTS

Running

App Files Community

leenag committed on May 7

Commit

c953361

verified ·

1 Parent(s): ae05afc

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -14

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import torch
 from transformers import VitsModel, AutoTokenizer
 import soundfile as sf
 import tempfile
 LANG_MODEL_MAP = {
     "English": "facebook/mms-tts-eng",
@@ -12,7 +13,7 @@ LANG_MODEL_MAP = {
     "Kannada": "facebook/mms-tts-kan"
 }
-device = "cuda" if torch.cuda.is_available() else "cpu"
 cache = {}
 def load_model_and_tokenizer(language):
@@ -24,26 +25,36 @@ def load_model_and_tokenizer(language):
     return cache[model_name]
 def tts(language, text):
-    tokenizer, model = load_model_and_tokenizer(language)
-    inputs = tokenizer(text, return_tensors="pt").to(device)
-    with torch.no_grad():
-        output = model(**inputs)
-    # Save waveform to temp file
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-        sf.write(f.name, output.waveform.cpu().numpy(), samplerate=16000)
-        return f.name
 iface = gr.Interface(
     fn=tts,
     inputs=[
-        gr.Dropdown(choices=list(LANG_MODEL_MAP.keys()), label="Select Language"),
-        gr.Textbox(label="Enter Text", placeholder="Type something...")
     ],
-    outputs=gr.Audio(type="filepath", label="Synthesized Audio"),
-    title="Multilingual Text-to-Speech (MMS)",
-    description="Generate speech in English, Hindi, Tamil, Malayalam, or Kannada using Meta's MMS TTS models."
 )
 if __name__ == "__main__":

 from transformers import VitsModel, AutoTokenizer
 import soundfile as sf
 import tempfile
+import os
 LANG_MODEL_MAP = {
     "English": "facebook/mms-tts-eng",
     "Kannada": "facebook/mms-tts-kan"
 }
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 cache = {}
 def load_model_and_tokenizer(language):
     return cache[model_name]
 def tts(language, text):
+    try:
+        if not text.strip():
+            return "Please enter some text.", None
+        tokenizer, model = load_model_and_tokenizer(language)
+        inputs = tokenizer(text, return_tensors="pt").to(device)
+        with torch.no_grad():
+            output = model(**inputs)
+        # Save to temporary WAV file
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+            sf.write(f.name, output.waveform.cpu().numpy(), samplerate=16000)
+            return "Here is your audio output", f.name
+    except Exception as e:
+        return f"Error: {str(e)}", None
 iface = gr.Interface(
     fn=tts,
     inputs=[
+        gr.Dropdown(label="Select Language", choices=list(LANG_MODEL_MAP.keys()), value="English"),
+        gr.Textbox(label="Enter Text")
+    ],
+    outputs=[
+        gr.Textbox(label="Status"),
+        gr.Audio(label="Synthesized Speech", type="filepath")
     ],
+    title="Multilingual TTS with Meta MMS",
+    description="Generate speech from text using Meta's MMS models for English, Hindi, Tamil, Malayalam, and Kannada."
 )
 if __name__ == "__main__":