leenag committed on
Commit
c953361
·
verified ·
1 Parent(s): ae05afc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -14
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  from transformers import VitsModel, AutoTokenizer
4
  import soundfile as sf
5
  import tempfile
 
6
 
7
  LANG_MODEL_MAP = {
8
  "English": "facebook/mms-tts-eng",
@@ -12,7 +13,7 @@ LANG_MODEL_MAP = {
12
  "Kannada": "facebook/mms-tts-kan"
13
  }
14
 
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
16
  cache = {}
17
 
18
  def load_model_and_tokenizer(language):
@@ -24,26 +25,36 @@ def load_model_and_tokenizer(language):
24
  return cache[model_name]
25
 
26
  def tts(language, text):
27
- tokenizer, model = load_model_and_tokenizer(language)
28
- inputs = tokenizer(text, return_tensors="pt").to(device)
 
29
 
30
- with torch.no_grad():
31
- output = model(**inputs)
32
 
33
- # Save waveform to temp file
34
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
35
- sf.write(f.name, output.waveform.cpu().numpy(), samplerate=16000)
36
- return f.name
 
 
 
 
 
 
37
 
38
  iface = gr.Interface(
39
  fn=tts,
40
  inputs=[
41
- gr.Dropdown(choices=list(LANG_MODEL_MAP.keys()), label="Select Language"),
42
- gr.Textbox(label="Enter Text", placeholder="Type something...")
 
 
 
 
43
  ],
44
- outputs=gr.Audio(type="filepath", label="Synthesized Audio"),
45
- title="Multilingual Text-to-Speech (MMS)",
46
- description="Generate speech in English, Hindi, Tamil, Malayalam, or Kannada using Meta's MMS TTS models."
47
  )
48
 
49
  if __name__ == "__main__":
 
3
  from transformers import VitsModel, AutoTokenizer
4
  import soundfile as sf
5
  import tempfile
6
+ import os
7
 
8
  LANG_MODEL_MAP = {
9
  "English": "facebook/mms-tts-eng",
 
13
  "Kannada": "facebook/mms-tts-kan"
14
  }
15
 
16
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
  cache = {}
18
 
19
  def load_model_and_tokenizer(language):
 
25
  return cache[model_name]
26
 
27
  def tts(language, text):
28
+ try:
29
+ if not text.strip():
30
+ return "Please enter some text.", None
31
 
32
+ tokenizer, model = load_model_and_tokenizer(language)
33
+ inputs = tokenizer(text, return_tensors="pt").to(device)
34
 
35
+ with torch.no_grad():
36
+ output = model(**inputs)
37
+
38
+ # Save to temporary WAV file
39
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
40
+ sf.write(f.name, output.waveform.cpu().numpy(), samplerate=16000)
41
+ return "Here is your audio output", f.name
42
+
43
+ except Exception as e:
44
+ return f"Error: {str(e)}", None
45
 
46
  iface = gr.Interface(
47
  fn=tts,
48
  inputs=[
49
+ gr.Dropdown(label="Select Language", choices=list(LANG_MODEL_MAP.keys()), value="English"),
50
+ gr.Textbox(label="Enter Text")
51
+ ],
52
+ outputs=[
53
+ gr.Textbox(label="Status"),
54
+ gr.Audio(label="Synthesized Speech", type="filepath")
55
  ],
56
+ title="Multilingual TTS with Meta MMS",
57
+ description="Generate speech from text using Meta's MMS models for English, Hindi, Tamil, Malayalam, and Kannada."
 
58
  )
59
 
60
  if __name__ == "__main__":