# File size: 632 Bytes
# fa52b2c 9dd385e 96b4603 9dd385e fa52b2c 9dd385e fa52b2c 9dd385e fa52b2c
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
import gradio as gr
from transformers import VitsModel, AutoTokenizer
import torch
import scipy.io.wavfile as wavfile
# Checkpoint identifier handed to both from_pretrained() calls, kept in one
# place so the model and its tokenizer always come from the same source.
_CHECKPOINT = "facebook/mms-tts-eng"

# Loaded once at import time so every call to greet() reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = VitsModel.from_pretrained(_CHECKPOINT)

# Sample sentence; greet()'s own `text` parameter shadows this module-level
# name, so the function never reads it.
text = "some example text in the English language"
def greet(text):
    """Synthesize speech for ``text`` and return it as WAV-encoded bytes.

    The text is tokenized with the module-level ``tokenizer``, run through
    the module-level ``model`` with gradient tracking disabled, and the
    first waveform of the batch is encoded as a WAV file in memory.

    Args:
        text: The sentence to speak.

    Returns:
        bytes: The complete contents of a WAV file holding the audio.
    """
    import io  # local import: only this function needs an in-memory file

    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        output = model(**inputs).waveform

    # scipy's WAV writer inspects ``data.dtype.kind`` and therefore needs a
    # NumPy array; a torch tensor has to be converted first.
    samples = output[0].cpu().float().numpy()

    # Encode into a per-call buffer instead of a shared "tmp.wav": no file
    # handle is left open and concurrent requests cannot overwrite each
    # other's audio.
    buffer = io.BytesIO()
    # Take the rate from the model config rather than hard-coding it, so the
    # WAV header stays correct if the checkpoint changes.
    wavfile.write(buffer, rate=model.config.sampling_rate, data=samples)
    return buffer.getvalue()
# Wire greet() into a Gradio interface with a text input and an audio output,
# then start serving it. (A stray trailing "|" after launch() made this line
# a syntax error and has been removed.)
iface = gr.Interface(fn=greet, inputs="text", outputs="audio")
iface.launch()