import io

import gradio as gr
import scipy.io.wavfile as wavfile
import torch
from transformers import AutoProcessor, SeamlessM4TModel
# Hugging Face checkpoint shared by the processor and the model, so the two
# can never drift apart.
MODEL_ID = "facebook/hf-seamless-m4t-medium"

# NOTE: despite its name, `tokenizer` holds whatever AutoProcessor resolves
# for this checkpoint; the name is kept because other code refers to it.
tokenizer = AutoProcessor.from_pretrained(MODEL_ID)
model = SeamlessM4TModel.from_pretrained(MODEL_ID)

# Sample sentence. Nothing in the visible code reads this module-level value
# (the `text` parameter of greet() shadows it); kept for compatibility.
text = "some example text in the English language"
def greet(text):
    """Synthesize English speech for *text* and return it as WAV bytes.

    Args:
        text: The sentence to speak. It is treated as English input and
            rendered as English speech.

    Returns:
        The encoded WAV file as ``bytes``, or ``None`` when *text* is empty
        or whitespace-only (there is nothing to synthesize).
    """
    if not text or not text.strip():
        return None

    inputs = tokenizer(text=text, src_lang="eng", return_tensors="pt")

    with torch.no_grad():
        # Speech has to come from generate(): per the Transformers
        # SeamlessM4T documentation the plain forward pass is text-to-text
        # and its output carries no waveform. The first element of the
        # generate() result is the waveform tensor.
        waveform = model.generate(**inputs, tgt_lang="eng")[0]

    # scipy's WAV writer needs a NumPy array, not a torch tensor; squeeze()
    # drops the batch axis (assumes a single input sentence, as passed above).
    samples = waveform.cpu().numpy().squeeze()

    # Encode in memory rather than through a fixed "tmp.wav": no file handle
    # is left open, and concurrent requests cannot overwrite each other's
    # audio.
    buffer = io.BytesIO()
    wavfile.write(buffer, rate=model.config.sampling_rate, data=samples)
    return buffer.getvalue()
# Web UI: one text box in, one audio player out, wired to greet().
iface = gr.Interface(fn=greet, inputs="text", outputs="audio")

# Start the Gradio server as soon as the module runs.
iface.launch()