|
import io

import gradio as gr
import scipy
import scipy.io.wavfile
from transformers import AutoProcessor, BarkModel
|
|
|
# Input processor loaded from the "suno/bark-small" checkpoint; greet() uses
# it to turn the prompt text into model inputs.
processor = AutoProcessor.from_pretrained("suno/bark-small")

# Load the Bark model from the same checkpoint and immediately convert it
# with to_bettertransformer(); the converted model is what greet() calls.
model = BarkModel.from_pretrained("suno/bark-small").to_bettertransformer()
|
|
|
def greet(text):
    """Synthesize speech for ``text`` and return it as WAV-encoded bytes.

    The prompt is run through the module-level ``processor`` and ``model``;
    generation uses ``do_sample=True``, so repeated calls with the same text
    may produce different audio.

    Args:
        text: Prompt string to convert to speech.

    Returns:
        bytes: The contents of a WAV file (written with a sample rate of
        24000) containing the generated audio.
    """
    inputs = processor(
        text=[text],
        return_tensors="pt",
    )
    speech_values = model.generate(**inputs, do_sample=True)

    # Move the result to host memory and drop singleton dimensions before
    # handing it to the WAV writer.
    waveform = speech_values.cpu().numpy().squeeze()

    # Encode into an in-memory buffer rather than a fixed "tmp.wav" on disk:
    # the shared file name let concurrent requests overwrite each other's
    # audio, and the file opened to read it back was never closed.
    buffer = io.BytesIO()
    scipy.io.wavfile.write(buffer, rate=24000, data=waveform)
    return buffer.getvalue()
|
|
|
# Wire greet() into a Gradio interface: a text input, an audio output.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="audio",
)

# Start serving the interface.
iface.launch()