# File size: 1,030 Bytes
# 48ed3fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import gradio as gr
from transformers import VitsModel, AutoTokenizer
import torch
import scipy.io.wavfile
import tempfile

# Hugging Face Hub checkpoint that supplies both the Somali VITS model and its tokenizer.
MODEL_CHECKPOINT = "facebook/mms-tts-som"

# Loaded once at module import so every synthesis call reuses the same objects.
model = VitsModel.from_pretrained(MODEL_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)

def somali_text_to_speech(text):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    The text is tokenized with the module-level ``tokenizer``, run through the
    module-level ``model`` without gradient tracking, and the resulting
    waveform is written to a temporary ``.wav`` file.

    Args:
        text: The text to speak.

    Returns:
        The path of the generated ``.wav`` file, written at
        ``model.config.sampling_rate``. The file is created with
        ``delete=False``, so it is not removed by this function. Returns
        ``None`` when ``text`` is ``None``, empty, or whitespace-only.
    """
    # Nothing to synthesize: skip the tokenizer/model call entirely rather
    # than pushing an empty prompt through the network.
    if not text or not text.strip():
        return None

    inputs = tokenizer(text, return_tensors="pt")

    # Inference only, so gradient bookkeeping is switched off.
    with torch.no_grad():
        output = model(**inputs)

    waveform = output.waveform.squeeze().cpu().numpy()

    # Reserve a unique file name, then close our handle before scipy opens
    # the same path itself, so only one handle touches the file at a time.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    scipy.io.wavfile.write(wav_path, rate=model.config.sampling_rate, data=waveform)
    return wav_path

# Build the web UI: a single text box in, a single audio player out.
demo = gr.Interface(
    fn=somali_text_to_speech,
    inputs=gr.Textbox(label="Enter Somali Text"),
    outputs=gr.Audio(label="Generated Somali Speech"),
    title="Somali Text-to-Speech",
    description="Type Somali text and hear it spoken using Hugging Face's VitsModel.",
)

# Start serving; share=True additionally requests a public Gradio share link.
demo.launch(share=True)