File size: 1,219 Bytes
2e0ffd8
a2f2a2c
efc1876
b838660
 
2e0ffd8
efc1876
d25d7d6
2071de1
efc1876
 
 
 
b838660
efc1876
b838660
2071de1
efc1876
b838660
 
2071de1
b838660
 
a2f2a2c
b838660
2071de1
b838660
 
 
 
 
2e0ffd8
 
 
d49f9f1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import tempfile

import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from transformers import pipeline

# Build the Hugging Face text-to-speech pipeline once, at import time, so every
# request reuses the same loaded model.
synthesizer = pipeline(task="text-to-speech", model="Futuresony/output")

# Fetch the x-vector dataset and keep a single entry as the fixed voice.
embeddings_dataset = load_dataset(
    path="Matthijs/cmu-arctic-xvectors",
    split="validation",
)
# Row 7306 is hard-coded; which speaker it corresponds to is not visible here.
# unsqueeze(0) prepends a dimension of size 1 (presumably the batch axis the
# model expects -- TODO confirm).
speaker_embedding = torch.tensor(
    embeddings_dataset[7306]["xvector"]
).unsqueeze(0)

def text_to_speech(text):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    The module-level ``synthesizer`` pipeline is called with the module-level
    ``speaker_embedding`` supplied as ``speaker_embeddings`` in its forward
    parameters. The resulting audio is written to disk with ``soundfile``.

    Args:
        text: The text to convert to speech.

    Returns:
        Path of the WAV file holding the generated audio. Each call writes to
        its own newly created temporary file.
    """
    speech = synthesizer(text, forward_params={"speaker_embeddings": speaker_embedding})

    # Reserve a unique path per call. A single fixed filename would let
    # overlapping requests overwrite each other's audio before it is served.
    # delete=False keeps the file on disk after the handle is closed.
    # NOTE(review): nothing here removes the file afterwards -- confirm who is
    # responsible for cleaning up the temp directory.
    with tempfile.NamedTemporaryFile(
        prefix="generated_speech_", suffix=".wav", delete=False
    ) as tmp:
        output_file = tmp.name

    # The handle is closed before writing so the file can be reopened by name
    # on platforms that forbid opening an already-open temporary file.
    sf.write(output_file, speech["audio"], samplerate=speech["sampling_rate"])

    # Return the path to the audio file for playback
    return output_file

# Expose text_to_speech through a one-input / one-output Gradio UI: a text box
# feeds the function and its returned file path is rendered by an audio
# component.
demo = gr.Interface(
    title="Text-to-Speech Generator",
    description="Enter text and generate speech using a pre-trained TTS model.",
    fn=text_to_speech,
    inputs=gr.Textbox(placeholder="Type something...", label="Enter Text"),
    outputs=gr.Audio(label="Generated Speech"),
)

# Start the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()