import gradio as gr
from transformers import pipeline
from datasets import load_dataset
import soundfile as sf
import torch

# Initialize the TTS pipeline from Hugging Face
synthesizer = pipeline("text-to-speech", model="Futuresony/output")

# Load the speaker embeddings dataset and pick a single x-vector as the voice
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

def text_to_speech(text):
    # Convert the input text to speech
    speech = synthesizer(text, forward_params={"speaker_embeddings": speaker_embedding})
    # Save the generated speech to a file
    output_file = "generated_speech.wav"
    sf.write(output_file, speech["audio"], samplerate=speech["sampling_rate"])
    # Return the path to the audio file for playback
    return output_file

# Create the Gradio interface
demo = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(label="Enter Text", placeholder="Type something..."),
    outputs=gr.Audio(label="Generated Speech"),
    title="Text-to-Speech Generator",
    description="Enter text and generate speech using a pre-trained TTS model.",
)

if __name__ == "__main__":
    demo.launch()
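
# A minimal sketch of exercising the pipeline without the web UI (not part of
# the Space itself): call text_to_speech() directly and inspect the WAV file
# it writes. Assumes the model and speaker embedding loaded above.
#
#     result = text_to_speech("Hello, this is a quick synthesis test.")
#     print(f"Speech saved to {result}")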