Spaces:
Sleeping
Sleeping
File size: 1,219 Bytes
2e0ffd8 a2f2a2c efc1876 b838660 2e0ffd8 efc1876 d25d7d6 2071de1 efc1876 b838660 efc1876 b838660 2071de1 efc1876 b838660 2071de1 b838660 a2f2a2c b838660 2071de1 b838660 2e0ffd8 d49f9f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import tempfile

import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from transformers import pipeline
# Text-to-speech pipeline built from the "Futuresony/output" checkpoint.
# Created once at import time so every request reuses the same object.
synthesizer = pipeline(task="text-to-speech", model="Futuresony/output")

# Pick one fixed entry (index 7306) of the validation split and use its
# "xvector" field as the speaker embedding for all generated audio.
embeddings_dataset = load_dataset(
    "Matthijs/cmu-arctic-xvectors",
    split="validation",
)
_xvector = embeddings_dataset[7306]["xvector"]
# unsqueeze(0) adds a leading dimension of size 1
# (presumably the batch axis the model expects -- TODO confirm).
speaker_embedding = torch.tensor(_xvector).unsqueeze(0)
def text_to_speech(text):
    """Synthesize *text* to speech and return the path of the written WAV file.

    Args:
        text: The text to convert to speech.

    Returns:
        Path of a newly created ``.wav`` file holding the generated audio.
    """
    # Run the module-level pipeline with the fixed speaker embedding.
    speech = synthesizer(text, forward_params={"speaker_embeddings": speaker_embedding})

    # Give every call its own output file. A single hard-coded filename is
    # shared by all requests, so overlapping calls could overwrite audio that
    # is still being served to another user.
    # delete=False: the file has to outlive this function so the caller can
    # read it; the handle is closed before soundfile reopens the path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name
    sf.write(output_file, speech["audio"], samplerate=speech["sampling_rate"])

    # Return the path to the audio file for playback.
    return output_file
# Gradio UI: a single text box as input and an audio component as output,
# with text_to_speech as the callback connecting them.
_text_input = gr.Textbox(label="Enter Text", placeholder="Type something...")
_audio_output = gr.Audio(label="Generated Speech")

demo = gr.Interface(
    fn=text_to_speech,
    inputs=_text_input,
    outputs=_audio_output,
    title="Text-to-Speech Generator",
    description="Enter text and generate speech using a pre-trained TTS model.",
)
# Launch the interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()