Spaces:

Futuresony
/

Me

Sleeping

File size: 1,219 Bytes

2e0ffd8
a2f2a2c
efc1876
b838660
 
2e0ffd8
efc1876
d25d7d6
2071de1
efc1876
 
 
 
b838660
efc1876
b838660
2071de1
efc1876
b838660
 
2071de1
b838660
 
a2f2a2c
b838660
2071de1
b838660
 
 
 
 
2e0ffd8
 
 
d49f9f1

import gradio as gr
from transformers import pipeline
from datasets import load_dataset
import soundfile as sf
import torch

# Build the text-to-speech pipeline around the "Futuresony/output" checkpoint.
synthesizer = pipeline(task="text-to-speech", model="Futuresony/output")

# Fetch the validation split of the x-vector dataset; entry 7306 supplies the
# speaker embedding that is passed to the pipeline on every synthesis call.
embeddings_dataset = load_dataset(
    "Matthijs/cmu-arctic-xvectors",
    split="validation",
)
_xvector = embeddings_dataset[7306]["xvector"]
# unsqueeze adds a leading dimension of size 1 (presumably the batch axis the
# model expects -- confirm against the model's documentation).
speaker_embedding = torch.tensor(_xvector).unsqueeze(dim=0)

def text_to_speech(text: str) -> str:
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Args:
        text: The text to speak, as typed into the Gradio textbox.

    Returns:
        Path to a newly created ``.wav`` file containing the generated audio.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace, so the
            user sees a readable message instead of a model failure.
    """
    # Imported locally so this function does not depend on an edit to the
    # file-level import block.
    import tempfile

    # Reject blank input up front rather than sending it to the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Run the pipeline with the module-level speaker embedding as the voice.
    speech = synthesizer(text, forward_params={"speaker_embeddings": speaker_embedding})

    # Give every request its own output file. A single fixed filename would be
    # overwritten when two requests run at the same time, letting one user
    # receive another user's audio. The handle is closed before writing so the
    # path can be reopened on every platform; delete=False keeps the file on
    # disk for Gradio to serve (it is not removed by this function).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name
    sf.write(output_file, speech["audio"], samplerate=speech["sampling_rate"])

    # Gradio's Audio output component accepts a file path for playback.
    return output_file

# Assemble the web UI: a single text box feeding a single audio player.
_text_input = gr.Textbox(label="Enter Text", placeholder="Type something...")
_audio_output = gr.Audio(label="Generated Speech")

demo = gr.Interface(
    title="Text-to-Speech Generator",
    description="Enter text and generate speech using a pre-trained TTS model.",
    fn=text_to_speech,
    inputs=_text_input,
    outputs=_audio_output,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()