import gradio as gr
from transformers import pipeline
from datasets import load_dataset
import soundfile as sf
import torch

# Initialize the text-to-speech pipeline from the Hugging Face Hub
synthesizer = pipeline("text-to-speech", model="Futuresony/output")

# Load the x-vector speaker embeddings dataset and select one speaker's voice
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

def text_to_speech(text):
    # Convert the input text to speech using the selected speaker embedding
    speech = synthesizer(text, forward_params={"speaker_embeddings": speaker_embedding})

    # Save the generated audio to a WAV file
    output_file = "generated_speech.wav"
    sf.write(output_file, speech["audio"], samplerate=speech["sampling_rate"])

    # Return the path to the audio file for playback
    return output_file

# Create the Gradio interface
demo = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(label="Enter Text", placeholder="Type something..."),
    outputs=gr.Audio(label="Generated Speech"),
    title="Text-to-Speech Generator",
    description="Enter text and generate speech using a pre-trained TTS model.",
)

if __name__ == "__main__":
    demo.launch()