"""Gradio demo that converts text to speech with Facebook's MMS-TTS model."""

import gradio as gr
import torch
from transformers import VitsModel, VitsTokenizer

# Load the English MMS-TTS checkpoint and its tokenizer
MODEL_NAME = "facebook/mms-tts-eng"
tokenizer = VitsTokenizer.from_pretrained(MODEL_NAME)
model = VitsModel.from_pretrained(MODEL_NAME)

# Run on GPU when available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


def synthesize_speech(text):
    """Convert input text to speech and return (sample_rate, waveform) for Gradio."""
    try:
        if not text.strip():
            raise ValueError("Text input cannot be empty")

        # Tokenize the text and move the tensors to the model's device
        inputs = tokenizer(text, return_tensors="pt").to(device)

        # Generate the waveform without tracking gradients
        with torch.no_grad():
            speech = model(**inputs).waveform.cpu().squeeze().numpy()

        sample_rate = model.config.sampling_rate
        return sample_rate, speech

    except Exception as e:
        # Surface the failure in the Gradio UI; returning an extra value would
        # not match the interface's single Audio output
        raise gr.Error(f"Error: {str(e)}")


interface = gr.Interface(
    fn=synthesize_speech,
    inputs=gr.Textbox(
        label="Input Text",
        placeholder="Enter text to synthesize...",
        lines=3,
    ),
    outputs=gr.Audio(
        label="Generated Speech",
        type="numpy",
    ),
    title="MMS-TTS English Text-to-Speech",
    description="Convert text to speech using Facebook's MMS-TTS-ENG model",
    examples=[
        ["Hello! This is a text-to-speech demonstration."],
        ["The quick brown fox jumps over the lazy dog."],
        ["Natural language processing is fascinating!"],
    ],
)


if __name__ == "__main__":
    # Expose the demo on all network interfaces; use launch() for localhost only
    interface.launch(server_name="0.0.0.0")