Spaces:

leenag
/

Multilingual_TTS

Running

App Files Files Community

Multilingual_TTS / app.py

leenag

Create app.py

66d0bf1 verified 5 months ago

raw

history blame

2.18 kB

	import gradio as gr
	import torch
	from parler_tts import ParlerTTSForConditionalGeneration
	from transformers import AutoTokenizer
	import soundfile as sf
	import uuid

	# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Checkpoint identifier shared by the model and the prompt tokenizer.
	model_name = "ai4bharat/indic-parler-tts"

	# Load the weights first, then move the module onto the selected device.
	model = ParlerTTSForConditionalGeneration.from_pretrained(model_name)
	model = model.to(device)

	# Two tokenizers are needed: `tokenizer` comes from the checkpoint itself,
	# while `desc_tokenizer` is loaded from the path recorded in the model's
	# text-encoder config.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	desc_tokenizer = AutoTokenizer.from_pretrained(
	    model.config.text_encoder._name_or_path
	)

	def synthesize(language, text, gender, emotion, speed, pitch, quality):
	    """Generate speech for ``text`` and return the path of the WAV file written.

	    The style selections are combined into one English description sentence
	    that is passed to the model as ``input_ids``; ``text`` is tokenized
	    separately and passed as the prompt.

	    Args:
	        language: Language name, inserted verbatim into the description.
	        text: Text to synthesize.
	        gender: Speaker gender label; lower-cased in the description.
	        emotion: Emotion label; lower-cased in the description.
	        speed: Speaking-rate label; lower-cased in the description.
	        pitch: Pitch label; lower-cased in the description.
	        quality: Voice-quality label; lower-cased in the description.

	    Returns:
	        Name of a freshly written ``<uuid4>.wav`` file (relative to the
	        current working directory).

	    Raises:
	        gr.Error: If ``text`` is blank or any style selection is missing.
	    """
	    if text is None or not text.strip():
	        raise gr.Error("Please enter some text to synthesize.")

	    # A selector the user never touched can deliver None. Without this check
	    # the code would either crash on `.lower()` or put the literal word
	    # "None" into the description sentence.
	    selections = {
	        "Language": language,
	        "Speaker Gender": gender,
	        "Emotion": emotion,
	        "Speaking Rate": speed,
	        "Pitch": pitch,
	        "Voice Quality": quality,
	    }
	    missing = [label for label, choice in selections.items() if not choice]
	    if missing:
	        raise gr.Error("Please choose a value for: " + ", ".join(missing))

	    desc = (
	        f"A native {language} {gender.lower()} speaker with a {emotion.lower()} and expressive tone, "
	        f"speaking at a {speed.lower()} rate with {pitch.lower()} pitch and {quality.lower()} voice quality."
	    )
	    desc_inputs = desc_tokenizer(desc, return_tensors="pt").to(device)
	    text_inputs = tokenizer(text, return_tensors="pt").to(device)

	    gen_audio = model.generate(
	        input_ids=desc_inputs.input_ids,
	        attention_mask=desc_inputs.attention_mask,
	        prompt_input_ids=text_inputs.input_ids,
	        # Use the mask produced by the tokenizer (already on `device`)
	        # rather than a hand-built all-ones tensor.
	        prompt_attention_mask=text_inputs.attention_mask,
	    )

	    # Move the result to host memory and drop size-1 dimensions before
	    # handing the samples to soundfile.
	    audio_np = gen_audio.cpu().numpy().squeeze()
	    filename = f"{uuid.uuid4()}.wav"
	    sf.write(filename, audio_np, model.config.sampling_rate)
	    return filename

	# Build the web form around `synthesize`. Every selector is given an explicit
	# initial value so the callback receives a string for each style option even
	# when the user submits without touching the controls (a Radio with no
	# `value` would otherwise start with nothing selected). Defaults are the
	# first listed option, except rate and pitch, which start at their middle
	# setting.
	iface = gr.Interface(
	    fn=synthesize,
	    inputs=[
	        gr.Dropdown(
	            ["Malayalam", "English", "Hindi", "Tamil"],
	            value="Malayalam",
	            label="Language",
	        ),
	        gr.Textbox(label="Text to Synthesize", lines=4),
	        gr.Radio(["Male", "Female"], value="Male", label="Speaker Gender"),
	        gr.Dropdown(
	            ["Neutral", "Happy", "Sad", "Angry"],
	            value="Neutral",
	            label="Emotion",
	        ),
	        gr.Dropdown(
	            ["Slow", "Moderate", "Fast"],
	            value="Moderate",
	            label="Speaking Rate",
	        ),
	        gr.Dropdown(["Low", "Normal", "High"], value="Normal", label="Pitch"),
	        gr.Dropdown(["Basic", "Refined"], value="Basic", label="Voice Quality"),
	    ],
	    # `synthesize` returns a file name, so the audio player is fed a path.
	    outputs=gr.Audio(type="filepath", label="Synthesized Audio"),
	    allow_flagging="never",
	    title="Multilingual TTS using Indic Parler-TTS",
	    description="Type text, choose a speaker style, and get synthesized speech for Malayalam, Hindi, Tamil, or English.",
	)

	# Start the Gradio server as soon as the module is executed.
	iface.launch()