Spaces:

sulaimank
/

luganda-TTS

Sleeping

App Files Files Community

luganda-TTS / app.py

sulaimank

Update app.py

5dab9a1 verified 23 days ago

raw

history blame

9.39 kB

	import os
	import tempfile
	import gradio as gr
	from huggingface_hub import hf_hub_download
	from TTS.utils.synthesizer import Synthesizer
	import logging

	# Logging setup: INFO-level basic configuration plus a module-scoped logger.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Upper bound on the number of input characters synthesized per request.
	MAX_TXT_LEN = 400

	# Numeric suffixes of the checkpoint filenames, in the order shown in the UI.
	_CHECKPOINT_NUMBERS = (2080000, 2085000, 2090000, 2095000, 2100000)

	# UI label -> checkpoint filename ("Model 1" -> "checkpoint_2080000.pth", ...).
	MODEL_INFO = {
	    f"Model {position}": f"checkpoint_{number}.pth"
	    for position, number in enumerate(_CHECKPOINT_NUMBERS, start=1)
	}

	# Synthesizers already built in this process, keyed by UI label.
	synthesizer_cache = dict()

	def download_config():
	    """Fetch ``config.json`` from the ``sulaimank/luganda_LMs`` Hub repository.

	    Returns:
	        The local path returned by ``hf_hub_download``.

	    Raises:
	        Exception: Any failure is logged and then re-raised unchanged.
	    """
	    try:
	        local_config = hf_hub_download("sulaimank/luganda_LMs", filename="config.json")
	        logger.info(f"Config downloaded to: {local_config}")
	    except Exception as err:
	        logger.error(f"Failed to download config: {err}")
	        raise
	    return local_config

	# Download config once at startup.
	# This runs when the module is loaded; download_config() re-raises on failure,
	# so a failed download stops the app here. The resulting path is the one
	# load_synth() passes to every Synthesizer it builds.
	config_path = download_config()

	def load_synth(model_choice: str):
	    """Return the synthesizer for *model_choice*, building it on first use.

	    Args:
	        model_choice: A key of ``MODEL_INFO``.

	    Returns:
	        The ``Synthesizer`` stored in ``synthesizer_cache`` for that key.

	    Raises:
	        Exception: Lookup, download or construction failures are logged and
	            re-raised unchanged.
	    """
	    if model_choice not in synthesizer_cache:
	        try:
	            checkpoint_name = MODEL_INFO[model_choice]
	            checkpoint_path = hf_hub_download("sulaimank/luganda_LMs", filename=checkpoint_name)
	            built = Synthesizer(tts_checkpoint=checkpoint_path, tts_config_path=config_path)

	            # Remember the instance so later requests skip download and loading.
	            synthesizer_cache[model_choice] = built
	            logger.info(f"Loaded and cached synthesizer for {model_choice}")
	            return built
	        except Exception as err:
	            logger.error(f"Failed to load synthesizer for {model_choice}: {err}")
	            raise

	    logger.info(f"Using cached synthesizer for {model_choice}")
	    return synthesizer_cache[model_choice]

	def tts(text: str, model_choice: str):
	    """Synthesize *text* with the selected model and return a WAV file path.

	    Args:
	        text: Input text; characters beyond ``MAX_TXT_LEN`` are dropped.
	        model_choice: Key of ``MODEL_INFO`` naming the checkpoint to use.

	    Returns:
	        A ``(wav_path, status)`` tuple. ``wav_path`` is ``None`` when the input
	        is missing/blank or when synthesis fails; ``status`` is the message
	        shown to the user (success note, truncation warning, or error).
	    """
	    # Accept None as well as blank input so ``.strip()`` can never raise.
	    if text is None or not text.strip():
	        return None, "⚠️ Please enter some text to synthesize."

	    # Truncate if too long
	    original_length = len(text)
	    if original_length > MAX_TXT_LEN:
	        text = text[:MAX_TXT_LEN]
	        warning_msg = f"⚠️ Input truncated from {original_length} to {MAX_TXT_LEN} characters."
	    else:
	        warning_msg = f"✅ Processing {len(text)} characters."

	    wav_path = None
	    try:
	        logger.info(f"Generating TTS for: '{text[:50]}...' using {model_choice}")
	        synthesizer = load_synth(model_choice)
	        wav = synthesizer.tts(text)

	        # Reserve a unique path, then close our handle BEFORE the synthesizer
	        # writes to it: writing by name while the handle is still open fails
	        # on platforms that do not allow reopening an open temporary file.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
	            wav_path = fp.name
	        synthesizer.save_wav(wav, wav_path)
	        logger.info(f"Audio saved to: {wav_path}")
	        return wav_path, warning_msg

	    except Exception as e:
	        error_msg = f"❌ Error generating speech: {str(e)}"
	        # logger.exception keeps the traceback alongside the message.
	        logger.exception(error_msg)
	        # delete=False means nobody else cleans up: drop the file we created
	        # if the failure happened after it was reserved.
	        if wav_path is not None:
	            try:
	                os.remove(wav_path)
	            except OSError:
	                # Best-effort cleanup; the synthesis error is what matters.
	                pass
	        return None, error_msg

	# Example [text, model label] rows offered by the examples widget in the UI.
	# Every label used here must be a key of MODEL_INFO.
	examples = [
	    ["Nalubaale y'ennyanja esinga obunene mu Uganda.", "Model 1"],
	    # Spelling fixed: "tebamanyi" (was "tebamnyi"), since this text is synthesized.
	    ["Abantu bangi tebamanyi kuwandika bulungi Luganda.", "Model 3"],
	    ["Kampala kye kibuga kya Uganda ekikulu.", "Model 5"],
	    ["Webale nnyingi olw'obuyambi bwo.", "Model 2"],
	    ["Enkya tugenda okusoma ebitabo ebipya.", "Model 4"],
	]

	# Custom CSS for better styling and centering.
	# Passed to gr.Blocks(css=...). The class selectors defined here are the ones
	# attached to components through elem_classes=[...] in the layout, plus the
	# "examples-title" and "footer" classes used in the inline HTML snippets.
	custom_css = """
	/* Main container centering */
	.gradio-container {
	max-width: 1200px !important;
	margin: 0 auto !important;
	padding: 20px !important;
	}

	/* Center all content */
	.main-content {
	max-width: 1000px;
	margin: 0 auto;
	padding: 0 20px;
	}

	/* Status message styling */
	.status-message {
	padding: 12px;
	border-radius: 8px;
	margin: 10px 0;
	text-align: center;
	font-weight: 500;
	}

	/* Center radio buttons */
	.radio-group {
	display: flex;
	flex-direction: column;
	align-items: center;
	}

	/* Better button styling */
	.generate-btn {
	margin: 20px auto;
	display: block;
	min-width: 200px;
	}

	/* Examples section */
	.examples-section {
	margin: 30px 0;
	padding: 20px;
	background-color: #fafbfc;
	border-radius: 12px;
	border: 1px solid #e1e5e9;
	}

	/* Center examples title */
	.examples-title {
	text-align: center;
	font-size: 1.1em;
	font-weight: 600;
	margin-bottom: 15px;
	color: #374151;
	}

	/* Footer styling */
	.footer {
	margin-top: 40px;
	padding: 20px;
	text-align: center;
	border-top: 1px solid #e1e5e9;
	}

	/* Input components centering */
	.input-section {
	padding: 20px 0;
	}

	/* Audio output centering */
	.audio-section {
	display: flex;
	flex-direction: column;
	align-items: center;
	padding: 20px;
	}
	"""

	# Build the Gradio UI and bind it to `demo`: header banner, text/model inputs
	# with a generate button, audio + status outputs, example rows, and a footer.
	# NOTE(review): the indentation of the nested `with` blocks is flattened in
	# this copy, so which components sit inside which Row/Column cannot be read
	# off the text below — confirm against the deployed app.py before editing.
	with gr.Blocks(
	theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue"),
	css=custom_css,
	title="Luganda TTS"
	) as demo:

	with gr.Column(elem_classes=["main-content"]):
	# Header banner (inline-styled HTML rendered through Markdown)
	gr.Markdown(
	"""
	<div style="text-align: center; padding: 2em 0; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); margin: -20px -20px 30px -20px; border-radius: 0 0 20px 20px; color: white;">
	<h1 style="margin: 0; font-size: 2.5em; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">🗣️ Luganda TTS 🇺🇬</h1>
	<p style="font-size: 1.3em; margin: 15px 0 0 0; opacity: 0.95;">
	Convert text into natural Luganda speech using fine-tuned neural models<br>
	<span style="font-size: 0.9em; opacity: 0.8;">Choose from 5 different model checkpoints trained on Luganda data</span>
	</p>
	</div>
	"""
	)

	with gr.Row(equal_height=False):
	with gr.Column(scale=3, elem_classes=["input-section"]):
	# Text to synthesize; the label advertises the MAX_TXT_LEN limit that
	# tts() enforces by truncation.
	text_input = gr.Textbox(
	label=f"📝 Enter Luganda Text (max {MAX_TXT_LEN} characters)",
	placeholder="Wandika wano ekigambo mu Luganda...",
	value="Gyebale ko ssebo.",
	lines=4,
	max_lines=6,
	)

	gr.Markdown(
	"<h3 style='text-align: center; margin: 20px 0 10px 0; color: #4c1d95;'>🎛️ Model Selection</h3>"
	)
	# Choices are the MODEL_INFO labels, so the selected value is always a
	# key that load_synth() can look up.
	model_choice = gr.Radio(
	label="Choose TTS Model",
	choices=list(MODEL_INFO.keys()),
	value="Model 3",
	interactive=True,
	elem_classes=["radio-group"]
	)

	run_btn = gr.Button(
	"🔊 Generate Speech",
	variant="primary",
	size="lg",
	elem_classes=["generate-btn"]
	)

	with gr.Column(scale=2, elem_classes=["audio-section"]):
	gr.Markdown(
	"<h3 style='text-align: center; margin: 0 0 15px 0; color: #4c1d95;'>🎵 Generated Audio</h3>"
	)
	# type="filepath" matches tts(), which returns the path of a WAV file.
	audio_output = gr.Audio(
	label="Generated Speech",
	type="filepath",
	show_download_button=True
	)

	# Read-only box for the status string returned as tts()'s second value.
	status_output = gr.Textbox(
	label="Status",
	interactive=False,
	show_label=False,
	container=False,
	elem_classes=["status-message"]
	)

	# Examples section: rows from `examples` fill text_input and model_choice.
	with gr.Column(elem_classes=["examples-section"]):
	gr.Markdown("<div class='examples-title'>💡 Try these Luganda examples:</div>")
	gr.Examples(
	examples=examples,
	inputs=[text_input, model_choice],
	outputs=[audio_output, status_output],
	fn=tts,
	cache_examples=False,
	label=""
	)

	# Connect the generate button: tts(text, model) -> (audio path, status).
	run_btn.click(
	fn=tts,
	inputs=[text_input, model_choice],
	outputs=[audio_output, status_output]
	)

	# Footer (static HTML: technical details and usage tips)
	gr.Markdown(
	"""
	<div class="footer">
	<div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 25px; border-radius: 15px; color: white; text-shadow: 1px 1px 2px rgba(0,0,0,0.3);">
	<h3 style="margin: 0 0 10px 0; font-size: 1.3em;">🚀 Technical Details</h3>
	<p style="margin: 5px 0; font-size: 1.1em;">
	<strong>Powered by:</strong> Coqui TTS Framework<br>
	<strong>Models:</strong> Fine-tuned on Luganda speech data<br>
	<strong>Hosting:</strong> Hugging Face Spaces
	</p>
	</div>
	<div style="margin-top: 20px; padding: 15px; background-color: #f8f9ff; border-radius: 10px; border: 1px solid #e1e5e9;">
	<p style="margin: 0; font-size: 0.95em; color: #6b7280;">
	💡 <strong>Tips for best results:</strong> Use proper Luganda spelling, punctuation, and avoid mixing languages
	</p>
	</div>
	</div>
	"""
	)

	if __name__ == "__main__":
	    # Server settings applied only when this file is executed directly.
	    launch_options = {
	        "share": False,
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "show_error": True,
	    }
	    demo.launch(**launch_options)