sulaimank committed on
Commit
f6ff058
·
verified ·
1 Parent(s): 4f2ec6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -227
app.py CHANGED
@@ -3,267 +3,114 @@ import tempfile
3
  import gradio as gr
4
  from huggingface_hub import hf_hub_download
5
  from TTS.utils.synthesizer import Synthesizer
6
- import logging
7
-
8
- # Configure logging
9
- logging.basicConfig(level=logging.INFO)
10
- logger = logging.getLogger(__name__)
11
 
12
  # Max input text length
13
  MAX_TXT_LEN = 400
14
 
15
- # Map simple names to checkpoint files
16
  MODEL_INFO = {
17
  "Model 1": "checkpoint_2080000.pth",
18
- "Model 2": "checkpoint_2085000.pth",
19
  "Model 3": "checkpoint_2090000.pth",
20
  "Model 4": "checkpoint_2095000.pth",
21
  "Model 5": "checkpoint_2100000.pth",
22
  }
23
 
24
- # Cache for loaded synthesizers to avoid reloading
25
- synthesizer_cache = {}
26
-
27
- def download_config():
28
- """Download and cache the config file."""
29
- try:
30
- config_path = hf_hub_download("sulaimank/luganda_LMs", filename="config.json")
31
- logger.info(f"Config downloaded to: {config_path}")
32
- return config_path
33
- except Exception as e:
34
- logger.error(f"Failed to download config: {e}")
35
- raise
36
 
37
- # Download config once at startup
38
- config_path = download_config()
39
-
40
- def load_synth(model_choice: str):
41
- """Load synthesizer with caching to improve performance."""
42
- if model_choice in synthesizer_cache:
43
- logger.info(f"Using cached synthesizer for {model_choice}")
44
- return synthesizer_cache[model_choice]
45
-
46
- try:
47
- model_file = MODEL_INFO[model_choice]
48
- model_path = hf_hub_download("sulaimank/luganda_LMs", filename=model_file)
49
- synthesizer = Synthesizer(tts_checkpoint=model_path, tts_config_path=config_path)
50
-
51
- # Cache the synthesizer
52
- synthesizer_cache[model_choice] = synthesizer
53
- logger.info(f"Loaded and cached synthesizer for {model_choice}")
54
- return synthesizer
55
- except Exception as e:
56
- logger.error(f"Failed to load synthesizer for {model_choice}: {e}")
57
- raise
58
 
59
  def tts(text: str, model_choice: str):
60
- """Generate TTS audio from text."""
61
- if not text.strip():
62
- return None, "⚠️ Please enter some text to synthesize."
63
-
64
- # Truncate if too long
65
- original_length = len(text)
66
  if len(text) > MAX_TXT_LEN:
67
  text = text[:MAX_TXT_LEN]
68
- warning_msg = f"⚠️ Input truncated from {original_length} to {MAX_TXT_LEN} characters."
69
- else:
70
- warning_msg = f"✅ Processing {len(text)} characters."
71
-
72
- try:
73
- logger.info(f"Generating TTS for: '{text[:50]}...' using {model_choice}")
74
- synthesizer = load_synth(model_choice)
75
- wav = synthesizer.tts(text)
76
-
77
- # Save to temporary file
78
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
79
- synthesizer.save_wav(wav, fp.name)
80
- logger.info(f"Audio saved to: {fp.name}")
81
- return fp.name, warning_msg
82
-
83
- except Exception as e:
84
- error_msg = f"❌ Error generating speech: {str(e)}"
85
- logger.error(error_msg)
86
- return None, error_msg
87
 
88
  # Example sentences
89
  examples = [
90
  ["Nalubaale y'ennyanja esinga obunene mu Uganda.", "Model 1"],
91
- ["Abantu bangi tebamnyi kuwandika bulungi Luganda.", "Model 3"],
92
  ["Kampala kye kibuga kya Uganda ekikulu.", "Model 5"],
93
- ["Webale nnyingi olw'obuyambi bwo.", "Model 2"],
94
- ["Enkya tugenda okusoma ebitabo ebipya.", "Model 4"],
95
  ]
96
 
97
- # Custom CSS for better styling and centering
98
- custom_css = """
99
- /* Main container centering */
100
- .gradio-container {
101
- max-width: 1400px !important;
102
- margin: 0 auto !important;
103
- padding: 10px !important;
104
- }
105
-
106
- /* Content wrapper */
107
- .main-content {
108
- max-width: 1400px;
109
- margin: 0 auto;
110
- padding: 0 10px;
111
- }
112
-
113
- /* Heading center */
114
- .header {
115
- text-align: center;
116
- padding: 2em 0;
117
- }
118
- .header h1 {
119
- font-size: 2.5em;
120
- margin-bottom: 0.3em;
121
- }
122
- .header p {
123
- font-size: 1.2em;
124
- margin: 0.2em 0;
125
- }
126
 
127
- /* Make text, radio, and audio boxes wider */
128
- .input-section textarea,
129
- .input-section .wrap,
130
- .audio-section audio {
131
- width: 100% !important;
132
- min-width: 700px;
133
- }
134
 
135
- /* Radio group horizontal and wide */
136
- .radio-group {
137
- display: flex;
138
- flex-direction: row;
139
- justify-content: space-between;
140
- flex-wrap: wrap;
141
- gap: 15px;
142
- }
143
- .radio-group label {
144
- flex: 1;
145
- min-width: 150px;
146
- padding: 12px 20px !important;
147
- border-radius: 20px !important;
148
- text-align: center;
149
- }
150
 
151
- /* Bigger button */
152
- .generate-btn {
153
- margin: 25px auto;
154
- display: block;
155
- min-width: 250px;
156
- font-size: 1.1em;
157
- }
158
- """
159
 
160
- with gr.Blocks(
161
- theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue"),
162
- css=custom_css,
163
- title="Luganda TTS"
164
- ) as demo:
165
-
166
- with gr.Column(elem_classes=["main-content"]):
167
- # Header
168
- gr.Markdown(
169
- """
170
- <div class="header" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); margin: -20px -20px 30px -20px; border-radius: 0 0 20px 20px; color: white;">
171
- <h1>🗣️ Luganda TTS 🇺🇬</h1>
172
- <p>Convert text into natural Luganda speech using fine-tuned neural models</p>
173
- <p style="font-size: 0.9em; opacity: 0.9;">Choose from 5 different model checkpoints trained on Luganda data</p>
174
- </div>
175
- """
176
- )
177
-
178
- with gr.Row(equal_height=False):
179
- with gr.Column(scale=5, elem_classes=["input-section"]):
180
- text_input = gr.Textbox(
181
- label=f"📝 Enter Luganda Text (max {MAX_TXT_LEN} characters)",
182
- placeholder="Wandika wano ekigambo mu Luganda...",
183
- value="Gyebale ko ssebo.",
184
- lines=4,
185
- max_lines=6,
186
- )
187
-
188
- gr.Markdown(
189
- "<h3 style='text-align: center; margin: 20px 0 10px 0; color: #4c1d95;'>🎛️ Model Selection</h3>"
190
- )
191
- model_choice = gr.Radio(
192
- label="Choose TTS Model",
193
- choices=list(MODEL_INFO.keys()),
194
- value="Model 3",
195
- interactive=True,
196
- elem_classes=["radio-group"]
197
- )
198
-
199
- run_btn = gr.Button(
200
- "🔊 Generate Speech",
201
- variant="primary",
202
- size="lg",
203
- elem_classes=["generate-btn"]
204
- )
205
-
206
- with gr.Column(scale=4, elem_classes=["audio-section"]):
207
- gr.Markdown(
208
- "<h3 style='text-align: center; margin: 0 0 15px 0; color: #4c1d95;'>🎵 Generated Audio</h3>"
209
- )
210
- audio_output = gr.Audio(
211
- label="Generated Speech",
212
- type="filepath",
213
- show_download_button=True
214
- )
215
-
216
- status_output = gr.Textbox(
217
- label="Status",
218
- interactive=False,
219
- show_label=False,
220
- container=False,
221
- )
222
-
223
- # Examples section
224
- with gr.Column():
225
- gr.Markdown("<div style='text-align:center; font-size:1.1em; font-weight:600;'>💡 Try these Luganda examples:</div>")
226
- gr.Examples(
227
- examples=examples,
228
- inputs=[text_input, model_choice],
229
- outputs=[audio_output, status_output],
230
- fn=tts,
231
- cache_examples=False,
232
- label=""
233
- )
234
-
235
- # Connect the generate button
236
- run_btn.click(
237
- fn=tts,
238
  inputs=[text_input, model_choice],
239
- outputs=[audio_output, status_output]
 
 
240
  )
241
-
 
 
 
 
 
242
  # Footer
243
  # gr.Markdown(
244
  # """
245
- # <div style="margin-top: 40px; padding: 20px; text-align: center; border-top: 1px solid #e1e5e9;">
246
- # <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 25px; border-radius: 15px; color: white;">
247
- # <h3 style="margin: 0 0 10px 0; font-size: 1.3em;">🚀 Technical Details</h3>
248
- # <p style="margin: 5px 0; font-size: 1.1em;">
249
- # # <strong>Powered by:</strong> Coqui TTS Framework<br>
250
- # # <strong>Models:</strong> Fine-tuned on Luganda speech data<br>
251
- # <strong>Hosting:</strong> Hugging Face Spaces
252
- # </p>
253
- # </div>
254
- # <div style="margin-top: 20px; padding: 15px; background-color: #f8f9ff; border-radius: 10px; border: 1px solid #e1e5e9;">
255
- # <p style="margin: 0; font-size: 0.95em; color: #6b7280;">
256
- # 💡 <strong>Tips:</strong> Use proper Luganda spelling, punctuation, and avoid mixing languages
257
- # </p>
258
- # </div>
259
  # </div>
260
  # """
261
  # )
262
 
263
  if __name__ == "__main__":
264
- demo.launch(
265
- share=False,
266
- server_name="0.0.0.0",
267
- server_port=7860,
268
- show_error=True
269
- )
 
3
  import gradio as gr
4
  from huggingface_hub import hf_hub_download
5
  from TTS.utils.synthesizer import Synthesizer
 
 
 
 
 
6
 
7
  # Max input text length
8
  MAX_TXT_LEN = 400
9
 
10
+ # Map simple names (Model 1, Model 2...) to checkpoint files
11
  MODEL_INFO = {
12
  "Model 1": "checkpoint_2080000.pth",
13
+ "Model 2": "checkpoint_2085000.pth",
14
  "Model 3": "checkpoint_2090000.pth",
15
  "Model 4": "checkpoint_2095000.pth",
16
  "Model 5": "checkpoint_2100000.pth",
17
  }
18
 
19
+ # Download config once
20
+ config_path = hf_hub_download("sulaimank/luganda_LMs", filename="config.json")
 
 
 
 
 
 
 
 
 
 
21
 
22
+ def load_synth(model_file):
23
+ """Download and initialize the chosen synthesizer"""
24
+ model_path = hf_hub_download("sulaimank/luganda_LMs", filename=model_file)
25
+ return Synthesizer(
26
+ tts_checkpoint=model_path,
27
+ tts_config_path=config_path
28
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def tts(text: str, model_choice: str):
 
 
 
 
 
 
31
  if len(text) > MAX_TXT_LEN:
32
  text = text[:MAX_TXT_LEN]
33
+ print(f"⚠️ Input truncated to {MAX_TXT_LEN} characters.")
34
+
35
+ synthesizer = load_synth(MODEL_INFO[model_choice])
36
+ wav = synthesizer.tts(text)
37
+
38
+ # Save temp wav file for playback
39
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
40
+ synthesizer.save_wav(wav, fp.name)
41
+ return fp.name
 
 
 
 
 
 
 
 
 
 
42
 
43
  # Example sentences
44
  examples = [
45
  ["Nalubaale y'ennyanja esinga obunene mu Uganda.", "Model 1"],
46
+ ["Abantu bangi tebamanyi kuwandika bulungi Luganda.", "Model 3"],
47
  ["Kampala kye kibuga kya Uganda ekikulu.", "Model 5"],
 
 
48
  ]
49
 
50
+ # Modern Gradio Blocks UI
51
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue")) as demo:
52
+ # Header
53
+ gr.Markdown(
54
+ """
55
+ <div style="text-align: center; padding: 1em 0;">
56
+ <h1>🗣️ Luganda TTS 🗣️</h1>
57
+ <p style="font-size: 1.2em;">
58
+ Convert text into natural Luganda speech with fine-tuned neural TTS models.<br>
59
+ Select a model below and type some Luganda text.
60
+ </p>
61
+ </div>
62
+ """
63
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ # Centered card container
66
+ gr.Markdown(
67
+ """
68
+ <div style='max-width: 900px; margin: auto; padding: 20px; background: #ffffff;
69
+ border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1);'>
70
+ """
71
+ )
72
 
73
+ with gr.Row():
74
+ with gr.Column(scale=3):
75
+ text_input = gr.Textbox(
76
+ label="Enter Luganda Text",
77
+ placeholder="Wandika wano ekigambo mu Luganda...",
78
+ value="Gyebale ko ssebo.",
79
+ lines=3,
80
+ )
81
+ model_choice = gr.Radio(
82
+ label="Choose Model",
83
+ choices=list(MODEL_INFO.keys()),
84
+ value="Model 1",
85
+ interactive=True,
86
+ )
87
+ run_btn = gr.Button("🔊 Generate Speech", variant="primary")
88
 
89
+ with gr.Column(scale=2):
90
+ audio_output = gr.Audio(label="Generated Speech", type="filepath")
 
 
 
 
 
 
91
 
92
+ gr.Examples(
93
+ examples=examples,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  inputs=[text_input, model_choice],
95
+ outputs=[audio_output],
96
+ fn=tts,
97
+ cache_examples=False,
98
  )
99
+
100
+ run_btn.click(fn=tts, inputs=[text_input, model_choice], outputs=audio_output)
101
+
102
+ # Close card
103
+ gr.Markdown("</div>")
104
+
105
  # Footer
106
  # gr.Markdown(
107
  # """
108
+ # ---
109
+ # <div style="text-align: center; font-size: 0.9em; color: gray;">
110
+ # 🚀 Developed with <b>Coqui TTS</b> · Hosted on <b>Hugging Face Spaces</b>
 
 
 
 
 
 
 
 
 
 
 
111
  # </div>
112
  # """
113
  # )
114
 
115
  if __name__ == "__main__":
116
+ demo.launch()