Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -134,13 +134,15 @@ def load_model_for_language(language_choice):
|
|
134 |
|
135 |
# ---------------- HELPERS ---------------- #
|
136 |
def get_random_sentence(language_choice):
|
|
|
|
|
|
|
137 |
sentence = random.choice(SENTENCE_BANK[language_choice])
|
138 |
-
# Add simple transliteration for Tamil and Malayalam
|
139 |
if language_choice in ["Tamil", "Malayalam"]:
|
140 |
-
|
141 |
-
return
|
142 |
else:
|
143 |
-
return sentence
|
144 |
|
145 |
def is_script(text, lang_name):
|
146 |
pattern = SCRIPT_PATTERNS.get(lang_name)
|
@@ -170,7 +172,7 @@ def transliterate_to_hk(text, lang_choice):
|
|
170 |
return text
|
171 |
|
172 |
def transliterate_to_simple_roman(text, lang_choice):
|
173 |
-
"""Transliterate to
|
174 |
if not text or not text.strip():
|
175 |
return ""
|
176 |
|
@@ -178,41 +180,40 @@ def transliterate_to_simple_roman(text, lang_choice):
|
|
178 |
return text # Return as-is for English
|
179 |
|
180 |
try:
|
181 |
-
#
|
182 |
if lang_choice == "Tamil":
|
183 |
-
|
184 |
-
tamil_map = {
|
185 |
-
'அ': 'a', 'ஆ': 'aa', 'இ': 'i', 'ஈ': 'ee', 'உ': 'u', 'ஊ': 'oo',
|
186 |
-
'எ': 'e', 'ஏ': 'e', 'ஐ': 'ai', 'ஒ': 'o', 'ஓ': 'o', 'ஔ': 'au',
|
187 |
-
'க': 'ka', 'ங': 'nga', 'ச': 'cha', 'ஞ': 'nya', 'ட': 'ta', 'ண': 'na',
|
188 |
-
'த': 'tha', 'ந': 'na', 'ப': 'pa', 'ம': 'ma', 'ய': 'ya', 'ர': 'ra',
|
189 |
-
'ல': 'la', 'வ': 'va', 'ழ': 'zha', 'ள': 'la', 'ற': 'ra', 'ன': 'na',
|
190 |
-
'்': '', 'ா': 'aa', 'ி': 'i', 'ீ': 'ee', 'ு': 'u', 'ூ': 'oo',
|
191 |
-
'ெ': 'e', 'ே': 'e', 'ை': 'ai', 'ொ': 'o', 'ோ': 'o', 'ௌ': 'au'
|
192 |
-
}
|
193 |
-
simple_text = ""
|
194 |
-
for char in text:
|
195 |
-
simple_text += tamil_map.get(char, char)
|
196 |
-
return simple_text
|
197 |
-
|
198 |
elif lang_choice == "Malayalam":
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
|
|
|
|
|
|
|
|
214 |
|
215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
|
217 |
except Exception as e:
|
218 |
print(f"Transliteration error: {e}")
|
@@ -301,7 +302,7 @@ def create_tabular_feedback(intended, actual, lang_choice):
|
|
301 |
<h4 style='color: #3498db; margin-bottom: 10px;'>🎯 Target Sentence (How to Read)</h4>
|
302 |
<div style='font-size: 20px; font-family: monospace; color: #2c3e50; line-height: 1.4;'>
|
303 |
<strong>Original:</strong> {intended}<br>
|
304 |
-
<strong>
|
305 |
</div>
|
306 |
</div>
|
307 |
"""
|
@@ -315,7 +316,7 @@ def create_tabular_feedback(intended, actual, lang_choice):
|
|
315 |
<tr style='border-bottom: 2px solid #ddd;'>
|
316 |
<th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Type</th>
|
317 |
<th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Original Text</th>
|
318 |
-
<th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50;'>
|
319 |
</tr>
|
320 |
</thead>
|
321 |
<tbody>
|
@@ -587,6 +588,13 @@ with gr.Blocks(title="Pronunciation Comparator", theme=gr.themes.Soft()) as demo
|
|
587 |
interactive=False,
|
588 |
placeholder="Click 'Generate Practice Sentence' to get started..."
|
589 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
590 |
|
591 |
with gr.Row():
|
592 |
with gr.Column():
|
@@ -605,7 +613,7 @@ with gr.Blocks(title="Pronunciation Comparator", theme=gr.themes.Soft()) as demo
|
|
605 |
with gr.Row():
|
606 |
with gr.Column():
|
607 |
pass1_out = gr.Textbox(label="🗣️ What You Said", interactive=False)
|
608 |
-
actual_roman_out = gr.Textbox(label="🔤 Your Pronunciation (
|
609 |
with gr.Column():
|
610 |
wer_out = gr.Textbox(label="📊 Word Error Rate", interactive=False)
|
611 |
cer_out = gr.Textbox(label="📈 Character Error Rate", interactive=False)
|
@@ -614,10 +622,22 @@ with gr.Blocks(title="Pronunciation Comparator", theme=gr.themes.Soft()) as demo
|
|
614 |
feedback_display = gr.HTML()
|
615 |
|
616 |
# Event handlers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
617 |
gen_btn.click(
|
618 |
-
fn=
|
619 |
inputs=[lang_choice],
|
620 |
-
outputs=[intended_display]
|
621 |
)
|
622 |
|
623 |
analyze_btn.click(
|
|
|
134 |
|
135 |
# ---------------- HELPERS ---------------- #
|
136 |
def get_random_sentence(language_choice):
    """Return one randomly chosen entry of ``SENTENCE_BANK[language_choice]``."""
    candidates = SENTENCE_BANK[language_choice]
    return random.choice(candidates)
|
138 |
+
|
139 |
+
def get_random_sentence_with_transliteration(language_choice):
    """Pick a random practice sentence and pair it with its romanization.

    Returns a ``(sentence, transliteration)`` tuple. For "Tamil" and
    "Malayalam" the second element comes from
    ``transliterate_to_simple_roman``; for any other language choice it is
    an empty string.
    """
    picked = random.choice(SENTENCE_BANK[language_choice])
    if language_choice not in ("Tamil", "Malayalam"):
        # No romanization is produced for other languages.
        return picked, ""
    return picked, transliterate_to_simple_roman(picked, language_choice)
|
146 |
|
147 |
def is_script(text, lang_name):
|
148 |
pattern = SCRIPT_PATTERNS.get(lang_name)
|
|
|
172 |
return text
|
173 |
|
174 |
def transliterate_to_simple_roman(text, lang_choice):
|
175 |
+
"""Transliterate to Thanglish/Manglish - natural romanization used by speakers"""
|
176 |
if not text or not text.strip():
|
177 |
return ""
|
178 |
|
|
|
180 |
return text # Return as-is for English
|
181 |
|
182 |
try:
|
183 |
+
# First get IAST, then convert to natural romanization
|
184 |
if lang_choice == "Tamil":
|
185 |
+
iast_text = transliterate(text, sanscript.TAMIL, sanscript.IAST)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
elif lang_choice == "Malayalam":
|
187 |
+
iast_text = transliterate(text, sanscript.MALAYALAM, sanscript.IAST)
|
188 |
+
else:
|
189 |
+
return text
|
190 |
+
|
191 |
+
# Convert IAST to natural Thanglish/Manglish
|
192 |
+
natural_map = {
|
193 |
+
# Remove all diacritics and make it natural
|
194 |
+
'ā': 'a', 'ī': 'i', 'ū': 'u', 'ē': 'e', 'ō': 'o',
|
195 |
+
'ṅ': 'ng', 'ñ': 'nj', 'ṭ': 't', 'ḍ': 'd', 'ṇ': 'n',
|
196 |
+
'ṟ': 'r', 'ṉ': 'n', 'ḷ': 'l', 'ḻ': 'zh', 'ṛ': 'ru',
|
197 |
+
'ś': 'sh', 'ṣ': 'sh', 'ḥ': 'h', 'ṃ': 'm', 'ṁ': 'm',
|
198 |
+
'r̥': 'ri', 'l̥': 'li',
|
199 |
+
# Common combinations
|
200 |
+
'kṣ': 'ksh', 'jñ': 'gn', 'śr': 'shr',
|
201 |
+
# Remove virama marks
|
202 |
+
'·': '', 'ŕ': 'r', 'ľ': 'l',
|
203 |
+
# Handle long vowels naturally
|
204 |
+
'aa': 'a', 'ii': 'i', 'uu': 'u', 'ee': 'e', 'oo': 'o'
|
205 |
+
}
|
206 |
|
207 |
+
natural_text = iast_text
|
208 |
+
for iast, natural in natural_map.items():
|
209 |
+
natural_text = natural_text.replace(iast, natural)
|
210 |
+
|
211 |
+
# Additional cleanup for natural flow
|
212 |
+
natural_text = natural_text.replace('zhz', 'zh') # Double zh fix
|
213 |
+
natural_text = natural_text.replace('nnn', 'nn') # Triple n fix
|
214 |
+
natural_text = natural_text.replace('lll', 'll') # Triple l fix
|
215 |
+
|
216 |
+
return natural_text if natural_text else text
|
217 |
|
218 |
except Exception as e:
|
219 |
print(f"Transliteration error: {e}")
|
|
|
302 |
<h4 style='color: #3498db; margin-bottom: 10px;'>🎯 Target Sentence (How to Read)</h4>
|
303 |
<div style='font-size: 20px; font-family: monospace; color: #2c3e50; line-height: 1.4;'>
|
304 |
<strong>Original:</strong> {intended}<br>
|
305 |
+
<strong>Thanglish/Manglish:</strong> <span style='color: #e67e22; font-weight: bold;'>{intended_roman}</span>
|
306 |
</div>
|
307 |
</div>
|
308 |
"""
|
|
|
316 |
<tr style='border-bottom: 2px solid #ddd;'>
|
317 |
<th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Type</th>
|
318 |
<th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Original Text</th>
|
319 |
+
<th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50;'>Thanglish/Manglish</th>
|
320 |
</tr>
|
321 |
</thead>
|
322 |
<tbody>
|
|
|
588 |
interactive=False,
|
589 |
placeholder="Click 'Generate Practice Sentence' to get started..."
|
590 |
)
|
591 |
+
|
592 |
+
intended_transliteration = gr.Textbox(
|
593 |
+
label="🔤 How to Read (Thanglish/Manglish)",
|
594 |
+
interactive=False,
|
595 |
+
placeholder="Natural romanization will appear here...",
|
596 |
+
visible=False
|
597 |
+
)
|
598 |
|
599 |
with gr.Row():
|
600 |
with gr.Column():
|
|
|
613 |
with gr.Row():
|
614 |
with gr.Column():
|
615 |
pass1_out = gr.Textbox(label="🗣️ What You Said", interactive=False)
|
616 |
+
actual_roman_out = gr.Textbox(label="🔤 Your Pronunciation (Thanglish/Manglish)", interactive=False)
|
617 |
with gr.Column():
|
618 |
wer_out = gr.Textbox(label="📊 Word Error Rate", interactive=False)
|
619 |
cer_out = gr.Textbox(label="📈 Character Error Rate", interactive=False)
|
|
|
622 |
feedback_display = gr.HTML()
|
623 |
|
624 |
# Event handlers
|
625 |
+
def update_transliteration_visibility(language_choice):
    """Build the update that shows or hides the romanization textbox.

    The box is made visible for "Tamil" and "Malayalam"; for any other
    choice it is hidden and its value is reset to an empty string.
    """
    if language_choice not in ("Tamil", "Malayalam"):
        # Hide the box and clear its text in the same update.
        return gr.update(visible=False, value="")
    return gr.update(visible=True)
|
630 |
+
|
631 |
+
lang_choice.change(
|
632 |
+
fn=update_transliteration_visibility,
|
633 |
+
inputs=[lang_choice],
|
634 |
+
outputs=[intended_transliteration]
|
635 |
+
)
|
636 |
+
|
637 |
gen_btn.click(
|
638 |
+
fn=get_random_sentence_with_transliteration,
|
639 |
inputs=[lang_choice],
|
640 |
+
outputs=[intended_display, intended_transliteration]
|
641 |
)
|
642 |
|
643 |
analyze_btn.click(
|