sudhanm committed on
Commit
d031a29
·
verified ·
1 Parent(s): fbcc894

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -42
app.py CHANGED
@@ -134,13 +134,15 @@ def load_model_for_language(language_choice):
134
 
135
  # ---------------- HELPERS ---------------- #
136
  def get_random_sentence(language_choice):
 
 
 
137
  sentence = random.choice(SENTENCE_BANK[language_choice])
138
- # Add simple transliteration for Tamil and Malayalam
139
  if language_choice in ["Tamil", "Malayalam"]:
140
- simple_roman = transliterate_to_simple_roman(sentence, language_choice)
141
- return f"{sentence}\n\n📖 Read as: {simple_roman}"
142
  else:
143
- return sentence
144
 
145
  def is_script(text, lang_name):
146
  pattern = SCRIPT_PATTERNS.get(lang_name)
@@ -170,7 +172,7 @@ def transliterate_to_hk(text, lang_choice):
170
  return text
171
 
172
  def transliterate_to_simple_roman(text, lang_choice):
173
- """Transliterate to very simple, easy-to-read Roman letters"""
174
  if not text or not text.strip():
175
  return ""
176
 
@@ -178,41 +180,40 @@ def transliterate_to_simple_roman(text, lang_choice):
178
  return text # Return as-is for English
179
 
180
  try:
181
- # Direct character mapping to simple Roman letters
182
  if lang_choice == "Tamil":
183
- # Tamil to simple Roman mapping
184
- tamil_map = {
185
- 'அ': 'a', 'ஆ': 'aa', 'இ': 'i', 'ஈ': 'ee', 'உ': 'u', 'ஊ': 'oo',
186
- 'எ': 'e', 'ஏ': 'e', 'ஐ': 'ai', 'ஒ': 'o', 'ஓ': 'o', 'ஔ': 'au',
187
- 'க': 'ka', 'ங': 'nga', 'ச': 'cha', 'ஞ': 'nya', 'ட': 'ta', 'ண': 'na',
188
- 'த': 'tha', 'ந': 'na', 'ப': 'pa', 'ம': 'ma', 'ய': 'ya', 'ர': 'ra',
189
- 'ல': 'la', 'வ': 'va', 'ழ': 'zha', 'ள': 'la', 'ற': 'ra', 'ன': 'na',
190
- '்': '', 'ா': 'aa', 'ி': 'i', 'ீ': 'ee', 'ு': 'u', 'ூ': 'oo',
191
- 'ெ': 'e', 'ே': 'e', 'ை': 'ai', 'ொ': 'o', 'ோ': 'o', 'ௌ': 'au'
192
- }
193
- simple_text = ""
194
- for char in text:
195
- simple_text += tamil_map.get(char, char)
196
- return simple_text
197
-
198
  elif lang_choice == "Malayalam":
199
- # Malayalam to simple Roman mapping
200
- malayalam_map = {
201
- 'അ': 'a', 'ആ': 'aa', 'ഇ': 'i', 'ഈ': 'ee', 'ഉ': 'u', 'ഊ': 'oo',
202
- 'എ': 'e', 'ഏ': 'e', 'ഐ': 'ai', 'ഒ': 'o', 'ഓ': 'o', 'ഔ': 'au',
203
- 'ക': 'ka', 'ഗ': 'ga', 'ങ': 'nga', 'ച': 'cha', 'ജ': 'ja', 'ഞ': 'nya',
204
- 'ട': 'ta', 'ഡ': 'da', 'ണ': 'na', 'ത': 'tha', 'ദ': 'da', 'ന': 'na',
205
- 'പ': 'pa', 'ബ': 'ba', 'മ': 'ma', 'യ': 'ya', 'ര': 'ra', 'ല': 'la',
206
- 'വ': 'va', 'ശ': 'sha', 'ഷ': 'sha', 'സ': 'sa', 'ഹ': 'ha', 'ള': 'la',
207
- '്': '', 'ാ': 'aa', 'ി': 'i', 'ീ': 'ee', 'ു': 'u', 'ൂ': 'oo',
208
- 'െ': 'e', 'േ': 'e', 'ൈ': 'ai', 'ൊ': 'o', 'ോ': 'o', 'ൌ': 'au'
209
- }
210
- simple_text = ""
211
- for char in text:
212
- simple_text += malayalam_map.get(char, char)
213
- return simple_text
 
 
 
 
214
 
215
- return text
 
 
 
 
 
 
 
 
 
216
 
217
  except Exception as e:
218
  print(f"Transliteration error: {e}")
@@ -301,7 +302,7 @@ def create_tabular_feedback(intended, actual, lang_choice):
301
  <h4 style='color: #3498db; margin-bottom: 10px;'>🎯 Target Sentence (How to Read)</h4>
302
  <div style='font-size: 20px; font-family: monospace; color: #2c3e50; line-height: 1.4;'>
303
  <strong>Original:</strong> {intended}<br>
304
- <strong>Read as:</strong> <span style='color: #e67e22; font-weight: bold;'>{intended_roman}</span>
305
  </div>
306
  </div>
307
  """
@@ -315,7 +316,7 @@ def create_tabular_feedback(intended, actual, lang_choice):
315
  <tr style='border-bottom: 2px solid #ddd;'>
316
  <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Type</th>
317
  <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Original Text</th>
318
- <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50;'>Simple English Sounds</th>
319
  </tr>
320
  </thead>
321
  <tbody>
@@ -587,6 +588,13 @@ with gr.Blocks(title="Pronunciation Comparator", theme=gr.themes.Soft()) as demo
587
  interactive=False,
588
  placeholder="Click 'Generate Practice Sentence' to get started..."
589
  )
 
 
 
 
 
 
 
590
 
591
  with gr.Row():
592
  with gr.Column():
@@ -605,7 +613,7 @@ with gr.Blocks(title="Pronunciation Comparator", theme=gr.themes.Soft()) as demo
605
  with gr.Row():
606
  with gr.Column():
607
  pass1_out = gr.Textbox(label="🗣️ What You Said", interactive=False)
608
- actual_roman_out = gr.Textbox(label="🔤 Your Pronunciation (Simple Sounds)", interactive=False)
609
  with gr.Column():
610
  wer_out = gr.Textbox(label="📊 Word Error Rate", interactive=False)
611
  cer_out = gr.Textbox(label="📈 Character Error Rate", interactive=False)
@@ -614,10 +622,22 @@ with gr.Blocks(title="Pronunciation Comparator", theme=gr.themes.Soft()) as demo
614
  feedback_display = gr.HTML()
615
 
616
  # Event handlers
 
 
 
 
 
 
 
 
 
 
 
 
617
  gen_btn.click(
618
- fn=get_random_sentence,
619
  inputs=[lang_choice],
620
- outputs=[intended_display]
621
  )
622
 
623
  analyze_btn.click(
 
134
 
135
  # ---------------- HELPERS ---------------- #
136
  def get_random_sentence(language_choice):
137
+ return random.choice(SENTENCE_BANK[language_choice])
138
+
139
+ def get_random_sentence_with_transliteration(language_choice):
140
  sentence = random.choice(SENTENCE_BANK[language_choice])
 
141
  if language_choice in ["Tamil", "Malayalam"]:
142
+ transliteration = transliterate_to_simple_roman(sentence, language_choice)
143
+ return sentence, transliteration
144
  else:
145
+ return sentence, ""
146
 
147
  def is_script(text, lang_name):
148
  pattern = SCRIPT_PATTERNS.get(lang_name)
 
172
  return text
173
 
174
  def transliterate_to_simple_roman(text, lang_choice):
175
+ """Transliterate to Thanglish/Manglish - natural romanization used by speakers"""
176
  if not text or not text.strip():
177
  return ""
178
 
 
180
  return text # Return as-is for English
181
 
182
  try:
183
+ # First get IAST, then convert to natural romanization
184
  if lang_choice == "Tamil":
185
+ iast_text = transliterate(text, sanscript.TAMIL, sanscript.IAST)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  elif lang_choice == "Malayalam":
187
+ iast_text = transliterate(text, sanscript.MALAYALAM, sanscript.IAST)
188
+ else:
189
+ return text
190
+
191
+ # Convert IAST to natural Thanglish/Manglish
192
+ natural_map = {
193
+ # Remove all diacritics and make it natural
194
+ 'ā': 'a', 'ī': 'i', 'ū': 'u', 'ē': 'e', 'ō': 'o',
195
+ 'ṅ': 'ng', 'ñ': 'nj', 'ṭ': 't', 'ḍ': 'd', 'ṇ': 'n',
196
+ 'ṟ': 'r', 'ṉ': 'n', 'ḷ': 'l', 'ḻ': 'zh', 'ṛ': 'ru',
197
+ 'ś': 'sh', 'ṣ': 'sh', 'ḥ': 'h', 'ṃ': 'm', 'ṁ': 'm',
198
+ 'r̥': 'ri', 'l̥': 'li',
199
+ # Common combinations
200
+ 'kṣ': 'ksh', 'jñ': 'gn', 'śr': 'shr',
201
+ # Remove virama marks
202
+ '·': '', 'ŕ': 'r', 'ľ': 'l',
203
+ # Handle long vowels naturally
204
+ 'aa': 'a', 'ii': 'i', 'uu': 'u', 'ee': 'e', 'oo': 'o'
205
+ }
206
 
207
+ natural_text = iast_text
208
+ for iast, natural in natural_map.items():
209
+ natural_text = natural_text.replace(iast, natural)
210
+
211
+ # Additional cleanup for natural flow
212
+ natural_text = natural_text.replace('zhz', 'zh') # Double zh fix
213
+ natural_text = natural_text.replace('nnn', 'nn') # Triple n fix
214
+ natural_text = natural_text.replace('lll', 'll') # Triple l fix
215
+
216
+ return natural_text if natural_text else text
217
 
218
  except Exception as e:
219
  print(f"Transliteration error: {e}")
 
302
  <h4 style='color: #3498db; margin-bottom: 10px;'>🎯 Target Sentence (How to Read)</h4>
303
  <div style='font-size: 20px; font-family: monospace; color: #2c3e50; line-height: 1.4;'>
304
  <strong>Original:</strong> {intended}<br>
305
+ <strong>Thanglish/Manglish:</strong> <span style='color: #e67e22; font-weight: bold;'>{intended_roman}</span>
306
  </div>
307
  </div>
308
  """
 
316
  <tr style='border-bottom: 2px solid #ddd;'>
317
  <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Type</th>
318
  <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Original Text</th>
319
+ <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50;'>Thanglish/Manglish</th>
320
  </tr>
321
  </thead>
322
  <tbody>
 
588
  interactive=False,
589
  placeholder="Click 'Generate Practice Sentence' to get started..."
590
  )
591
+
592
+ intended_transliteration = gr.Textbox(
593
+ label="🔤 How to Read (Thanglish/Manglish)",
594
+ interactive=False,
595
+ placeholder="Natural romanization will appear here...",
596
+ visible=False
597
+ )
598
 
599
  with gr.Row():
600
  with gr.Column():
 
613
  with gr.Row():
614
  with gr.Column():
615
  pass1_out = gr.Textbox(label="🗣️ What You Said", interactive=False)
616
+ actual_roman_out = gr.Textbox(label="🔤 Your Pronunciation (Thanglish/Manglish)", interactive=False)
617
  with gr.Column():
618
  wer_out = gr.Textbox(label="📊 Word Error Rate", interactive=False)
619
  cer_out = gr.Textbox(label="📈 Character Error Rate", interactive=False)
 
622
  feedback_display = gr.HTML()
623
 
624
  # Event handlers
625
+ def update_transliteration_visibility(language_choice):
626
+ if language_choice in ["Tamil", "Malayalam"]:
627
+ return gr.update(visible=True)
628
+ else:
629
+ return gr.update(visible=False, value="")
630
+
631
+ lang_choice.change(
632
+ fn=update_transliteration_visibility,
633
+ inputs=[lang_choice],
634
+ outputs=[intended_transliteration]
635
+ )
636
+
637
  gen_btn.click(
638
+ fn=get_random_sentence_with_transliteration,
639
  inputs=[lang_choice],
640
+ outputs=[intended_display, intended_transliteration]
641
  )
642
 
643
  analyze_btn.click(