sudhanm committed on
Commit
35b317d
·
verified ·
1 Parent(s): 57ea064

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +391 -203
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import random
3
  import difflib
4
  import re
 
5
  import jiwer
6
  import torch
7
  from transformers import WhisperForConditionalGeneration, WhisperProcessor
@@ -74,6 +75,266 @@ SENTENCE_BANK = {
74
  ]
75
  }
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  # ---------------- MEMORY OPTIMIZED MODEL LOADING ---------------- #
78
  # Store only currently loaded model to save memory
79
  current_model = {"language": None, "model": None, "processor": None}
@@ -139,8 +400,11 @@ def get_random_sentence(language_choice):
139
  def get_random_sentence_with_transliteration(language_choice):
140
  sentence = random.choice(SENTENCE_BANK[language_choice])
141
  if language_choice in ["Tamil", "Malayalam"]:
142
- transliteration = transliterate_to_simple_roman(sentence, language_choice)
143
- return sentence, transliteration
 
 
 
144
  else:
145
  return sentence, ""
146
 
@@ -171,88 +435,12 @@ def transliterate_to_hk(text, lang_choice):
171
  print(f"Transliteration error: {e}")
172
  return text
173
 
 
174
  def transliterate_to_simple_roman(text, lang_choice):
175
- """Transliterate to Thanglish/Manglish - natural romanization used by speakers"""
176
- if not text or not text.strip():
177
- return ""
178
-
179
- if lang_choice == "English":
180
- return text # Return as-is for English
181
-
182
- try:
183
- # First get IAST, then convert to natural romanization
184
- if lang_choice == "Tamil":
185
- iast_text = transliterate(text, sanscript.TAMIL, sanscript.IAST)
186
- elif lang_choice == "Malayalam":
187
- iast_text = transliterate(text, sanscript.MALAYALAM, sanscript.IAST)
188
- else:
189
- return text
190
-
191
- # Comprehensive cleanup to remove ALL diacritics and make it natural
192
- natural_map = {
193
- # Vowels with diacritics
194
- 'ā': 'a', 'á': 'a', 'à': 'a', 'â': 'a', 'ä': 'a',
195
- 'ī': 'i', 'í': 'i', 'ì': 'i', 'î': 'i', 'ï': 'i',
196
- 'ū': 'u', 'ú': 'u', 'ù': 'u', 'û': 'u', 'ü': 'u',
197
- 'ē': 'e', 'é': 'e', 'è': 'e', 'ê': 'e', 'ë': 'e',
198
- 'ō': 'o', 'ó': 'o', 'ò': 'o', 'ô': 'o', 'ö': 'o',
199
-
200
- # Consonants with diacritics
201
- 'ṅ': 'ng', 'ň': 'n', 'ñ': 'nj', 'ń': 'n',
202
- 'ṭ': 't', 'ť': 't', 'ţ': 't',
203
- 'ḍ': 'd', 'ď': 'd', 'ḏ': 'd',
204
- 'ṇ': 'n', 'ņ': 'n', 'ṉ': 'n',
205
- 'ṟ': 'r', 'ř': 'r', 'ŕ': 'r', 'ṛ': 'ru',
206
- 'ḷ': 'l', 'ľ': 'l', 'ļ': 'l', 'ḻ': 'zh',
207
- 'ś': 'sh', 'š': 'sh', 'ṣ': 'sh', 'ş': 's',
208
- 'ḥ': 'h', 'ḫ': 'h', 'ħ': 'h',
209
- 'ṃ': 'm', 'ṁ': 'm', 'ḿ': 'm',
210
- 'ç': 'ch', 'č': 'ch',
211
-
212
- # Vocalic consonants
213
- 'r̥': 'ri', 'r̥̄': 'ri',
214
- 'l̥': 'li', 'l̥̄': 'li',
215
-
216
- # Common combinations
217
- 'kṣ': 'ksh', 'jñ': 'gn', 'śr': 'shr',
218
-
219
- # Remove virama and other marks
220
- '·': '', '̥': '', '̄': '', '̃': '', '̂': '', '̀': '', '́': '',
221
-
222
- # Double letters cleanup
223
- 'aa': 'a', 'ii': 'i', 'uu': 'u', 'ee': 'e', 'oo': 'o'
224
- }
225
-
226
- natural_text = iast_text
227
-
228
- # Apply all mappings
229
- for iast, natural in natural_map.items():
230
- natural_text = natural_text.replace(iast, natural)
231
-
232
- # Additional cleanup passes for any remaining diacritics
233
- import unicodedata
234
- # Remove all combining diacritical marks
235
- natural_text = ''.join(c for c in unicodedata.normalize('NFD', natural_text)
236
- if unicodedata.category(c) != 'Mn')
237
-
238
- # Fix common Malayalam/Tamil patterns
239
- natural_text = natural_text.replace('zhz', 'zh') # Double zh fix
240
- natural_text = natural_text.replace('nnn', 'nn') # Triple n fix
241
- natural_text = natural_text.replace('lll', 'll') # Triple l fix
242
- natural_text = natural_text.replace('tth', 'th') # Simplify aspirated
243
- natural_text = natural_text.replace('ddh', 'dh') # Simplify aspirated
244
-
245
- # Make it more natural for Manglish/Thanglish
246
- if lang_choice == "Malayalam":
247
- natural_text = natural_text.replace('samgitam', 'sangeetham')
248
- natural_text = natural_text.replace('jivitattinre', 'jeevitathinte')
249
- natural_text = natural_text.replace('bhagaman', 'bhagamaanu')
250
-
251
- return natural_text if natural_text else text
252
-
253
- except Exception as e:
254
- print(f"Transliteration error: {e}")
255
- return text
256
 
257
  @spaces.GPU
258
  def transcribe_once(audio_path, language_choice, beam_size, temperature):
@@ -305,14 +493,14 @@ def normalize_word(word):
305
  # Remove punctuation and whitespace
306
  return word.strip().translate(str.maketrans('', '', string.punctuation)).lower()
307
 
308
- def create_tabular_feedback(intended, actual, lang_choice):
309
- """Create clean, readable tabular feedback without background colors"""
 
 
310
 
311
- # Get simple transliterations
312
- intended_roman = transliterate_to_simple_roman(intended, lang_choice)
313
- actual_roman = transliterate_to_simple_roman(actual, lang_choice)
314
- intended_hk = transliterate_to_hk(intended, lang_choice)
315
- actual_hk = transliterate_to_hk(actual, lang_choice)
316
 
317
  # Split into words for comparison
318
  intended_words = intended.strip().split()
@@ -320,38 +508,26 @@ def create_tabular_feedback(intended, actual, lang_choice):
320
  intended_roman_words = intended_roman.strip().split()
321
  actual_roman_words = actual_roman.strip().split()
322
 
323
- # Calculate accuracy
324
  correct_words = 0
325
  total_words = len(intended_words)
326
 
327
  # Create word-by-word comparison table
328
  feedback_html = """
329
  <div style='font-family: Arial, sans-serif; padding: 20px; margin: 10px 0;'>
330
- <h3 style='color: #2c3e50; margin-bottom: 20px; text-align: center;'>📊 Pronunciation Analysis</h3>
331
  """
332
 
333
- # Show simple transliteration of target sentence for easier reading
334
- if lang_choice in ["Tamil", "Malayalam"]:
335
- feedback_html += f"""
336
- <div style='margin-bottom: 25px; padding: 15px; border: 2px solid #3498db; border-radius: 8px; background: #f8f9fa;'>
337
- <h4 style='color: #3498db; margin-bottom: 10px;'>🎯 Target Sentence (Reading Guide)</h4>
338
- <div style='font-size: 20px; font-family: monospace; color: #2c3e50; line-height: 1.4;'>
339
- <strong>Original:</strong> {intended}<br>
340
- <strong>Romanized:</strong> <span style='color: #e67e22; font-weight: bold;'>{intended_roman}</span>
341
- </div>
342
- </div>
343
- """
344
-
345
- # Overview table - completely clean
346
  feedback_html += """
347
  <div style='margin-bottom: 25px;'>
348
- <h4 style='color: #34495e; margin-bottom: 15px;'>📝 Text Comparison</h4>
349
  <table style='width: 100%; border-collapse: collapse; border: 2px solid #ddd;'>
350
  <thead>
351
  <tr style='border-bottom: 2px solid #ddd;'>
352
  <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Type</th>
353
  <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Original Text</th>
354
- <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50;'>Romanized</th>
355
  </tr>
356
  </thead>
357
  <tbody>
@@ -370,31 +546,30 @@ def create_tabular_feedback(intended, actual, lang_choice):
370
  </div>
371
  """.format(intended, intended_roman, actual, actual_roman)
372
 
373
- # Word-by-word analysis - clean table
374
  feedback_html += """
375
  <div style='margin-bottom: 25px;'>
376
- <h4 style='color: #34495e; margin-bottom: 15px;'>🔍 Word-by-Word Check</h4>
377
  <table style='width: 100%; border-collapse: collapse; border: 2px solid #ddd;'>
378
  <thead>
379
  <tr style='border-bottom: 2px solid #ddd;'>
380
  <th style='padding: 12px; text-align: center; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>#</th>
381
  <th style='padding: 12px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Expected Word</th>
382
  <th style='padding: 12px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>What You Said</th>
 
383
  <th style='padding: 12px; text-align: center; font-weight: bold; color: #2c3e50;'>Result</th>
384
  </tr>
385
  </thead>
386
  <tbody>
387
  """
388
 
389
- # Compare words using difflib with normalized comparison
390
- normalized_intended = [normalize_word(w) for w in intended_words]
391
- normalized_actual = [normalize_word(w) for w in actual_words]
392
- sm = difflib.SequenceMatcher(None, normalized_intended, normalized_actual)
393
  word_index = 0
394
 
395
  for tag, i1, i2, j1, j2 in sm.get_opcodes():
396
  if tag == 'equal':
397
- # Correct words - clean white background
398
  for idx, word in enumerate(intended_words[i1:i2]):
399
  word_index += 1
400
  correct_words += 1
@@ -413,15 +588,18 @@ def create_tabular_feedback(intended, actual, lang_choice):
413
  <div style='font-family: monospace; font-size: 16px; margin-bottom: 4px; color: #27ae60;'>{actual_word}</div>
414
  <div style='font-size: 13px; color: #888;'>({actual_roman_word})</div>
415
  </td>
 
 
 
416
  <td style='padding: 12px; text-align: center;'>
417
  <span style='color: #27ae60; font-weight: bold; font-size: 20px;'>✓</span>
418
- <div style='font-size: 12px; color: #27ae60; margin-top: 2px;'>Correct</div>
419
  </td>
420
  </tr>
421
  """
422
 
423
  elif tag == 'replace':
424
- # Incorrect words - clean white with colored text only
425
  max_words = max(i2-i1, j2-j1)
426
  for idx in range(max_words):
427
  word_index += 1
@@ -430,6 +608,34 @@ def create_tabular_feedback(intended, actual, lang_choice):
430
  actual_word = actual_words[j1 + idx] if (j1 + idx) < j2 else ""
431
  actual_roman_word = actual_roman_words[j1 + idx] if (j1 + idx) < len(actual_roman_words) else ""
432
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
  feedback_html += f"""
434
  <tr style='border-bottom: 1px solid #eee;'>
435
  <td style='padding: 12px; text-align: center; font-weight: bold; color: #666; border-right: 1px solid #ddd;'>{word_index}</td>
@@ -438,12 +644,15 @@ def create_tabular_feedback(intended, actual, lang_choice):
438
  <div style='font-size: 13px; color: #888;'>({expected_roman})</div>
439
  </td>
440
  <td style='padding: 12px; border-right: 1px solid #ddd;'>
441
- <div style='font-family: monospace; font-size: 16px; margin-bottom: 4px; color: #e74c3c;'>{actual_word}</div>
442
  <div style='font-size: 13px; color: #888;'>({actual_roman_word})</div>
443
  </td>
 
 
 
444
  <td style='padding: 12px; text-align: center;'>
445
- <span style='color: #e74c3c; font-weight: bold; font-size: 20px;'>✗</span>
446
- <div style='font-size: 12px; color: #e74c3c; margin-top: 2px;'>Different</div>
447
  </td>
448
  </tr>
449
  """
@@ -463,6 +672,9 @@ def create_tabular_feedback(intended, actual, lang_choice):
463
  <td style='padding: 12px; color: #f39c12; font-style: italic; border-right: 1px solid #ddd;'>
464
  <em>Not spoken</em>
465
  </td>
 
 
 
466
  <td style='padding: 12px; text-align: center;'>
467
  <span style='color: #f39c12; font-weight: bold; font-size: 20px;'>⚠</span>
468
  <div style='font-size: 12px; color: #f39c12; margin-top: 2px;'>Missing</div>
@@ -484,6 +696,9 @@ def create_tabular_feedback(intended, actual, lang_choice):
484
  <div style='font-family: monospace; font-size: 16px; margin-bottom: 4px; color: #9b59b6;'>{word}</div>
485
  <div style='font-size: 13px; color: #888;'>({actual_roman_word})</div>
486
  </td>
 
 
 
487
  <td style='padding: 12px; text-align: center;'>
488
  <span style='color: #9b59b6; font-weight: bold; font-size: 20px;'>+</span>
489
  <div style='font-size: 12px; color: #9b59b6; margin-top: 2px;'>Extra</div>
@@ -497,74 +712,57 @@ def create_tabular_feedback(intended, actual, lang_choice):
497
  </div>
498
  """
499
 
500
- # Calculate accuracy
501
  accuracy = (correct_words / total_words * 100) if total_words > 0 else 0
502
 
503
- # Clean summary section
504
  feedback_html += f"""
505
  <div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 25px; border-radius: 12px; text-align: center; margin-top: 20px;'>
506
- <h4 style='margin: 0 0 20px 0; font-size: 24px;'>🎯 Your Score</h4>
507
  <div style='display: flex; justify-content: space-around; flex-wrap: wrap; gap: 20px;'>
508
  <div style='background: rgba(255,255,255,0.15); padding: 20px; border-radius: 12px; min-width: 160px;'>
509
  <div style='font-size: 40px; font-weight: bold; margin-bottom: 8px;'>{accuracy:.0f}%</div>
510
- <div style='font-size: 16px; opacity: 0.9;'>Accuracy</div>
511
  </div>
512
  <div style='background: rgba(255,255,255,0.15); padding: 20px; border-radius: 12px; min-width: 160px;'>
513
- <div style='font-size: 40px; font-weight: bold; margin-bottom: 8px;'>{correct_words}/{total_words}</div>
514
- <div style='font-size: 16px; opacity: 0.9;'>Words Correct</div>
515
  </div>
516
  </div>
517
- <div style='margin-top: 20px; font-size: 18px;'>
 
 
518
  """
519
 
520
- # Simple motivational message
521
  if accuracy >= 95:
522
- feedback_html += "<span>🎉 Perfect! Outstanding pronunciation!</span>"
523
  elif accuracy >= 85:
524
- feedback_html += "<span>🌟 Excellent! Very clear speaking!</span>"
525
  elif accuracy >= 70:
526
- feedback_html += "<span>👍 Good job! Keep practicing!</span>"
527
  elif accuracy >= 50:
528
- feedback_html += "<span>📚 Getting better! Focus on the red words!</span>"
529
  else:
530
- feedback_html += "<span>💪 Keep going! Practice makes perfect!</span>"
531
-
532
- feedback_html += """
533
- </div>
534
- </div>
535
- """
536
 
537
- # Optional technical section (collapsed)
538
- if lang_choice in ["Tamil", "Malayalam"]:
539
- feedback_html += f"""
540
- <details style='margin-top: 20px; padding: 15px; border: 1px solid #ddd; border-radius: 8px;'>
541
- <summary style='cursor: pointer; font-weight: bold; color: #2c3e50; padding: 5px;'>🔧 Technical Details (for experts)</summary>
542
- <div style='margin-top: 15px; display: grid; grid-template-columns: 1fr 1fr; gap: 15px;'>
543
- <div>
544
- <strong>Expected (Harvard-Kyoto):</strong><br>
545
- <span style='font-family: monospace; background: #f5f5f5; padding: 8px; border-radius: 4px; display: block; margin-top: 5px;'>{intended_hk}</span>
546
- </div>
547
- <div>
548
- <strong>You said (Harvard-Kyoto):</strong><br>
549
- <span style='font-family: monospace; background: #f5f5f5; padding: 8px; border-radius: 4px; display: block; margin-top: 5px;'>{actual_hk}</span>
550
- </div>
551
- </div>
552
- </details>
553
- """
554
-
555
- feedback_html += "</div>"
556
 
557
  return feedback_html, accuracy
558
 
559
-
560
-
561
  # ---------------- MAIN ---------------- #
562
  @spaces.GPU
563
- def compare_pronunciation(audio, lang_choice, intended_sentence, pass1_beam, pass1_temp):
564
- if audio is None or not intended_sentence.strip():
565
  return ("⚠️ Please record audio and generate a sentence first.", "", "", "", "")
566
 
567
  try:
 
 
 
 
 
 
568
  # Single transcription pass with user settings
569
  actual_text = transcribe_once(audio, lang_choice, pass1_beam, pass1_temp)
570
 
@@ -575,12 +773,12 @@ def compare_pronunciation(audio, lang_choice, intended_sentence, pass1_beam, pas
575
  wer_val = jiwer.wer(intended_sentence, actual_text)
576
  cer_val = jiwer.cer(intended_sentence, actual_text)
577
 
578
- # Get transliterations for both texts
579
- intended_roman = transliterate_to_simple_roman(intended_sentence, lang_choice)
580
- actual_roman = transliterate_to_simple_roman(actual_text, lang_choice)
581
 
582
- # Create comprehensive tabular feedback
583
- feedback_html, accuracy = create_tabular_feedback(intended_sentence, actual_text, lang_choice)
584
 
585
  return (
586
  actual_text,
@@ -598,14 +796,19 @@ def compare_pronunciation(audio, lang_choice, intended_sentence, pass1_beam, pas
598
  # ---------------- UI ---------------- #
599
  with gr.Blocks(title="Pronunciation Comparator", theme=gr.themes.Soft()) as demo:
600
  gr.Markdown("""
601
- # 🎙️ AI Pronunciation Coach
602
  ### Practice English, Tamil & Malayalam with AI feedback
603
 
 
 
 
 
 
604
  **How to use:**
605
  1. Select your language
606
- 2. Generate a practice sentence
607
  3. Record yourself reading it aloud
608
- 4. Get instant feedback on your pronunciation!
609
  """)
610
 
611
  with gr.Row():
@@ -621,15 +824,8 @@ with gr.Blocks(title="Pronunciation Comparator", theme=gr.themes.Soft()) as demo
621
  intended_display = gr.Textbox(
622
  label="📝 Practice Sentence (Read this aloud)",
623
  interactive=False,
624
- placeholder="Click 'Generate Practice Sentence' to get started..."
625
- )
626
-
627
- intended_transliteration = gr.Textbox(
628
- label="🔤 Pronunciation Guide",
629
- interactive=False,
630
- placeholder="Pronunciation guide will appear here...",
631
- visible=False,
632
- lines=1
633
  )
634
 
635
  with gr.Row():
@@ -649,38 +845,30 @@ with gr.Blocks(title="Pronunciation Comparator", theme=gr.themes.Soft()) as demo
649
  with gr.Row():
650
  with gr.Column():
651
  pass1_out = gr.Textbox(label="🗣️ What You Said", interactive=False)
652
- actual_roman_out = gr.Textbox(label="🔤 Your Pronunciation (Romanized)", interactive=False)
653
  with gr.Column():
654
  wer_out = gr.Textbox(label="📊 Word Error Rate", interactive=False)
655
  cer_out = gr.Textbox(label="📈 Character Error Rate", interactive=False)
656
 
657
- gr.Markdown("### 📋 Detailed Analysis")
658
  feedback_display = gr.HTML()
659
 
660
- # Event handlers
661
- def update_transliteration_visibility(language_choice):
662
- if language_choice in ["Tamil", "Malayalam"]:
663
- return gr.update(visible=True)
664
- else:
665
- return gr.update(visible=False, value="")
666
-
667
- lang_choice.change(
668
- fn=update_transliteration_visibility,
669
- inputs=[lang_choice],
670
- outputs=[intended_transliteration]
671
- )
672
-
673
- gen_btn.click(
674
- fn=get_random_sentence_with_transliteration,
675
- inputs=[lang_choice],
676
- outputs=[intended_display, intended_transliteration]
677
- )
678
-
679
- analyze_btn.click(
680
- fn=compare_pronunciation,
681
- inputs=[audio_input, lang_choice, intended_display, pass1_beam, pass1_temp],
682
- outputs=[pass1_out, actual_roman_out, wer_out, cer_out, feedback_display]
683
- )
684
 
685
  if __name__ == "__main__":
686
  demo.launch()
 
2
  import random
3
  import difflib
4
  import re
5
+ import unicodedata
6
  import jiwer
7
  import torch
8
  from transformers import WhisperForConditionalGeneration, WhisperProcessor
 
75
  ]
76
  }
77
 
78
+ # ---------------- IMPROVED TRANSLITERATION SYSTEM ---------------- #
79
+
80
def transliterate_to_natural_roman(text, lang_choice):
    """Romanize Tamil or Malayalam text into casual Thanglish/Manglish.

    Blank or whitespace-only input yields an empty string, and English is
    passed through untouched.  Any language other than Tamil or Malayalam is
    also returned unchanged.  For the two supported scripts the text is
    converted to the ``sanscript.ISO`` scheme and then sent through
    ``apply_systematic_phonetic_rules``, ``apply_natural_language_patterns``
    and ``optimize_natural_flow``, in that order.

    Any exception raised along the way is printed and the original text is
    returned, so the caller always receives a displayable string.
    """
    if not (text and text.strip()):
        return ""
    if lang_choice == "English":
        return text

    # (language label, attribute name on ``sanscript``) for supported scripts.
    supported = (("Tamil", "TAMIL"), ("Malayalam", "MALAYALAM"))

    try:
        scheme_attr = next(
            (attr for label, attr in supported if lang_choice == label),
            None,
        )
        if scheme_attr is None:
            return text

        # Stage 1: native script -> ISO romanization with diacritics.
        iso_text = transliterate(
            text, getattr(sanscript, scheme_attr), sanscript.ISO
        )

        # Stages 2-4: phonetic rules, per-language patterns, final cleanup.
        romanized = optimize_natural_flow(
            apply_natural_language_patterns(
                apply_systematic_phonetic_rules(iso_text),
                lang_choice,
            )
        )

        # An empty pipeline result falls back to the untouched input.
        return romanized or text

    except Exception as e:
        print(f"Transliteration error: {e}")
        return text
114
+
115
# ISO romanization -> casual Latin spelling, applied in ONE pass by
# apply_systematic_phonetic_rules.  Code points are written as escapes so the
# table does not depend on how this file's own text happens to be normalized.
_ISO_TO_NATURAL = {
    # Long vowels are doubled.
    "\u0101": "aa",            # a + macron
    "\u012b": "ii",            # i + macron
    "\u016b": "uu",            # u + macron
    "\u0113": "ee",            # e + macron
    "\u014d": "oo",            # o + macron
    # Vocalic r / l (base letter + combining ring below, optional macron).
    "r\u0325\u0304": "ruu",
    "r\u0325": "ru",
    "l\u0325\u0304": "luu",
    "l\u0325": "lu",
    # Cluster that must win over its single-letter parts (n-tilde -> "nj").
    "j\u00f1": "gn",
    # Nasals.
    "\u1e45": "ng",            # n + dot above
    "\u00f1": "nj",            # n + tilde
    "\u1e47": "n",             # n + dot below
    "n\u0306": "n",            # n + combining breve
    # Retroflex stops lose their dot; a following "h" is left in place, so
    # the aspirated forms come out as "th" and "dh" respectively.
    "\u1e6d": "t",             # t + dot below
    "\u1e0d": "d",             # d + dot below
    # Liquids.
    "\u1e5f": "r",             # r + line below
    "\u1e5b": "r",             # r + dot below
    "\u1e37": "l",             # l + dot below
    "\u1e3b": "zh",            # l + line below
    # Sibilants ("k" + s-dot-below and s-acute + "r" need no entry of their
    # own: they already come out as "ksh" / "shr" through these two rules).
    "\u015b": "sh",            # s + acute
    "\u1e63": "sh",            # s + dot below
    # Aspirate variants and stray symbols.
    "\u1e25": "h",             # h + dot below
    "\u1e2b": "h",             # h + breve below
    "\u00d7": "",              # multiplication sign
}

# Longest keys first so multi-code-point entries beat their prefixes.
_ISO_TO_NATURAL_RE = re.compile(
    "|".join(
        re.escape(key)
        for key in sorted(_ISO_TO_NATURAL, key=len, reverse=True)
    )
)


def apply_systematic_phonetic_rules(iso_text):
    """Replace ISO diacritic letters with plain-Latin casual spellings.

    The text is first normalized to NFC so that decomposed input (base
    letter followed by a combining mark) hits the same table entries as
    precomposed input.  Every table entry is then substituted in a single
    left-to-right pass, trying longer entries first.  Doing it in one pass
    fixes three problems of applying the rules one after another:

    * the long vocalic r/l entries are no longer shadowed by their short
      forms,
    * the "j" + n-tilde cluster is matched before the bare n-tilde rule can
      consume it, and
    * aspirated d-dot-below keeps its voicing ("dh") instead of being
      collapsed to "th".

    Args:
        iso_text: Text in ISO romanization, as a ``str``.

    Returns:
        The text with every table entry replaced; characters that are not in
        the table are returned unchanged (in NFC form).
    """
    composed = unicodedata.normalize("NFC", iso_text)
    return _ISO_TO_NATURAL_RE.sub(
        lambda match: _ISO_TO_NATURAL[match.group(0)], composed
    )
190
+
191
def apply_natural_language_patterns(text, lang_choice):
    """Route romanized text to the pattern pass for its language.

    Malayalam text goes through ``apply_malayalam_natural_patterns`` and
    Tamil text through ``apply_tamil_natural_patterns``.  For any other
    ``lang_choice`` the text is handed back untouched.
    """
    if lang_choice == "Malayalam":
        return apply_malayalam_natural_patterns(text)
    if lang_choice == "Tamil":
        return apply_tamil_natural_patterns(text)

    # No language-specific pass exists for this choice.
    return text
202
+
203
# ISO letters rewritten by apply_tamil_natural_patterns.
_TAMIL_LETTER_MAP = str.maketrans({
    "\u1e3b": "zh",  # l + line below
    "\u1e5f": "r",   # r + line below
})


def apply_tamil_natural_patterns(text):
    """Apply the Tamil-specific letter substitutions to romanized text.

    Only two substitutions are performed: l-with-line-below becomes "zh" and
    r-with-line-below becomes "r".

    The earlier rule list also carried gemination and word-ending entries
    that replaced a match with itself, plus a "vowel + u + m/n/l/r" rule
    that moved the "u" behind the consonant.  The latter corrupted every
    doubled long u (and "au") standing before one of those consonants, e.g.
    "nuul" became "nulu", so it is intentionally not applied here.

    Args:
        text: Romanized text as a ``str``.

    Returns:
        The text with the two letters above replaced.
    """
    return text.translate(_TAMIL_LETTER_MAP)
227
+
228
# ISO letters rewritten by apply_malayalam_natural_patterns.
_MALAYALAM_LETTER_MAP = str.maketrans({
    "\u1e3b": "zh",  # l + line below
    "\u1e5f": "r",   # r + line below
})


def apply_malayalam_natural_patterns(text):
    """Apply the Malayalam-specific letter substitutions to romanized text.

    Only two substitutions are performed: l-with-line-below becomes "zh" and
    r-with-line-below becomes "r".  The remaining entries of the earlier
    rule list (gemination, word endings, vowel-liquid-vowel, "ngh"/"mph"
    clusters) each replaced a match with the identical text, so dropping
    them leaves the result unchanged while avoiding seven regex passes.

    Args:
        text: Romanized text as a ``str``.

    Returns:
        The text with the two letters above replaced.
    """
    return text.translate(_MALAYALAM_LETTER_MAP)
256
+
257
_FLOW_CONSONANTS = "bcdfghjklmnpqrstvwxyz"

# Runs of three or more identical vowels / consonants are cut down to two.
_FLOW_VOWEL_RUN_RE = re.compile(r"([aeiou])\1{2,}")
_FLOW_CONSONANT_RUN_RE = re.compile(rf"([{_FLOW_CONSONANTS}])\1{{2,}}")
_FLOW_WHITESPACE_RE = re.compile(r"\s+")
# A "y" squeezed between two consonants is read as the vowel "i".
_FLOW_Y_BETWEEN_CONSONANTS_RE = re.compile(
    rf"([{_FLOW_CONSONANTS}])y([{_FLOW_CONSONANTS}])"
)


def optimize_natural_flow(text):
    """Strip leftover diacritics and tidy a romanized string for reading.

    Steps, in order:

    1. Decompose to NFD and drop every combining mark (category ``Mn``).
    2. Cap runs of the same lowercase vowel at two letters.
    3. Cap runs of the same lowercase consonant at two letters.
    4. Collapse whitespace runs to one space and trim both ends.
    5. Turn a "y" that sits between two consonants into "i".

    An "h" between two vowels is deliberately preserved.  Deleting it
    removed a real sound from the guide (for example "mahaan" turned into
    "maaan") and, because that happened after step 2, it could leave three
    vowels in a row again.

    Args:
        text: Romanized text as a ``str``.

    Returns:
        The cleaned-up text.
    """
    text = "".join(
        ch
        for ch in unicodedata.normalize("NFD", text)
        if unicodedata.category(ch) != "Mn"
    )

    text = _FLOW_VOWEL_RUN_RE.sub(r"\1\1", text)
    text = _FLOW_CONSONANT_RUN_RE.sub(r"\1\1", text)

    # After the collapse only single spaces can remain at the edges.
    text = _FLOW_WHITESPACE_RE.sub(" ", text).strip(" ")

    return _FLOW_Y_BETWEEN_CONSONANTS_RE.sub(r"\1i\2", text)
297
+
298
def enhanced_phonetic_similarity_check(intended_roman, actual_roman):
    """Normalize two romanized strings against one table of equivalents.

    Builds a table that maps a canonical spelling to the list of spellings
    treated as the same sound, then passes each argument together with that
    table to ``normalize_for_comparison``.

    Returns:
        A 2-tuple ``(intended_normalized, actual_normalized)``.
    """
    # Long vowels: the doubled letter is canonical, the single one a variant.
    vowel_groups = {v * 2: [v, v * 2] for v in "aiueo"}

    consonant_groups = {
        'zh': ['zh', 'z', 'l'],
        'sh': ['sh', 's'],
        'ch': ['ch', 'c'],
        'th': ['th', 't'],
        'dh': ['dh', 'd'],
        'ksh': ['ksh', 'ksh', 'ks'],
        'gn': ['gn', 'ny', 'nj'],
    }

    # Vowel groups first, consonant groups second: the table's iteration
    # order is passed on to normalize_for_comparison as-is.
    table = {**vowel_groups, **consonant_groups}

    return tuple(
        normalize_for_comparison(romanized, table)
        for romanized in (intended_roman, actual_roman)
    )
325
+
326
def normalize_for_comparison(text, equivalents):
    """Rewrite every known spelling variant in ``text`` to its canonical form.

    The text is lower-cased and stripped, then scanned once from left to
    right.  At each position the longest spelling listed in ``equivalents``
    is matched and replaced by its canonical form.  Because replaced text is
    never scanned again, a canonical form cannot be expanded a second time:
    with ``{'aa': ['a', 'aa']}`` both "a" and "aa" come out as "aa", and
    normalizing an already normalized string changes nothing as long as the
    canonical forms do not overlap one another.  (Chained ``str.replace``
    calls turned "a" into "aa" but "aa" into "aaaa", and "sh" into "shh".)

    Args:
        text: Romanized text as a ``str``.
        equivalents: Mapping of canonical spelling -> iterable of spellings
            to treat as that canonical form.  The canonical spelling itself
            is always recognised.  If a spelling is listed under several
            canonical forms, the first one in iteration order wins.

    Returns:
        The normalized text; unchanged (apart from lower-casing and
        stripping) when ``equivalents`` is empty.
    """
    text = text.lower().strip()

    canonical_for = {}
    for canonical, variants in equivalents.items():
        for spelling in (canonical, *variants):
            if spelling:  # an empty pattern would match at every position
                canonical_for.setdefault(spelling, canonical)

    if not canonical_for:
        return text

    # Longest spellings first so "ksh" is tried before "ks", "sh" before "s".
    matcher = re.compile(
        "|".join(
            re.escape(spelling)
            for spelling in sorted(canonical_for, key=len, reverse=True)
        )
    )
    return matcher.sub(lambda match: canonical_for[match.group(0)], text)
337
+
338
  # ---------------- MEMORY OPTIMIZED MODEL LOADING ---------------- #
339
  # Store only currently loaded model to save memory
340
  current_model = {"language": None, "model": None, "processor": None}
 
400
  def get_random_sentence_with_transliteration(language_choice):
401
  sentence = random.choice(SENTENCE_BANK[language_choice])
402
  if language_choice in ["Tamil", "Malayalam"]:
403
+ # Use the new improved transliteration system
404
+ transliteration = transliterate_to_natural_roman(sentence, language_choice)
405
+ # Combine sentence with transliteration in the same box
406
+ combined_sentence = f"{sentence}\n\n🔤 {transliteration}"
407
+ return combined_sentence, transliteration
408
  else:
409
  return sentence, ""
410
 
 
435
  print(f"Transliteration error: {e}")
436
  return text
437
 
438
+ # Updated function that uses the new transliteration system
439
  def transliterate_to_simple_roman(text, lang_choice):
440
+ """
441
+ IMPROVED VERSION: Natural transliteration using systematic phonetic rules
442
+ """
443
+ return transliterate_to_natural_roman(text, lang_choice)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
 
445
  @spaces.GPU
446
  def transcribe_once(audio_path, language_choice, beam_size, temperature):
 
493
  # Remove punctuation and whitespace
494
  return word.strip().translate(str.maketrans('', '', string.punctuation)).lower()
495
 
496
+ def create_enhanced_tabular_feedback(intended, actual, lang_choice):
497
+ """
498
+ Enhanced feedback system with better phonetic comparison
499
+ """
500
 
501
+ # Get natural transliterations using the new system
502
+ intended_roman = transliterate_to_natural_roman(intended, lang_choice)
503
+ actual_roman = transliterate_to_natural_roman(actual, lang_choice)
 
 
504
 
505
  # Split into words for comparison
506
  intended_words = intended.strip().split()
 
508
  intended_roman_words = intended_roman.strip().split()
509
  actual_roman_words = actual_roman.strip().split()
510
 
511
+ # Calculate accuracy with phonetic awareness
512
  correct_words = 0
513
  total_words = len(intended_words)
514
 
515
  # Create word-by-word comparison table
516
  feedback_html = """
517
  <div style='font-family: Arial, sans-serif; padding: 20px; margin: 10px 0;'>
518
+ <h3 style='color: #2c3e50; margin-bottom: 20px; text-align: center;'>📊 Enhanced Pronunciation Analysis</h3>
519
  """
520
 
521
+ # Overview table with improved romanization
 
 
 
 
 
 
 
 
 
 
 
 
522
  feedback_html += """
523
  <div style='margin-bottom: 25px;'>
524
+ <h4 style='color: #34495e; margin-bottom: 15px;'>📝 Text Comparison (Improved Natural Romanization)</h4>
525
  <table style='width: 100%; border-collapse: collapse; border: 2px solid #ddd;'>
526
  <thead>
527
  <tr style='border-bottom: 2px solid #ddd;'>
528
  <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Type</th>
529
  <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Original Text</th>
530
+ <th style='padding: 15px; text-align: left; font-weight: bold; color: #2c3e50;'>Natural Romanization</th>
531
  </tr>
532
  </thead>
533
  <tbody>
 
546
  </div>
547
  """.format(intended, intended_roman, actual, actual_roman)
548
 
549
+ # Enhanced word-by-word analysis with phonetic awareness
550
  feedback_html += """
551
  <div style='margin-bottom: 25px;'>
552
+ <h4 style='color: #34495e; margin-bottom: 15px;'>🔍 Enhanced Word-by-Word Analysis</h4>
553
  <table style='width: 100%; border-collapse: collapse; border: 2px solid #ddd;'>
554
  <thead>
555
  <tr style='border-bottom: 2px solid #ddd;'>
556
  <th style='padding: 12px; text-align: center; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>#</th>
557
  <th style='padding: 12px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Expected Word</th>
558
  <th style='padding: 12px; text-align: left; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>What You Said</th>
559
+ <th style='padding: 12px; text-align: center; font-weight: bold; color: #2c3e50; border-right: 1px solid #ddd;'>Phonetic Match</th>
560
  <th style='padding: 12px; text-align: center; font-weight: bold; color: #2c3e50;'>Result</th>
561
  </tr>
562
  </thead>
563
  <tbody>
564
  """
565
 
566
+ # Enhanced word comparison with phonetic similarity
567
+ sm = difflib.SequenceMatcher(None, intended_words, actual_words)
 
 
568
  word_index = 0
569
 
570
  for tag, i1, i2, j1, j2 in sm.get_opcodes():
571
  if tag == 'equal':
572
+ # Correct words
573
  for idx, word in enumerate(intended_words[i1:i2]):
574
  word_index += 1
575
  correct_words += 1
 
588
  <div style='font-family: monospace; font-size: 16px; margin-bottom: 4px; color: #27ae60;'>{actual_word}</div>
589
  <div style='font-size: 13px; color: #888;'>({actual_roman_word})</div>
590
  </td>
591
+ <td style='padding: 12px; text-align: center; border-right: 1px solid #ddd;'>
592
+ <span style='color: #27ae60; font-weight: bold;'>Perfect</span>
593
+ </td>
594
  <td style='padding: 12px; text-align: center;'>
595
  <span style='color: #27ae60; font-weight: bold; font-size: 20px;'>✓</span>
596
+ <div style='font-size: 12px; color: #27ae60; margin-top: 2px;'>Exact</div>
597
  </td>
598
  </tr>
599
  """
600
 
601
  elif tag == 'replace':
602
+ # Check for phonetic similarity in replacements
603
  max_words = max(i2-i1, j2-j1)
604
  for idx in range(max_words):
605
  word_index += 1
 
608
  actual_word = actual_words[j1 + idx] if (j1 + idx) < j2 else ""
609
  actual_roman_word = actual_roman_words[j1 + idx] if (j1 + idx) < len(actual_roman_words) else ""
610
 
611
+ # Check phonetic similarity
612
+ if expected_roman and actual_roman_word:
613
+ norm_expected, norm_actual = enhanced_phonetic_similarity_check(expected_roman, actual_roman_word)
614
+ similarity_ratio = difflib.SequenceMatcher(None, norm_expected, norm_actual).ratio()
615
+
616
+ if similarity_ratio > 0.8: # High phonetic similarity
617
+ phonetic_match = "Very Close"
618
+ phonetic_color = "#f39c12"
619
+ result_icon = "≈"
620
+ result_text = "Similar"
621
+ correct_words += 0.8 # Partial credit
622
+ elif similarity_ratio > 0.6: # Moderate similarity
623
+ phonetic_match = "Close"
624
+ phonetic_color = "#e67e22"
625
+ result_icon = "~"
626
+ result_text = "Close"
627
+ correct_words += 0.5 # Partial credit
628
+ else:
629
+ phonetic_match = "Different"
630
+ phonetic_color = "#e74c3c"
631
+ result_icon = "✗"
632
+ result_text = "Different"
633
+ else:
634
+ phonetic_match = "Different"
635
+ phonetic_color = "#e74c3c"
636
+ result_icon = "✗"
637
+ result_text = "Different"
638
+
639
  feedback_html += f"""
640
  <tr style='border-bottom: 1px solid #eee;'>
641
  <td style='padding: 12px; text-align: center; font-weight: bold; color: #666; border-right: 1px solid #ddd;'>{word_index}</td>
 
644
  <div style='font-size: 13px; color: #888;'>({expected_roman})</div>
645
  </td>
646
  <td style='padding: 12px; border-right: 1px solid #ddd;'>
647
+ <div style='font-family: monospace; font-size: 16px; margin-bottom: 4px; color: {phonetic_color};'>{actual_word}</div>
648
  <div style='font-size: 13px; color: #888;'>({actual_roman_word})</div>
649
  </td>
650
+ <td style='padding: 12px; text-align: center; border-right: 1px solid #ddd;'>
651
+ <span style='color: {phonetic_color}; font-weight: bold;'>{phonetic_match}</span>
652
+ </td>
653
  <td style='padding: 12px; text-align: center;'>
654
+ <span style='color: {phonetic_color}; font-weight: bold; font-size: 20px;'>{result_icon}</span>
655
+ <div style='font-size: 12px; color: {phonetic_color}; margin-top: 2px;'>{result_text}</div>
656
  </td>
657
  </tr>
658
  """
 
672
  <td style='padding: 12px; color: #f39c12; font-style: italic; border-right: 1px solid #ddd;'>
673
  <em>Not spoken</em>
674
  </td>
675
+ <td style='padding: 12px; text-align: center; border-right: 1px solid #ddd;'>
676
+ <span style='color: #f39c12; font-weight: bold;'>Missing</span>
677
+ </td>
678
  <td style='padding: 12px; text-align: center;'>
679
  <span style='color: #f39c12; font-weight: bold; font-size: 20px;'>⚠</span>
680
  <div style='font-size: 12px; color: #f39c12; margin-top: 2px;'>Missing</div>
 
696
  <div style='font-family: monospace; font-size: 16px; margin-bottom: 4px; color: #9b59b6;'>{word}</div>
697
  <div style='font-size: 13px; color: #888;'>({actual_roman_word})</div>
698
  </td>
699
+ <td style='padding: 12px; text-align: center; border-right: 1px solid #ddd;'>
700
+ <span style='color: #9b59b6; font-weight: bold;'>Extra</span>
701
+ </td>
702
  <td style='padding: 12px; text-align: center;'>
703
  <span style='color: #9b59b6; font-weight: bold; font-size: 20px;'>+</span>
704
  <div style='font-size: 12px; color: #9b59b6; margin-top: 2px;'>Extra</div>
 
712
  </div>
713
  """
714
 
715
+ # Calculate enhanced accuracy
716
  accuracy = (correct_words / total_words * 100) if total_words > 0 else 0
717
 
718
+ # Enhanced summary section
719
  feedback_html += f"""
720
  <div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 25px; border-radius: 12px; text-align: center; margin-top: 20px;'>
721
+ <h4 style='margin: 0 0 20px 0; font-size: 24px;'>🎯 Enhanced Pronunciation Score</h4>
722
  <div style='display: flex; justify-content: space-around; flex-wrap: wrap; gap: 20px;'>
723
  <div style='background: rgba(255,255,255,0.15); padding: 20px; border-radius: 12px; min-width: 160px;'>
724
  <div style='font-size: 40px; font-weight: bold; margin-bottom: 8px;'>{accuracy:.0f}%</div>
725
+ <div style='font-size: 16px; opacity: 0.9;'>Phonetic Accuracy</div>
726
  </div>
727
  <div style='background: rgba(255,255,255,0.15); padding: 20px; border-radius: 12px; min-width: 160px;'>
728
+ <div style='font-size: 40px; font-weight: bold; margin-bottom: 8px;'>{correct_words:.1f}/{total_words}</div>
729
+ <div style='font-size: 16px; opacity: 0.9;'>Words Matched</div>
730
  </div>
731
  </div>
732
+ <div style='margin-top: 15px; font-size: 14px; opacity: 0.8;'>
733
+ ✨ Now with enhanced phonetic matching for better accuracy!
734
+ </div>
735
  """
736
 
737
+ # Enhanced motivational message
738
  if accuracy >= 95:
739
+ feedback_html += "<div style='margin-top: 15px; font-size: 18px;'><span>🎉 Outstanding! Perfect natural pronunciation!</span></div>"
740
  elif accuracy >= 85:
741
+ feedback_html += "<div style='margin-top: 15px; font-size: 18px;'><span>🌟 Excellent! Very natural sounding!</span></div>"
742
  elif accuracy >= 70:
743
+ feedback_html += "<div style='margin-top: 15px; font-size: 18px;'><span>👍 Good job! Your pronunciation is improving!</span></div>"
744
  elif accuracy >= 50:
745
+ feedback_html += "<div style='margin-top: 15px; font-size: 18px;'><span>📚 Getting there! Focus on the highlighted sounds!</span></div>"
746
  else:
747
+ feedback_html += "<div style='margin-top: 15px; font-size: 18px;'><span>💪 Keep practicing! Every attempt makes you better!</span></div>"
 
 
 
 
 
748
 
749
+ feedback_html += "</div></div>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
750
 
751
  return feedback_html, accuracy
752
 
 
 
753
  # ---------------- MAIN ---------------- #
754
  @spaces.GPU
755
+ def compare_pronunciation(audio, lang_choice, intended_display_text, pass1_beam, pass1_temp):
756
+ if audio is None or not intended_display_text.strip():
757
  return ("⚠️ Please record audio and generate a sentence first.", "", "", "", "")
758
 
759
  try:
760
+ # Extract just the original sentence (before the transliteration part)
761
+ if "🔤" in intended_display_text:
762
+ intended_sentence = intended_display_text.split("🔤")[0].strip()
763
+ else:
764
+ intended_sentence = intended_display_text.strip()
765
+
766
  # Single transcription pass with user settings
767
  actual_text = transcribe_once(audio, lang_choice, pass1_beam, pass1_temp)
768
 
 
773
  wer_val = jiwer.wer(intended_sentence, actual_text)
774
  cer_val = jiwer.cer(intended_sentence, actual_text)
775
 
776
+ # Get improved transliterations for both texts
777
+ intended_roman = transliterate_to_natural_roman(intended_sentence, lang_choice)
778
+ actual_roman = transliterate_to_natural_roman(actual_text, lang_choice)
779
 
780
+ # Create enhanced tabular feedback with phonetic awareness
781
+ feedback_html, accuracy = create_enhanced_tabular_feedback(intended_sentence, actual_text, lang_choice)
782
 
783
  return (
784
  actual_text,
 
796
  # ---------------- UI ---------------- #
797
  with gr.Blocks(title="Pronunciation Comparator", theme=gr.themes.Soft()) as demo:
798
  gr.Markdown("""
799
+ # 🎙️ AI Pronunciation Coach (Enhanced)
800
  ### Practice English, Tamil & Malayalam with AI feedback
801
 
802
+ **New Features:**
803
+ - ✨ **Natural Romanization**: Improved Thanglish/Manglish that looks like how you actually type
804
+ - 🎯 **Phonetic Matching**: Gives partial credit for sounds that are close (zh/z/l variations)
805
+ - 📊 **Enhanced Feedback**: More accurate scoring with linguistic awareness
806
+
807
  **How to use:**
808
  1. Select your language
809
+ 2. Generate a practice sentence
810
  3. Record yourself reading it aloud
811
+ 4. Get instant enhanced feedback on your pronunciation!
812
  """)
813
 
814
  with gr.Row():
 
824
  intended_display = gr.Textbox(
825
  label="📝 Practice Sentence (Read this aloud)",
826
  interactive=False,
827
+ placeholder="Click 'Generate Practice Sentence' to get started...",
828
+ lines=3
 
 
 
 
 
 
 
829
  )
830
 
831
  with gr.Row():
 
845
  with gr.Row():
846
  with gr.Column():
847
  pass1_out = gr.Textbox(label="🗣️ What You Said", interactive=False)
848
+ actual_roman_out = gr.Textbox(label="🔤 Your Pronunciation (Natural Romanized)", interactive=False)
849
  with gr.Column():
850
  wer_out = gr.Textbox(label="📊 Word Error Rate", interactive=False)
851
  cer_out = gr.Textbox(label="📈 Character Error Rate", interactive=False)
852
 
853
+ gr.Markdown("### 📋 Enhanced Detailed Analysis")
854
  feedback_display = gr.HTML()
855
 
856
def get_sentence_for_display(language_choice):
    """Return a random practice sentence for the selected language.

    Thin adapter around ``get_random_sentence_with_transliteration`` used
    as a click handler: that helper returns a ``(sentence, romanization)``
    pair, but only the sentence is returned here, so the romanization is
    explicitly discarded instead of being bound to an unused local.
    """
    sentence, _ = get_random_sentence_with_transliteration(language_choice)
    return sentence
859
+
860
+ # Event handlers
861
+ gen_btn.click(
862
+ fn=get_sentence_for_display,
863
+ inputs=[lang_choice],
864
+ outputs=[intended_display]
865
+ )
866
+
867
+ analyze_btn.click(
868
+ fn=compare_pronunciation,
869
+ inputs=[audio_input, lang_choice, intended_display, pass1_beam, pass1_temp],
870
+ outputs=[pass1_out, actual_roman_out, wer_out, cer_out, feedback_display]
871
+ )
 
 
 
 
 
 
 
 
872
 
873
  if __name__ == "__main__":
874
  demo.launch()