sudhanm committed on
Commit
9df7f33
·
verified ·
1 Parent(s): 386695f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -59
app.py CHANGED
@@ -26,27 +26,21 @@ print(f"🔧 Using device: {DEVICE}")
26
  LANG_CODES = {
27
  "English": "en",
28
  "Tamil": "ta",
29
- "Malayalam": "ml",
30
- "Hindi": "hi",
31
- "Sanskrit": "sa"
32
  }
33
 
34
  # Updated model configurations for better HF Spaces compatibility
35
  ASR_MODELS = {
36
  "English": "openai/whisper-base.en",
37
  "Tamil": "vasista22/whisper-tamil-base", # Community model for Tamil
38
- "Malayalam": "parambharat/whisper-small-ml", # Community model for Malayalam
39
- "Hindi": "vasista22/whisper-hindi-base", # Community model for Hindi
40
- "Sanskrit": "vasista22/whisper-hindi-base" # Fallback to Hindi for Sanskrit
41
  }
42
 
43
  # Backup models in case primary ones fail
44
  FALLBACK_MODELS = {
45
  "English": "openai/whisper-base.en",
46
  "Tamil": "openai/whisper-small",
47
- "Malayalam": "openai/whisper-small",
48
- "Hindi": "openai/whisper-small",
49
- "Sanskrit": "openai/whisper-small"
50
  }
51
 
52
  LANG_PRIMERS = {
@@ -55,18 +49,12 @@ LANG_PRIMERS = {
55
  "Tamil": ("தமிழில் எழுதுக.",
56
  "தமிழ் எழுத்துக்களில் மட்டும் எழுதவும். உதாரணம்: இது ஒரு தமிழ் வாக்கியம்."),
57
  "Malayalam": ("മലയാളത്തിൽ എഴുതുക.",
58
- "മലയാള ലിപിയിൽ മാത്രം എഴുതുക. ഉദാഹരണം: ഇതൊരു മലയാള വാക്യമാണ്."),
59
- "Hindi": ("हिंदी में लिखें।",
60
- "केवल देवनागरी लिपि में लिखें। उदाहरण: यह एक हिंदी वाक्य है।"),
61
- "Sanskrit": ("संस्कृते लिखत।",
62
- "देवनागरी लिपि में लिखें। उदाहरण: अहं संस्कृतं जानामि।")
63
  }
64
 
65
  SCRIPT_PATTERNS = {
66
  "Tamil": re.compile(r"[஀-௿]"),
67
  "Malayalam": re.compile(r"[ഀ-ൿ]"),
68
- "Hindi": re.compile(r"[ऀ-ॿ]"),
69
- "Sanskrit": re.compile(r"[ऀ-ॿ]"),
70
  "English": re.compile(r"[A-Za-z]")
71
  }
72
 
@@ -100,26 +88,6 @@ SENTENCE_BANK = {
100
  "സംഗീതം മനസ്സിന് സന്തോഷം നൽകുന്നു.",
101
  "കുടുംബസമയം വളരെ വിലപ്പെട്ടതാണ്.",
102
  "കഠിനാധ്വാനം എപ്പോഴും ഫലം നൽകും."
103
- ],
104
- "Hindi": [
105
- "आज मौसम बहुत अच्छा है।",
106
- "मुझे हिंदी बोलना पसंद है।",
107
- "मैं रोज किताब पढ़ता हूँ।",
108
- "भारत की संस्कृति विविधतापूर्ण है।",
109
- "शिक्षा हमारे भविष्य की कुंजी है।",
110
- "संगीत हमारे दिल को छूता है।",
111
- "परिवार के साथ समय बिताना अनमोल है।",
112
- "मेहनत का फल हमेशा मीठा होता है।"
113
- ],
114
- "Sanskrit": [
115
- "अहं ग्रन्थं पठामि।",
116
- "अद्य सूर्यः तेजस्वी अस्ति।",
117
- "मम नाम रामः।",
118
- "विद्या सर्वत्र पूज्यते।",
119
- "सत्यमेव जयते।",
120
- "गुरुर्ब्रह्मा गुरुर्विष्णुः।",
121
- "वसुधैव कुटुम्बकम्।",
122
- "श्रम एव विजयते।"
123
  ]
124
  }
125
 
@@ -389,10 +357,10 @@ def get_pronunciation_score(wer_val, cer_val):
389
  def compare_pronunciation(audio, language_choice, intended_sentence):
390
  """Main function to compare pronunciation"""
391
  if audio is None:
392
- return ("❌ Please record audio first.", "", "", "", "", "", "", "", "", "")
393
 
394
  if not intended_sentence.strip():
395
- return ("❌ Please generate a practice sentence first.", "", "", "", "", "", "", "", "", "")
396
 
397
  try:
398
  print(f"🔍 Analyzing pronunciation for {language_choice}...")
@@ -408,7 +376,7 @@ def compare_pronunciation(audio, language_choice, intended_sentence):
408
 
409
  # Handle transcription errors
410
  if actual_text.startswith("Error:"):
411
- return (f"❌ {actual_text}", "", "", "", "", "", "", "", "", "")
412
 
413
  # Calculate error metrics
414
  try:
@@ -421,10 +389,13 @@ def compare_pronunciation(audio, language_choice, intended_sentence):
421
  # Get pronunciation score and feedback
422
  score_text, feedback = get_pronunciation_score(wer_val, cer_val)
423
 
424
- # Transliteration for Indic scripts
425
- hk_translit = transliterate_to_hk(actual_text, language_choice)
 
 
 
426
  if not is_script(actual_text, language_choice) and language_choice != "English":
427
- hk_translit = f"⚠️ Expected {language_choice} script, got mixed/other script"
428
 
429
  # Visual feedback
430
  diff_html = highlight_differences(intended_sentence, actual_text)
@@ -437,19 +408,23 @@ def compare_pronunciation(audio, language_choice, intended_sentence):
437
  status,
438
  actual_text or "(No transcription)",
439
  corrected_text or "(No corrected transcription)",
440
- hk_translit,
441
  f"{wer_val:.3f} ({(1-wer_val)*100:.1f}% word accuracy)",
442
  f"{cer_val:.3f} ({(1-cer_val)*100:.1f}% character accuracy)",
443
- diff_html,
444
- char_html,
445
- intended_sentence,
446
- f"🎯 Target: {intended_sentence}"
 
 
 
 
 
447
  )
448
 
449
  except Exception as e:
450
  error_msg = f"❌ Analysis Error: {str(e)[:200]}"
451
  print(f"Analysis error: {e}")
452
- return (error_msg, "", "", "", "", "", "", "", "", "")
453
 
454
  # ---------------- UI ---------------- #
455
  def create_interface():
@@ -522,13 +497,8 @@ def create_interface():
522
  interactive=False,
523
  lines=2
524
  )
525
- cer_out = gr.Textbox(
526
- label="📊 Character Accuracy",
527
- interactive=False
528
- )
529
-
530
- hk_out = gr.Textbox(
531
- label="🔤 Romanization (Harvard-Kyoto)",
532
  interactive=False
533
  )
534
 
@@ -558,14 +528,16 @@ def create_interface():
558
  # Event handlers
559
  def generate_and_clear(language):
560
  sentence = get_random_sentence(language)
561
- return sentence, "", "", "", "", "", "", "", "", ""
562
 
563
  gen_btn.click(
564
  fn=generate_and_clear,
565
  inputs=[lang_choice],
566
  outputs=[
567
  intended_display, status_output, pass1_out, pass2_out,
568
- hk_out, wer_out, cer_out, diff_html_box, char_html_box, target_display
 
 
569
  ]
570
  )
571
 
@@ -573,8 +545,9 @@ def create_interface():
573
  fn=compare_pronunciation,
574
  inputs=[audio_input, lang_choice, intended_display],
575
  outputs=[
576
- status_output, pass1_out, pass2_out, hk_out,
577
- wer_out, cer_out, diff_html_box,
 
578
  char_html_box, intended_display, target_display
579
  ]
580
  )
 
26
  LANG_CODES = {
27
  "English": "en",
28
  "Tamil": "ta",
29
+ "Malayalam": "ml"
 
 
30
  }
31
 
32
  # Updated model configurations for better HF Spaces compatibility
33
  ASR_MODELS = {
34
  "English": "openai/whisper-base.en",
35
  "Tamil": "vasista22/whisper-tamil-base", # Community model for Tamil
36
+ "Malayalam": "parambharat/whisper-small-ml" # Community model for Malayalam
 
 
37
  }
38
 
39
  # Backup models in case primary ones fail
40
  FALLBACK_MODELS = {
41
  "English": "openai/whisper-base.en",
42
  "Tamil": "openai/whisper-small",
43
+ "Malayalam": "openai/whisper-small"
 
 
44
  }
45
 
46
  LANG_PRIMERS = {
 
49
  "Tamil": ("தமிழில் எழுதுக.",
50
  "தமிழ் எழுத்துக்களில் மட்டும் எழுதவும். உதாரணம்: இது ஒரு தமிழ் வாக்கியம்."),
51
  "Malayalam": ("മലയാളത്തിൽ എഴുതുക.",
52
+ "മലയാള ലിപിയിൽ മാത്രം എഴുതുക. ഉദാഹരണം: ഇതൊരു മലയാള വാക്യമാണ്.")
 
 
 
 
53
  }
54
 
55
  SCRIPT_PATTERNS = {
56
  "Tamil": re.compile(r"[஀-௿]"),
57
  "Malayalam": re.compile(r"[ഀ-ൿ]"),
 
 
58
  "English": re.compile(r"[A-Za-z]")
59
  }
60
 
 
88
  "സംഗീതം മനസ്സിന് സന്തോഷം നൽകുന്നു.",
89
  "കുടുംബസമയം വളരെ വിലപ്പെട്ടതാണ്.",
90
  "കഠിനാധ്വാനം എപ്പോഴും ഫലം നൽകും."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  ]
92
  }
93
 
 
357
  def compare_pronunciation(audio, language_choice, intended_sentence):
358
  """Main function to compare pronunciation"""
359
  if audio is None:
360
+ return ("❌ Please record audio first.", "", "", "", "", "", "", "", "", "", "", "", "")
361
 
362
  if not intended_sentence.strip():
363
+ return ("❌ Please generate a practice sentence first.", "", "", "", "", "", "", "", "", "", "", "", "")
364
 
365
  try:
366
  print(f"🔍 Analyzing pronunciation for {language_choice}...")
 
376
 
377
  # Handle transcription errors
378
  if actual_text.startswith("Error:"):
379
+ return (f"❌ {actual_text}", "", "", "", "", "", "", "", "", "", "", "", "")
380
 
381
  # Calculate error metrics
382
  try:
 
389
  # Get pronunciation score and feedback
390
  score_text, feedback = get_pronunciation_score(wer_val, cer_val)
391
 
392
+ # Transliterations for both actual and intended
393
+ actual_hk = transliterate_to_hk(actual_text, language_choice)
394
+ target_hk = transliterate_to_hk(intended_sentence, language_choice)
395
+
396
+ # Handle script mismatches
397
  if not is_script(actual_text, language_choice) and language_choice != "English":
398
+ actual_hk = f"⚠️ Expected {language_choice} script, got mixed/other script"
399
 
400
  # Visual feedback
401
  diff_html = highlight_differences(intended_sentence, actual_text)
 
408
  status,
409
  actual_text or "(No transcription)",
410
  corrected_text or "(No corrected transcription)",
 
411
  f"{wer_val:.3f} ({(1-wer_val)*100:.1f}% word accuracy)",
412
  f"{cer_val:.3f} ({(1-cer_val)*100:.1f}% character accuracy)",
413
+ # New visual feedback outputs
414
+ actual_text or "(No transcription)", # actual_text_display
415
+ actual_hk, # actual_transliteration
416
+ intended_sentence, # target_text_display
417
+ target_hk, # target_transliteration
418
+ diff_html, # diff_html_box
419
+ char_html, # char_html_box
420
+ intended_sentence, # intended_display (unchanged)
421
+ f"🎯 Target: {intended_sentence}" # target_display
422
  )
423
 
424
  except Exception as e:
425
  error_msg = f"❌ Analysis Error: {str(e)[:200]}"
426
  print(f"Analysis error: {e}")
427
+ return (error_msg, "", "", "", "", "", "", "", "", "", "", "", "")
428
 
429
  # ---------------- UI ---------------- #
430
  def create_interface():
 
497
  interactive=False,
498
  lines=2
499
  )
500
+ cer_out = gr.Textbox(
501
+ label="📊 Character Accuracy",
 
 
 
 
 
502
  interactive=False
503
  )
504
 
 
528
  # Event handlers
529
  def generate_and_clear(language):
530
  sentence = get_random_sentence(language)
531
+ return sentence, "", "", "", "", "", "", "", "", "", "", "", ""
532
 
533
  gen_btn.click(
534
  fn=generate_and_clear,
535
  inputs=[lang_choice],
536
  outputs=[
537
  intended_display, status_output, pass1_out, pass2_out,
538
+ wer_out, cer_out, actual_text_display, actual_transliteration,
539
+ target_text_display, target_transliteration, diff_html_box,
540
+ char_html_box, target_display
541
  ]
542
  )
543
 
 
545
  fn=compare_pronunciation,
546
  inputs=[audio_input, lang_choice, intended_display],
547
  outputs=[
548
+ status_output, pass1_out, pass2_out,
549
+ wer_out, cer_out, actual_text_display, actual_transliteration,
550
+ target_text_display, target_transliteration, diff_html_box,
551
  char_html_box, intended_display, target_display
552
  ]
553
  )