Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -26,27 +26,21 @@ print(f"🔧 Using device: {DEVICE}")
|
|
26 |
LANG_CODES = {
|
27 |
"English": "en",
|
28 |
"Tamil": "ta",
|
29 |
-
"Malayalam": "ml"
|
30 |
-
"Hindi": "hi",
|
31 |
-
"Sanskrit": "sa"
|
32 |
}
|
33 |
|
34 |
# Updated model configurations for better HF Spaces compatibility
|
35 |
ASR_MODELS = {
|
36 |
"English": "openai/whisper-base.en",
|
37 |
"Tamil": "vasista22/whisper-tamil-base", # Community model for Tamil
|
38 |
-
"Malayalam": "parambharat/whisper-small-ml"
|
39 |
-
"Hindi": "vasista22/whisper-hindi-base", # Community model for Hindi
|
40 |
-
"Sanskrit": "vasista22/whisper-hindi-base" # Fallback to Hindi for Sanskrit
|
41 |
}
|
42 |
|
43 |
# Backup models in case primary ones fail
|
44 |
FALLBACK_MODELS = {
|
45 |
"English": "openai/whisper-base.en",
|
46 |
"Tamil": "openai/whisper-small",
|
47 |
-
"Malayalam": "openai/whisper-small"
|
48 |
-
"Hindi": "openai/whisper-small",
|
49 |
-
"Sanskrit": "openai/whisper-small"
|
50 |
}
|
51 |
|
52 |
LANG_PRIMERS = {
|
@@ -55,18 +49,12 @@ LANG_PRIMERS = {
|
|
55 |
"Tamil": ("தமிழில் எழுதுக.",
|
56 |
"தமிழ் எழுத்துக்களில் மட்டும் எழுதவும். உதாரணம்: இது ஒரு தமிழ் வாக்கியம்."),
|
57 |
"Malayalam": ("മലയാളത്തിൽ എഴുതുക.",
|
58 |
-
"മലയാള ലിപിയിൽ മാത്രം എഴുതുക. ഉദാഹരണം: ഇതൊരു മലയാള വാക്യമാണ്.")
|
59 |
-
"Hindi": ("हिंदी में लिखें।",
|
60 |
-
"केवल देवनागरी लिपि में लिखें। उदाहरण: यह एक हिंदी वाक्य है।"),
|
61 |
-
"Sanskrit": ("संस्कृते लिखत।",
|
62 |
-
"देवनागरी लिपि में लिखें। उदाहरण: अहं संस्कृतं जानामि।")
|
63 |
}
|
64 |
|
65 |
SCRIPT_PATTERNS = {
|
66 |
"Tamil": re.compile(r"[\u0B80-\u0BFF]"),
|
67 |
"Malayalam": re.compile(r"[ഀ-ൿ]"),
|
68 |
-
"Hindi": re.compile(r"[ऀ-ॿ]"),
|
69 |
-
"Sanskrit": re.compile(r"[ऀ-ॿ]"),
|
70 |
"English": re.compile(r"[A-Za-z]")
|
71 |
}
|
72 |
|
@@ -100,26 +88,6 @@ SENTENCE_BANK = {
|
|
100 |
"സംഗീതം മനസ്സിന് സന്തോഷം നൽകുന്നു.",
|
101 |
"കുടുംബസമയം വളരെ വിലപ്പെട്ടതാണ്.",
|
102 |
"കഠിനാധ്വാനം എപ്പോഴും ഫലം നൽകും."
|
103 |
-
],
|
104 |
-
"Hindi": [
|
105 |
-
"आज मौसम बहुत अच्छा है।",
|
106 |
-
"मुझे हिंदी बोलना पसंद है।",
|
107 |
-
"मैं रोज किताब पढ़ता हूँ।",
|
108 |
-
"भारत की संस्कृति विविधतापूर्ण है।",
|
109 |
-
"शिक्षा हमारे भविष्य की कुंजी है।",
|
110 |
-
"संगीत हमारे दिल को छूता है।",
|
111 |
-
"परिवार के साथ समय बिताना अनमोल है।",
|
112 |
-
"मेहनत का फल हमेशा मीठा होता है।"
|
113 |
-
],
|
114 |
-
"Sanskrit": [
|
115 |
-
"अहं ग्रन्थं पठामि।",
|
116 |
-
"अद्य सूर्यः तेजस्वी अस्ति।",
|
117 |
-
"मम नाम रामः।",
|
118 |
-
"विद्या सर्वत्र पूज्यते।",
|
119 |
-
"सत्यमेव जयते।",
|
120 |
-
"गुरुर्ब्रह्मा गुरुर्विष्णुः।",
|
121 |
-
"वसुधैव कुटुम्बकम्।",
|
122 |
-
"श्रम एव विजयते।"
|
123 |
]
|
124 |
}
|
125 |
|
@@ -389,10 +357,10 @@ def get_pronunciation_score(wer_val, cer_val):
|
|
389 |
def compare_pronunciation(audio, language_choice, intended_sentence):
|
390 |
"""Main function to compare pronunciation"""
|
391 |
if audio is None:
|
392 |
-
return ("❌ Please record audio first.", "", "", "", "", "", "", "", "", "")
|
393 |
|
394 |
if not intended_sentence.strip():
|
395 |
-
return ("❌ Please generate a practice sentence first.", "", "", "", "", "", "", "", "", "")
|
396 |
|
397 |
try:
|
398 |
print(f"🔍 Analyzing pronunciation for {language_choice}...")
|
@@ -408,7 +376,7 @@ def compare_pronunciation(audio, language_choice, intended_sentence):
|
|
408 |
|
409 |
# Handle transcription errors
|
410 |
if actual_text.startswith("Error:"):
|
411 |
-
return (f"❌ {actual_text}", "", "", "", "", "", "", "", "", "")
|
412 |
|
413 |
# Calculate error metrics
|
414 |
try:
|
@@ -421,10 +389,13 @@ def compare_pronunciation(audio, language_choice, intended_sentence):
|
|
421 |
# Get pronunciation score and feedback
|
422 |
score_text, feedback = get_pronunciation_score(wer_val, cer_val)
|
423 |
|
424 |
-
#
|
425 |
-
|
|
|
|
|
|
|
426 |
if not is_script(actual_text, language_choice) and language_choice != "English":
|
427 |
-
|
428 |
|
429 |
# Visual feedback
|
430 |
diff_html = highlight_differences(intended_sentence, actual_text)
|
@@ -437,19 +408,23 @@ def compare_pronunciation(audio, language_choice, intended_sentence):
|
|
437 |
status,
|
438 |
actual_text or "(No transcription)",
|
439 |
corrected_text or "(No corrected transcription)",
|
440 |
-
hk_translit,
|
441 |
f"{wer_val:.3f} ({(1-wer_val)*100:.1f}% word accuracy)",
|
442 |
f"{cer_val:.3f} ({(1-cer_val)*100:.1f}% character accuracy)",
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
|
|
|
|
|
|
|
|
|
|
447 |
)
|
448 |
|
449 |
except Exception as e:
|
450 |
error_msg = f"❌ Analysis Error: {str(e)[:200]}"
|
451 |
print(f"Analysis error: {e}")
|
452 |
-
return (error_msg, "", "", "", "", "", "", "", "", "")
|
453 |
|
454 |
# ---------------- UI ---------------- #
|
455 |
def create_interface():
|
@@ -522,13 +497,8 @@ def create_interface():
|
|
522 |
interactive=False,
|
523 |
lines=2
|
524 |
)
|
525 |
-
|
526 |
-
|
527 |
-
interactive=False
|
528 |
-
)
|
529 |
-
|
530 |
-
hk_out = gr.Textbox(
|
531 |
-
label="🔤 Romanization (Harvard-Kyoto)",
|
532 |
interactive=False
|
533 |
)
|
534 |
|
@@ -558,14 +528,16 @@ def create_interface():
|
|
558 |
# Event handlers
|
559 |
def generate_and_clear(language):
|
560 |
sentence = get_random_sentence(language)
|
561 |
-
return sentence, "", "", "", "", "", "", "", "", ""
|
562 |
|
563 |
gen_btn.click(
|
564 |
fn=generate_and_clear,
|
565 |
inputs=[lang_choice],
|
566 |
outputs=[
|
567 |
intended_display, status_output, pass1_out, pass2_out,
|
568 |
-
|
|
|
|
|
569 |
]
|
570 |
)
|
571 |
|
@@ -573,8 +545,9 @@ def create_interface():
|
|
573 |
fn=compare_pronunciation,
|
574 |
inputs=[audio_input, lang_choice, intended_display],
|
575 |
outputs=[
|
576 |
-
status_output, pass1_out, pass2_out,
|
577 |
-
wer_out, cer_out,
|
|
|
578 |
char_html_box, intended_display, target_display
|
579 |
]
|
580 |
)
|
|
|
26 |
LANG_CODES = {
|
27 |
"English": "en",
|
28 |
"Tamil": "ta",
|
29 |
+
"Malayalam": "ml"
|
|
|
|
|
30 |
}
|
31 |
|
32 |
# Updated model configurations for better HF Spaces compatibility
|
33 |
ASR_MODELS = {
|
34 |
"English": "openai/whisper-base.en",
|
35 |
"Tamil": "vasista22/whisper-tamil-base", # Community model for Tamil
|
36 |
+
"Malayalam": "parambharat/whisper-small-ml" # Community model for Malayalam
|
|
|
|
|
37 |
}
|
38 |
|
39 |
# Backup models in case primary ones fail
|
40 |
FALLBACK_MODELS = {
|
41 |
"English": "openai/whisper-base.en",
|
42 |
"Tamil": "openai/whisper-small",
|
43 |
+
"Malayalam": "openai/whisper-small"
|
|
|
|
|
44 |
}
|
45 |
|
46 |
LANG_PRIMERS = {
|
|
|
49 |
"Tamil": ("தமிழில் எழுதுக.",
|
50 |
"தமிழ் எழுத்துக்களில் மட்டும் எழுதவும். உதாரணம்: இது ஒரு தமிழ் வாக்கியம்."),
|
51 |
"Malayalam": ("മലയാളത്തിൽ എഴുതുക.",
|
52 |
+
"മലയാള ലിപിയിൽ മാത്രം എഴുതുക. ഉദാഹരണം: ഇതൊരു മലയാള വാക്യമാണ്.")
|
|
|
|
|
|
|
|
|
53 |
}
|
54 |
|
55 |
SCRIPT_PATTERNS = {
|
56 |
"Tamil": re.compile(r"[\u0B80-\u0BFF]"),
|
57 |
"Malayalam": re.compile(r"[ഀ-ൿ]"),
|
|
|
|
|
58 |
"English": re.compile(r"[A-Za-z]")
|
59 |
}
|
60 |
|
|
|
88 |
"സംഗീതം മനസ്സിന് സന്തോഷം നൽകുന്നു.",
|
89 |
"കുടുംബസമയം വളരെ വിലപ്പെട്ടതാണ്.",
|
90 |
"കഠിനാധ്വാനം എപ്പോഴും ഫലം നൽകും."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
]
|
92 |
}
|
93 |
|
|
|
357 |
def compare_pronunciation(audio, language_choice, intended_sentence):
|
358 |
"""Main function to compare pronunciation"""
|
359 |
if audio is None:
|
360 |
+
return ("❌ Please record audio first.", "", "", "", "", "", "", "", "", "", "", "", "")
|
361 |
|
362 |
if not intended_sentence.strip():
|
363 |
+
return ("❌ Please generate a practice sentence first.", "", "", "", "", "", "", "", "", "", "", "", "")
|
364 |
|
365 |
try:
|
366 |
print(f"🔍 Analyzing pronunciation for {language_choice}...")
|
|
|
376 |
|
377 |
# Handle transcription errors
|
378 |
if actual_text.startswith("Error:"):
|
379 |
+
return (f"❌ {actual_text}", "", "", "", "", "", "", "", "", "", "", "", "")
|
380 |
|
381 |
# Calculate error metrics
|
382 |
try:
|
|
|
389 |
# Get pronunciation score and feedback
|
390 |
score_text, feedback = get_pronunciation_score(wer_val, cer_val)
|
391 |
|
392 |
+
# Transliterations for both actual and intended
|
393 |
+
actual_hk = transliterate_to_hk(actual_text, language_choice)
|
394 |
+
target_hk = transliterate_to_hk(intended_sentence, language_choice)
|
395 |
+
|
396 |
+
# Handle script mismatches
|
397 |
if not is_script(actual_text, language_choice) and language_choice != "English":
|
398 |
+
actual_hk = f"⚠️ Expected {language_choice} script, got mixed/other script"
|
399 |
|
400 |
# Visual feedback
|
401 |
diff_html = highlight_differences(intended_sentence, actual_text)
|
|
|
408 |
status,
|
409 |
actual_text or "(No transcription)",
|
410 |
corrected_text or "(No corrected transcription)",
|
|
|
411 |
f"{wer_val:.3f} ({(1-wer_val)*100:.1f}% word accuracy)",
|
412 |
f"{cer_val:.3f} ({(1-cer_val)*100:.1f}% character accuracy)",
|
413 |
+
# New visual feedback outputs
|
414 |
+
actual_text or "(No transcription)", # actual_text_display
|
415 |
+
actual_hk, # actual_transliteration
|
416 |
+
intended_sentence, # target_text_display
|
417 |
+
target_hk, # target_transliteration
|
418 |
+
diff_html, # diff_html_box
|
419 |
+
char_html, # char_html_box
|
420 |
+
intended_sentence, # intended_display (unchanged)
|
421 |
+
f"🎯 Target: {intended_sentence}" # target_display
|
422 |
)
|
423 |
|
424 |
except Exception as e:
|
425 |
error_msg = f"❌ Analysis Error: {str(e)[:200]}"
|
426 |
print(f"Analysis error: {e}")
|
427 |
+
return (error_msg, "", "", "", "", "", "", "", "", "", "", "", "")
|
428 |
|
429 |
# ---------------- UI ---------------- #
|
430 |
def create_interface():
|
|
|
497 |
interactive=False,
|
498 |
lines=2
|
499 |
)
|
500 |
+
cer_out = gr.Textbox(
|
501 |
+
label="📊 Character Accuracy",
|
|
|
|
|
|
|
|
|
|
|
502 |
interactive=False
|
503 |
)
|
504 |
|
|
|
528 |
# Event handlers
|
529 |
def generate_and_clear(language):
|
530 |
sentence = get_random_sentence(language)
|
531 |
+
return sentence, "", "", "", "", "", "", "", "", "", "", "", ""
|
532 |
|
533 |
gen_btn.click(
|
534 |
fn=generate_and_clear,
|
535 |
inputs=[lang_choice],
|
536 |
outputs=[
|
537 |
intended_display, status_output, pass1_out, pass2_out,
|
538 |
+
wer_out, cer_out, actual_text_display, actual_transliteration,
|
539 |
+
target_text_display, target_transliteration, diff_html_box,
|
540 |
+
char_html_box, target_display
|
541 |
]
|
542 |
)
|
543 |
|
|
|
545 |
fn=compare_pronunciation,
|
546 |
inputs=[audio_input, lang_choice, intended_display],
|
547 |
outputs=[
|
548 |
+
status_output, pass1_out, pass2_out,
|
549 |
+
wer_out, cer_out, actual_text_display, actual_transliteration,
|
550 |
+
target_text_display, target_transliteration, diff_html_box,
|
551 |
char_html_box, intended_display, target_display
|
552 |
]
|
553 |
)
|