Spaces:

sudhanm
/

whisper-largev2-raw-ta-ml

Running on Zero

App Files Community

sudhanm committed 11 days ago

Commit

9df7f33

verified ·

1 Parent(s): 386695f

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -59

app.py CHANGED Viewed

@@ -26,27 +26,21 @@ print(f"🔧 Using device: {DEVICE}")
 LANG_CODES = {
     "English": "en",
     "Tamil": "ta",
-    "Malayalam": "ml",
-    "Hindi": "hi",
-    "Sanskrit": "sa"
 }
 # Updated model configurations for better HF Spaces compatibility
 ASR_MODELS = {
     "English": "openai/whisper-base.en",
     "Tamil": "vasista22/whisper-tamil-base",  # Community model for Tamil
-    "Malayalam": "parambharat/whisper-small-ml",  # Community model for Malayalam
-    "Hindi": "vasista22/whisper-hindi-base",  # Community model for Hindi
-    "Sanskrit": "vasista22/whisper-hindi-base"  # Fallback to Hindi for Sanskrit
 }
 # Backup models in case primary ones fail
 FALLBACK_MODELS = {
     "English": "openai/whisper-base.en",
     "Tamil": "openai/whisper-small",
-    "Malayalam": "openai/whisper-small",
-    "Hindi": "openai/whisper-small",
-    "Sanskrit": "openai/whisper-small"
 }
 LANG_PRIMERS = {
@@ -55,18 +49,12 @@ LANG_PRIMERS = {
     "Tamil": ("தமிழில் எழுதுக.",
               "தமிழ் எழுத்துக்களில் மட்டும் எழுதவும். உதாரணம்: இது ஒரு தமிழ் வாக்கியம்."),
     "Malayalam": ("മലയാളത്തിൽ എഴുതുക.",
-                  "മലയാള ലിപിയിൽ മാത്രം എഴുതുക. ഉദാഹരണം: ഇതൊരു മലയാള വാക്യമാണ്."),
-    "Hindi": ("हिंदी में लिखें।",
-              "केवल देवनागरी लिपि में लिखें। उदाहरण: यह एक हिंदी वाक्य है।"),
-    "Sanskrit": ("संस्कृते लिखत।",
-                 "देवनागरी लिपि में लिखें। उदाहरण: अहं संस्कृतं जानामि।")
 }
 SCRIPT_PATTERNS = {
     "Tamil": re.compile(r"[஀-௿]"),
     "Malayalam": re.compile(r"[ഀ-ൿ]"),
-    "Hindi": re.compile(r"[ऀ-ॿ]"),
-    "Sanskrit": re.compile(r"[ऀ-ॿ]"),
     "English": re.compile(r"[A-Za-z]")
 }
@@ -100,26 +88,6 @@ SENTENCE_BANK = {
         "സംഗീതം മനസ്സിന് സന്തോഷം നൽകുന്നു.",
         "കുടുംബസമയം വളരെ വിലപ്പെട്ടതാണ്.",
         "കഠിനാധ്വാനം എപ്പോഴും ഫലം നൽകും."
-    ],
-    "Hindi": [
-        "आज मौसम बहुत अच्छा है।",
-        "मुझे हिंदी बोलना पसंद है।",
-        "मैं रोज किताब पढ़ता हूँ।",
-        "भारत की संस्कृति विविधतापूर्ण है।",
-        "शिक्षा हमारे भविष्य की कुंजी है।",
-        "संगीत हमारे दिल को छूता है।",
-        "परिवार के साथ समय बिताना अनमोल है।",
-        "मेहनत का फल हमेशा मीठा होता है।"
-    ],
-    "Sanskrit": [
-        "अहं ग्रन्थं पठामि।",
-        "अद्य सूर्यः तेजस्वी अस्ति।",
-        "मम नाम रामः।",
-        "विद्या सर्वत्र पूज्यते।",
-        "सत्यमेव जयते।",
-        "गुरुर्ब्रह्मा गुरुर्विष्णुः।",
-        "वसुधैव कुटुम्बकम्।",
-        "श्रम एव विजयते।"
     ]
 }
@@ -389,10 +357,10 @@ def get_pronunciation_score(wer_val, cer_val):
 def compare_pronunciation(audio, language_choice, intended_sentence):
     """Main function to compare pronunciation"""
     if audio is None:
-        return ("❌ Please record audio first.", "", "", "", "", "", "", "", "", "")
     if not intended_sentence.strip():
-        return ("❌ Please generate a practice sentence first.", "", "", "", "", "", "", "", "", "")
     try:
         print(f"🔍 Analyzing pronunciation for {language_choice}...")
@@ -408,7 +376,7 @@ def compare_pronunciation(audio, language_choice, intended_sentence):
         # Handle transcription errors
         if actual_text.startswith("Error:"):
-            return (f"❌ {actual_text}", "", "", "", "", "", "", "", "", "")
         # Calculate error metrics
         try:
@@ -421,10 +389,13 @@ def compare_pronunciation(audio, language_choice, intended_sentence):
         # Get pronunciation score and feedback
         score_text, feedback = get_pronunciation_score(wer_val, cer_val)
-        # Transliteration for Indic scripts
-        hk_translit = transliterate_to_hk(actual_text, language_choice)
         if not is_script(actual_text, language_choice) and language_choice != "English":
-            hk_translit = f"⚠️ Expected {language_choice} script, got mixed/other script"
         # Visual feedback
         diff_html = highlight_differences(intended_sentence, actual_text)
@@ -437,19 +408,23 @@ def compare_pronunciation(audio, language_choice, intended_sentence):
             status,
             actual_text or "(No transcription)",
             corrected_text or "(No corrected transcription)",
-            hk_translit,
             f"{wer_val:.3f} ({(1-wer_val)*100:.1f}% word accuracy)",
             f"{cer_val:.3f} ({(1-cer_val)*100:.1f}% character accuracy)",
-            diff_html,
-            char_html,
-            intended_sentence,
-            f"🎯 Target: {intended_sentence}"
         )
     except Exception as e:
         error_msg = f"❌ Analysis Error: {str(e)[:200]}"
         print(f"Analysis error: {e}")
-        return (error_msg, "", "", "", "", "", "", "", "", "")
 # ---------------- UI ---------------- #
 def create_interface():
@@ -522,13 +497,8 @@ def create_interface():
                     interactive=False,
                     lines=2
                 )
-                cer_out = gr.Textbox(
-                    label="📊 Character Accuracy",
-                    interactive=False
-                )
-        hk_out = gr.Textbox(
-            label="🔤 Romanization (Harvard-Kyoto)",
             interactive=False
         )
@@ -558,14 +528,16 @@ def create_interface():
         # Event handlers
         def generate_and_clear(language):
             sentence = get_random_sentence(language)
-            return sentence, "", "", "", "", "", "", "", "", ""
         gen_btn.click(
             fn=generate_and_clear,
             inputs=[lang_choice],
             outputs=[
                 intended_display, status_output, pass1_out, pass2_out,
-                hk_out, wer_out, cer_out, diff_html_box, char_html_box, target_display
             ]
         )
@@ -573,8 +545,9 @@ def create_interface():
             fn=compare_pronunciation,
             inputs=[audio_input, lang_choice, intended_display],
             outputs=[
-                status_output, pass1_out, pass2_out, hk_out,
-                wer_out, cer_out, diff_html_box,
                 char_html_box, intended_display, target_display
             ]
         )

 LANG_CODES = {
     "English": "en",
     "Tamil": "ta",
+    "Malayalam": "ml"
 }
 # Updated model configurations for better HF Spaces compatibility
 ASR_MODELS = {
     "English": "openai/whisper-base.en",
     "Tamil": "vasista22/whisper-tamil-base",  # Community model for Tamil
+    "Malayalam": "parambharat/whisper-small-ml"  # Community model for Malayalam
 }
 # Backup models in case primary ones fail
 FALLBACK_MODELS = {
     "English": "openai/whisper-base.en",
     "Tamil": "openai/whisper-small",
+    "Malayalam": "openai/whisper-small"
 }
 LANG_PRIMERS = {
     "Tamil": ("தமிழில் எழுதுக.",
               "தமிழ் எழுத்துக்களில் மட்டும் எழுதவும். உதாரணம்: இது ஒரு தமிழ் வாக்கியம்."),
     "Malayalam": ("മലയാളത്തിൽ എഴുതുക.",
+                  "മലയാള ലിപിയിൽ മാത്രം എഴുതുക. ഉദാഹരണം: ഇതൊരു മലയാള വാക്യമാണ്.")
 }
 SCRIPT_PATTERNS = {
     "Tamil": re.compile(r"[஀-௿]"),
     "Malayalam": re.compile(r"[ഀ-ൿ]"),
     "English": re.compile(r"[A-Za-z]")
 }
         "സംഗീതം മനസ്സിന് സന്തോഷം നൽകുന്നു.",
         "കുടുംബസമയം വളരെ വിലപ്പെട്ടതാണ്.",
         "കഠിനാധ്വാനം എപ്പോഴും ഫലം നൽകും."
     ]
 }
 def compare_pronunciation(audio, language_choice, intended_sentence):
     """Main function to compare pronunciation"""
     if audio is None:
+        return ("❌ Please record audio first.", "", "", "", "", "", "", "", "", "", "", "", "")
     if not intended_sentence.strip():
+        return ("❌ Please generate a practice sentence first.", "", "", "", "", "", "", "", "", "", "", "", "")
     try:
         print(f"🔍 Analyzing pronunciation for {language_choice}...")
         # Handle transcription errors
         if actual_text.startswith("Error:"):
+            return (f"❌ {actual_text}", "", "", "", "", "", "", "", "", "", "", "", "")
         # Calculate error metrics
         try:
         # Get pronunciation score and feedback
         score_text, feedback = get_pronunciation_score(wer_val, cer_val)
+        # Transliterations for both actual and intended
+        actual_hk = transliterate_to_hk(actual_text, language_choice)
+        target_hk = transliterate_to_hk(intended_sentence, language_choice)
+        # Handle script mismatches
         if not is_script(actual_text, language_choice) and language_choice != "English":
+            actual_hk = f"⚠️ Expected {language_choice} script, got mixed/other script"
         # Visual feedback
         diff_html = highlight_differences(intended_sentence, actual_text)
             status,
             actual_text or "(No transcription)",
             corrected_text or "(No corrected transcription)",
             f"{wer_val:.3f} ({(1-wer_val)*100:.1f}% word accuracy)",
             f"{cer_val:.3f} ({(1-cer_val)*100:.1f}% character accuracy)",
+            # New visual feedback outputs
+            actual_text or "(No transcription)",  # actual_text_display
+            actual_hk,  # actual_transliteration
+            intended_sentence,  # target_text_display
+            target_hk,  # target_transliteration
+            diff_html,  # diff_html_box
+            char_html,  # char_html_box
+            intended_sentence,  # intended_display (unchanged)
+            f"🎯 Target: {intended_sentence}"  # target_display
         )
     except Exception as e:
         error_msg = f"❌ Analysis Error: {str(e)[:200]}"
         print(f"Analysis error: {e}")
+        return (error_msg, "", "", "", "", "", "", "", "", "", "", "", "")
 # ---------------- UI ---------------- #
 def create_interface():
                     interactive=False,
                     lines=2
                 )
+        cer_out = gr.Textbox(
+            label="📊 Character Accuracy",
             interactive=False
         )
         # Event handlers
         def generate_and_clear(language):
             sentence = get_random_sentence(language)
+            return sentence, "", "", "", "", "", "", "", "", "", "", "", ""
         gen_btn.click(
             fn=generate_and_clear,
             inputs=[lang_choice],
             outputs=[
                 intended_display, status_output, pass1_out, pass2_out,
+                wer_out, cer_out, actual_text_display, actual_transliteration,
+                target_text_display, target_transliteration, diff_html_box,
+                char_html_box, target_display
             ]
         )
             fn=compare_pronunciation,
             inputs=[audio_input, lang_choice, intended_display],
             outputs=[
+                status_output, pass1_out, pass2_out,
+                wer_out, cer_out, actual_text_display, actual_transliteration,
+                target_text_display, target_transliteration, diff_html_box,
                 char_html_box, intended_display, target_display
             ]
         )