Spaces:

sudhanm
/

whisper-largev2-raw-ta-ml

Sleeping

App Files Files Community

sudhanm committed 15 days ago

Commit

2fbc223

verified ·

1 Parent(s): 3940c6b

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -18

app.py CHANGED Viewed

@@ -131,30 +131,95 @@ def transcribe_once(audio_path, language_choice, initial_prompt, beam_size, temp
 def highlight_differences(ref, hyp):
     ref_words, hyp_words = ref.strip().split(), hyp.strip().split()
     sm = difflib.SequenceMatcher(None, ref_words, hyp_words)
-    out_html = []
     for tag, i1, i2, j1, j2 in sm.get_opcodes():
         if tag == 'equal':
-            out_html.extend([f"<span style='color:green'>{w}</span>" for w in ref_words[i1:i2]])
         elif tag == 'replace':
-            out_html.extend([f"<span style='color:red'>{w}</span>" for w in ref_words[i1:i2]])
-            out_html.extend([f"<span style='color:orange'>{w}</span>" for w in hyp_words[j1:j2]])
         elif tag == 'delete':
-            out_html.extend([f"<span style='color:red;text-decoration:line-through'>{w}</span>" for w in ref_words[i1:i2]])
         elif tag == 'insert':
-            out_html.extend([f"<span style='color:orange'>{w}</span>" for w in hyp_words[j1:j2]])
-    return " ".join(out_html)
 def char_level_highlight(ref, hyp):
     sm = difflib.SequenceMatcher(None, list(ref), list(hyp))
-    out = []
     for tag, i1, i2, j1, j2 in sm.get_opcodes():
         if tag == 'equal':
-            out.extend([f"<span style='color:green'>{c}</span>" for c in ref[i1:i2]])
-        elif tag in ('replace', 'delete'):
-            out.extend([f"<span style='color:red;text-decoration:underline'>{c}</span>" for c in ref[i1:i2]])
         elif tag == 'insert':
-            out.extend([f"<span style='color:orange'>{c}</span>" for c in hyp[j1:j2]])
-    return "".join(out)
 # ---------------- MAIN ---------------- #
 @spaces.GPU
@@ -210,18 +275,21 @@ with gr.Blocks(title="Pronunciation Comparator") as demo:
     submit_btn = gr.Button("🔍 Analyze Pronunciation", variant="primary")
     with gr.Row():
         pass1_out = gr.Textbox(label="Pass 1: What You Actually Said")
         pass2_out = gr.Textbox(label="Pass 2: Target-Biased Output")
     with gr.Row():
         hk_out = gr.Textbox(label="Harvard-Kyoto Transliteration (Pass 1)")
-        wer_out = gr.Textbox(label="Word Error Rate")
-        cer_out = gr.Textbox(label="Character Error Rate")
-    gr.Markdown("### Visual Feedback")
-    diff_html_box = gr.HTML(label="Word Differences Highlighted")
-    char_html_box = gr.HTML(label="Character-Level Highlighting (mispronounced = red underline)")
     # Event handlers
     gen_btn.click(fn=get_random_sentence, inputs=[lang_choice], outputs=[intended_display])

 def highlight_differences(ref, hyp):
     ref_words, hyp_words = ref.strip().split(), hyp.strip().split()
     sm = difflib.SequenceMatcher(None, ref_words, hyp_words)
+    # Create side-by-side comparison
+    expected_html = []
+    actual_html = []
     for tag, i1, i2, j1, j2 in sm.get_opcodes():
         if tag == 'equal':
+            # Correct words - green background
+            expected_html.extend([f"<span style='background-color:#d4edda; color:#155724; padding:2px 4px; margin:1px; border-radius:3px;'>{w}</span>" for w in ref_words[i1:i2]])
+            actual_html.extend([f"<span style='background-color:#d4edda; color:#155724; padding:2px 4px; margin:1px; border-radius:3px;'>{w}</span>" for w in hyp_words[j1:j2]])
         elif tag == 'replace':
+            # Substituted words - red for expected, orange for actual
+            expected_html.extend([f"<span style='background-color:#f8d7da; color:#721c24; padding:2px 4px; margin:1px; border-radius:3px; text-decoration:underline;'>{w}</span>" for w in ref_words[i1:i2]])
+            actual_html.extend([f"<span style='background-color:#fff3cd; color:#856404; padding:2px 4px; margin:1px; border-radius:3px; font-weight:bold;'>{w}</span>" for w in hyp_words[j1:j2]])
         elif tag == 'delete':
+            # Missing words - red with strikethrough
+            expected_html.extend([f"<span style='background-color:#f8d7da; color:#721c24; padding:2px 4px; margin:1px; border-radius:3px; text-decoration:line-through;'>{w}</span>" for w in ref_words[i1:i2]])
         elif tag == 'insert':
+            # Extra words - orange
+            actual_html.extend([f"<span style='background-color:#fff3cd; color:#856404; padding:2px 4px; margin:1px; border-radius:3px; font-weight:bold;'>+{w}</span>" for w in hyp_words[j1:j2]])
+    # Create the comparison HTML
+    comparison_html = f"""
+    <div style='font-family: monospace; line-height: 2;'>
+        <div style='margin-bottom: 15px;'>
+            <strong>📝 Expected:</strong><br>
+            <div style='padding: 10px; background-color: #f8f9fa; border-radius: 5px; margin-top: 5px;'>
+                {" ".join(expected_html)}
+            </div>
+        </div>
+        <div style='margin-bottom: 15px;'>
+            <strong>🎤 You said:</strong><br>
+            <div style='padding: 10px; background-color: #f8f9fa; border-radius: 5px; margin-top: 5px;'>
+                {" ".join(actual_html)}
+            </div>
+        </div>
+        <div style='font-size: 12px; color: #6c757d; margin-top: 10px;'>
+            <span style='background-color:#d4edda; padding:2px 4px; border-radius:3px;'>✓ Correct</span>
+            <span style='background-color:#f8d7da; padding:2px 4px; border-radius:3px; margin-left:5px;'>✗ Expected</span>
+            <span style='background-color:#fff3cd; padding:2px 4px; border-radius:3px; margin-left:5px;'>+ Extra/Wrong</span>
+        </div>
+    </div>
+    """
+    return comparison_html
 def char_level_highlight(ref, hyp):
     sm = difflib.SequenceMatcher(None, list(ref), list(hyp))
+    expected_chars = []
+    actual_chars = []
     for tag, i1, i2, j1, j2 in sm.get_opcodes():
         if tag == 'equal':
+            # Correct characters - green background
+            expected_chars.extend([f"<span style='background-color:#d4edda; color:#155724;'>{c}</span>" for c in ref[i1:i2]])
+            actual_chars.extend([f"<span style='background-color:#d4edda; color:#155724;'>{c}</span>" for c in hyp[j1:j2]])
+        elif tag == 'replace':
+            # Different characters - red for expected, orange for actual
+            expected_chars.extend([f"<span style='background-color:#f8d7da; color:#721c24; text-decoration:underline;'>{c}</span>" for c in ref[i1:i2]])
+            actual_chars.extend([f"<span style='background-color:#fff3cd; color:#856404; font-weight:bold;'>{c}</span>" for c in hyp[j1:j2]])
+        elif tag == 'delete':
+            # Missing characters - red with strikethrough
+            expected_chars.extend([f"<span style='background-color:#f8d7da; color:#721c24; text-decoration:line-through;'>{c}</span>" for c in ref[i1:i2]])
         elif tag == 'insert':
+            # Extra characters - orange, bold (no "+" prefix is added at character level)
+            actual_chars.extend([f"<span style='background-color:#fff3cd; color:#856404; font-weight:bold;'>{c}</span>" for c in hyp[j1:j2]])
+    # Character-level comparison
+    char_comparison_html = f"""
+    <div style='font-family: monospace; line-height: 2; font-size: 16px;'>
+        <div style='margin-bottom: 15px;'>
+            <strong>📝 Expected (character-level):</strong><br>
+            <div style='padding: 10px; background-color: #f8f9fa; border-radius: 5px; margin-top: 5px; word-break: break-all; letter-spacing: 1px;'>
+                {"".join(expected_chars)}
+            </div>
+        </div>
+        <div style='margin-bottom: 15px;'>
+            <strong>🎤 You said (character-level):</strong><br>
+            <div style='padding: 10px; background-color: #f8f9fa; border-radius: 5px; margin-top: 5px; word-break: break-all; letter-spacing: 1px;'>
+                {"".join(actual_chars)}
+            </div>
+        </div>
+        <div style='font-size: 12px; color: #6c757d; margin-top: 10px;'>
+            Character-level analysis helps identify pronunciation issues within words
+        </div>
+    </div>
+    """
+    return char_comparison_html
 # ---------------- MAIN ---------------- #
 @spaces.GPU
     submit_btn = gr.Button("🔍 Analyze Pronunciation", variant="primary")
+    gr.Markdown("### 📊 Analysis Results")
     with gr.Row():
         pass1_out = gr.Textbox(label="Pass 1: What You Actually Said")
         pass2_out = gr.Textbox(label="Pass 2: Target-Biased Output")
     with gr.Row():
         hk_out = gr.Textbox(label="Harvard-Kyoto Transliteration (Pass 1)")
+        wer_out = gr.Textbox(label="Word Error Rate (WER)")
+        cer_out = gr.Textbox(label="Character Error Rate (CER)")
+    gr.Markdown("### 🎯 Visual Comparison")
+    gr.Markdown("Compare your pronunciation with the expected text to identify areas for improvement")
+    diff_html_box = gr.HTML(label="Word-Level Comparison")
+    char_html_box = gr.HTML(label="Character-Level Analysis")
     # Event handlers
     gen_btn.click(fn=get_random_sentence, inputs=[lang_choice], outputs=[intended_display])