Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -131,30 +131,95 @@ def transcribe_once(audio_path, language_choice, initial_prompt, beam_size, temp
|
|
131 |
def highlight_differences(ref, hyp):
|
132 |
ref_words, hyp_words = ref.strip().split(), hyp.strip().split()
|
133 |
sm = difflib.SequenceMatcher(None, ref_words, hyp_words)
|
134 |
-
|
|
|
|
|
|
|
|
|
135 |
for tag, i1, i2, j1, j2 in sm.get_opcodes():
|
136 |
if tag == 'equal':
|
137 |
-
|
|
|
|
|
138 |
elif tag == 'replace':
|
139 |
-
|
140 |
-
|
|
|
141 |
elif tag == 'delete':
|
142 |
-
|
|
|
143 |
elif tag == 'insert':
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
def char_level_highlight(ref, hyp):
|
148 |
sm = difflib.SequenceMatcher(None, list(ref), list(hyp))
|
149 |
-
|
|
|
|
|
150 |
for tag, i1, i2, j1, j2 in sm.get_opcodes():
|
151 |
if tag == 'equal':
|
152 |
-
|
153 |
-
|
154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
elif tag == 'insert':
|
156 |
-
|
157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
|
159 |
# ---------------- MAIN ---------------- #
|
160 |
@spaces.GPU
|
@@ -210,18 +275,21 @@ with gr.Blocks(title="Pronunciation Comparator") as demo:
|
|
210 |
|
211 |
submit_btn = gr.Button("π Analyze Pronunciation", variant="primary")
|
212 |
|
|
|
213 |
with gr.Row():
|
214 |
pass1_out = gr.Textbox(label="Pass 1: What You Actually Said")
|
215 |
pass2_out = gr.Textbox(label="Pass 2: Target-Biased Output")
|
216 |
|
217 |
with gr.Row():
|
218 |
hk_out = gr.Textbox(label="Harvard-Kyoto Transliteration (Pass 1)")
|
219 |
-
wer_out = gr.Textbox(label="Word Error Rate")
|
220 |
-
cer_out = gr.Textbox(label="Character Error Rate")
|
221 |
|
222 |
-
gr.Markdown("### Visual
|
223 |
-
|
224 |
-
|
|
|
|
|
225 |
|
226 |
# Event handlers
|
227 |
gen_btn.click(fn=get_random_sentence, inputs=[lang_choice], outputs=[intended_display])
|
|
|
131 |
def highlight_differences(ref, hyp):
    """Build a side-by-side, word-level HTML diff of ``ref`` against ``hyp``.

    Both strings are split on whitespace and aligned with
    ``difflib.SequenceMatcher``. Every word is wrapped in a styled
    ``<span>`` according to the opcode it falls under:

    * ``equal``   - green, in both rows;
    * ``replace`` - red and underlined in the expected row, orange and bold
      in the actual row;
    * ``delete``  - red and struck through, expected row only;
    * ``insert``  - orange and bold with a leading ``+``, actual row only.

    Words are HTML-escaped before being embedded, so markup characters in
    either input are shown literally instead of being interpreted.

    Args:
        ref: Expected (reference) text.
        hyp: Text to compare against the reference.

    Returns:
        An HTML fragment (``str``) containing an "Expected" row, a
        "You said" row and a colour legend.
    """
    import html  # local import keeps this function self-contained

    # str.split() with no argument already ignores leading/trailing whitespace.
    ref_words, hyp_words = ref.split(), hyp.split()
    sm = difflib.SequenceMatcher(None, ref_words, hyp_words)

    # Shared "chip" geometry plus the three colour schemes used below.
    chip = "padding:2px 4px; margin:1px; border-radius:3px;"
    correct_style = f"background-color:#d4edda; color:#155724; {chip}"
    expected_style = f"background-color:#f8d7da; color:#721c24; {chip}"
    extra_style = f"background-color:#fff3cd; color:#856404; {chip} font-weight:bold;"

    def spans(words, style, prefix=""):
        """Wrap each word in a styled <span>, escaping HTML metacharacters."""
        return [
            f"<span style='{style}'>{prefix}{html.escape(w)}</span>"
            for w in words
        ]

    expected_html = []
    actual_html = []

    for tag, i1, i2, j1, j2 in sm.get_opcodes():
        ref_chunk, hyp_chunk = ref_words[i1:i2], hyp_words[j1:j2]
        if tag == 'equal':
            # Correct words - green background on both sides
            expected_html.extend(spans(ref_chunk, correct_style))
            actual_html.extend(spans(hyp_chunk, correct_style))
        elif tag == 'replace':
            # Substituted words - red for expected, orange for actual
            expected_html.extend(
                spans(ref_chunk, f"{expected_style} text-decoration:underline;")
            )
            actual_html.extend(spans(hyp_chunk, extra_style))
        elif tag == 'delete':
            # Missing words - red with strikethrough, expected row only
            expected_html.extend(
                spans(ref_chunk, f"{expected_style} text-decoration:line-through;")
            )
        elif tag == 'insert':
            # Extra words - orange with a "+" marker, actual row only
            actual_html.extend(spans(hyp_chunk, extra_style, prefix="+"))

    expected_row = " ".join(expected_html)
    actual_row = " ".join(actual_html)

    # NOTE(review): the leading glyphs in the labels/legend below look
    # mis-encoded (probably emoji); confirm against the intended UI text.
    comparison_html = f"""
    <div style='font-family: monospace; line-height: 2;'>
        <div style='margin-bottom: 15px;'>
            <strong>π Expected:</strong><br>
            <div style='padding: 10px; background-color: #f8f9fa; border-radius: 5px; margin-top: 5px;'>
                {expected_row}
            </div>
        </div>
        <div style='margin-bottom: 15px;'>
            <strong>π€ You said:</strong><br>
            <div style='padding: 10px; background-color: #f8f9fa; border-radius: 5px; margin-top: 5px;'>
                {actual_row}
            </div>
        </div>
        <div style='font-size: 12px; color: #6c757d; margin-top: 10px;'>
            <span style='background-color:#d4edda; padding:2px 4px; border-radius:3px;'>β Correct</span>
            <span style='background-color:#f8d7da; padding:2px 4px; border-radius:3px; margin-left:5px;'>β Expected</span>
            <span style='background-color:#fff3cd; padding:2px 4px; border-radius:3px; margin-left:5px;'>+ Extra/Wrong</span>
        </div>
    </div>
    """

    return comparison_html
|
179 |
|
180 |
def char_level_highlight(ref, hyp):
    """Build a side-by-side, character-level HTML diff of ``ref`` vs ``hyp``.

    The two strings are aligned character by character with
    ``difflib.SequenceMatcher`` and each character is wrapped in a styled
    ``<span>`` according to the opcode it falls under:

    * ``equal``   - green, in both rows;
    * ``replace`` - red and underlined in the expected row, orange and bold
      in the actual row;
    * ``delete``  - red and struck through, expected row only;
    * ``insert``  - orange and bold, actual row only.

    Characters are HTML-escaped before being embedded, so ``<``, ``>``,
    ``&`` and quotes in either input are shown literally instead of being
    interpreted as markup.

    Args:
        ref: Expected (reference) text.
        hyp: Text to compare against the reference.

    Returns:
        An HTML fragment (``str``) containing an "Expected" row, a
        "You said" row and a short explanatory footer.
    """
    import html  # local import keeps this function self-contained

    # Strings are already sequences of characters; no list() copy is needed.
    sm = difflib.SequenceMatcher(None, ref, hyp)

    correct_style = "background-color:#d4edda; color:#155724;"
    expected_style = "background-color:#f8d7da; color:#721c24;"
    extra_style = "background-color:#fff3cd; color:#856404; font-weight:bold;"

    def spans(chars, style):
        """Wrap each character in a styled <span>, escaping HTML metacharacters."""
        return [f"<span style='{style}'>{html.escape(c)}</span>" for c in chars]

    expected_chars = []
    actual_chars = []

    for tag, i1, i2, j1, j2 in sm.get_opcodes():
        ref_chunk, hyp_chunk = ref[i1:i2], hyp[j1:j2]
        if tag == 'equal':
            # Correct characters - green background on both sides
            expected_chars.extend(spans(ref_chunk, correct_style))
            actual_chars.extend(spans(hyp_chunk, correct_style))
        elif tag == 'replace':
            # Different characters - red for expected, orange for actual
            expected_chars.extend(
                spans(ref_chunk, f"{expected_style} text-decoration:underline;")
            )
            actual_chars.extend(spans(hyp_chunk, extra_style))
        elif tag == 'delete':
            # Missing characters - red with strikethrough, expected row only
            expected_chars.extend(
                spans(ref_chunk, f"{expected_style} text-decoration:line-through;")
            )
        elif tag == 'insert':
            # Extra characters - orange, actual row only (no "+" marker here,
            # unlike the word-level view)
            actual_chars.extend(spans(hyp_chunk, extra_style))

    expected_row = "".join(expected_chars)
    actual_row = "".join(actual_chars)

    # NOTE(review): the leading glyphs in the labels below look mis-encoded
    # (probably emoji); confirm against the intended UI text.
    char_comparison_html = f"""
    <div style='font-family: monospace; line-height: 2; font-size: 16px;'>
        <div style='margin-bottom: 15px;'>
            <strong>π Expected (character-level):</strong><br>
            <div style='padding: 10px; background-color: #f8f9fa; border-radius: 5px; margin-top: 5px; word-break: break-all; letter-spacing: 1px;'>
                {expected_row}
            </div>
        </div>
        <div style='margin-bottom: 15px;'>
            <strong>π€ You said (character-level):</strong><br>
            <div style='padding: 10px; background-color: #f8f9fa; border-radius: 5px; margin-top: 5px; word-break: break-all; letter-spacing: 1px;'>
                {actual_row}
            </div>
        </div>
        <div style='font-size: 12px; color: #6c757d; margin-top: 10px;'>
            Character-level analysis helps identify pronunciation issues within words
        </div>
    </div>
    """

    return char_comparison_html
|
223 |
|
224 |
# ---------------- MAIN ---------------- #
|
225 |
@spaces.GPU
|
|
|
275 |
|
276 |
submit_btn = gr.Button("π Analyze Pronunciation", variant="primary")
|
277 |
|
278 |
+
gr.Markdown("### π Analysis Results")
|
279 |
with gr.Row():
|
280 |
pass1_out = gr.Textbox(label="Pass 1: What You Actually Said")
|
281 |
pass2_out = gr.Textbox(label="Pass 2: Target-Biased Output")
|
282 |
|
283 |
with gr.Row():
|
284 |
hk_out = gr.Textbox(label="Harvard-Kyoto Transliteration (Pass 1)")
|
285 |
+
wer_out = gr.Textbox(label="Word Error Rate (WER)")
|
286 |
+
cer_out = gr.Textbox(label="Character Error Rate (CER)")
|
287 |
|
288 |
+
gr.Markdown("### π― Visual Comparison")
|
289 |
+
gr.Markdown("Compare your pronunciation with the expected text to identify areas for improvement")
|
290 |
+
|
291 |
+
diff_html_box = gr.HTML(label="Word-Level Comparison")
|
292 |
+
char_html_box = gr.HTML(label="Character-Level Analysis")
|
293 |
|
294 |
# Event handlers
|
295 |
gen_btn.click(fn=get_random_sentence, inputs=[lang_choice], outputs=[intended_display])
|