Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -3,12 +3,10 @@ import random
|
|
3 |
import difflib
|
4 |
import re
|
5 |
import jiwer
|
6 |
-
import torch
|
7 |
-
import soundfile as sf
|
8 |
from faster_whisper import WhisperModel
|
9 |
from indic_transliteration import sanscript
|
10 |
from indic_transliteration.sanscript import transliterate
|
11 |
-
from transformers import
|
12 |
|
13 |
# ---------------- CONFIG ---------------- #
|
14 |
MODEL_NAME = "large-v2"
|
@@ -58,7 +56,6 @@ SENTENCE_BANK = {
|
|
58 |
"मम नाम रामः।"]
|
59 |
}
|
60 |
|
61 |
-
# Voice/style mapping for IndicParler-TTS
|
62 |
VOICE_STYLE = {
|
63 |
"English": "An English female voice with a neutral Indian accent.",
|
64 |
"Tamil": "A female speaker with a clear Tamil accent.",
|
@@ -71,11 +68,9 @@ VOICE_STYLE = {
|
|
71 |
print("Loading Whisper model...")
|
72 |
whisper_model = WhisperModel(MODEL_NAME, device=DEVICE)
|
73 |
|
74 |
-
print("Loading IndicParler-TTS...")
|
75 |
TTS_MODEL_ID = "ai4bharat/indic-parler-tts"
|
76 |
-
|
77 |
-
tts_tokenizer = AutoTokenizer.from_pretrained(TTS_MODEL_ID)
|
78 |
-
tts_pipe = pipeline("text-to-speech", model=tts_model, tokenizer=tts_tokenizer)
|
79 |
|
80 |
# ---------------- HELPERS ---------------- #
|
81 |
def get_random_sentence(language_choice):
|
@@ -119,7 +114,6 @@ def highlight_differences(ref, hyp):
|
|
119 |
return " ".join(out_html)
|
120 |
|
121 |
def char_level_highlight(ref, hyp):
|
122 |
-
# Highlight correct in green, incorrect in red underline
|
123 |
sm = difflib.SequenceMatcher(None, list(ref), list(hyp))
|
124 |
out = []
|
125 |
for tag, i1, i2, j1, j2 in sm.get_opcodes():
|
@@ -128,7 +122,6 @@ def char_level_highlight(ref, hyp):
|
|
128 |
elif tag in ('replace', 'delete'):
|
129 |
out.extend([f"<span style='color:red;text-decoration:underline'>{c}</span>" for c in ref[i1:i2]])
|
130 |
elif tag == 'insert':
|
131 |
-
# Characters only in hyp - show orange
|
132 |
out.extend([f"<span style='color:orange'>{c}</span>" for c in hyp[j1:j2]])
|
133 |
return "".join(out)
|
134 |
|
@@ -148,11 +141,11 @@ def compare_pronunciation(audio, language_choice, intended_sentence,
|
|
148 |
lang_code = LANG_CODES[language_choice]
|
149 |
primer_weak, primer_strong = LANG_PRIMERS[language_choice]
|
150 |
|
151 |
-
# Pass 1
|
152 |
actual_text = transcribe_once(audio, lang_code, primer_weak,
|
153 |
pass1_beam, pass1_temp, pass1_condition)
|
154 |
|
155 |
-
# Pass 2
|
156 |
strict_prompt = f"{primer_strong}\nTarget: {intended_sentence}"
|
157 |
corrected_text = transcribe_once(audio, lang_code, strict_prompt,
|
158 |
beam_size=5, temperature=0.0, condition_on_previous_text=False)
|
@@ -161,11 +154,12 @@ def compare_pronunciation(audio, language_choice, intended_sentence,
|
|
161 |
wer_val = jiwer.wer(intended_sentence, actual_text)
|
162 |
cer_val = jiwer.cer(intended_sentence, actual_text)
|
163 |
|
164 |
-
# Transliteration
|
165 |
hk_translit = transliterate_to_hk(actual_text, language_choice) \
|
166 |
if is_script(actual_text, language_choice) \
|
167 |
else f"[Script mismatch: expected {language_choice}]"
|
168 |
|
|
|
169 |
diff_html = highlight_differences(intended_sentence, actual_text)
|
170 |
char_html = char_level_highlight(intended_sentence, actual_text)
|
171 |
|
@@ -177,8 +171,7 @@ def compare_pronunciation(audio, language_choice, intended_sentence,
|
|
177 |
|
178 |
# ---------------- UI ---------------- #
|
179 |
with gr.Blocks() as demo:
|
180 |
-
gr.Markdown("## 🎙 Pronunciation Comparator + IndicParler‑TTS + Error Highlighting\n"
|
181 |
-
"Generate sentence → Listen to TTS → Read aloud → See errors → Listen to your transcription")
|
182 |
|
183 |
with gr.Row():
|
184 |
lang_choice = gr.Dropdown(choices=list(LANG_CODES.keys()), value="Malayalam", label="Language")
|
|
|
3 |
import difflib
|
4 |
import re
|
5 |
import jiwer
|
|
|
|
|
6 |
from faster_whisper import WhisperModel
|
7 |
from indic_transliteration import sanscript
|
8 |
from indic_transliteration.sanscript import transliterate
|
9 |
+
from transformers import pipeline # only pipeline is needed for TTS
|
10 |
|
11 |
# ---------------- CONFIG ---------------- #
|
12 |
MODEL_NAME = "large-v2"
|
|
|
56 |
"मम नाम रामः।"]
|
57 |
}
|
58 |
|
|
|
59 |
VOICE_STYLE = {
|
60 |
"English": "An English female voice with a neutral Indian accent.",
|
61 |
"Tamil": "A female speaker with a clear Tamil accent.",
|
|
|
68 |
print("Loading Whisper model...")
|
69 |
whisper_model = WhisperModel(MODEL_NAME, device=DEVICE)
|
70 |
|
71 |
+
print("Loading IndicParler-TTS via pipeline...")
|
72 |
TTS_MODEL_ID = "ai4bharat/indic-parler-tts"
|
73 |
+
tts_pipe = pipeline("text-to-speech", model=TTS_MODEL_ID)
|
|
|
|
|
74 |
|
75 |
# ---------------- HELPERS ---------------- #
|
76 |
def get_random_sentence(language_choice):
|
|
|
114 |
return " ".join(out_html)
|
115 |
|
116 |
def char_level_highlight(ref, hyp):
|
|
|
117 |
sm = difflib.SequenceMatcher(None, list(ref), list(hyp))
|
118 |
out = []
|
119 |
for tag, i1, i2, j1, j2 in sm.get_opcodes():
|
|
|
122 |
elif tag in ('replace', 'delete'):
|
123 |
out.extend([f"<span style='color:red;text-decoration:underline'>{c}</span>" for c in ref[i1:i2]])
|
124 |
elif tag == 'insert':
|
|
|
125 |
out.extend([f"<span style='color:orange'>{c}</span>" for c in hyp[j1:j2]])
|
126 |
return "".join(out)
|
127 |
|
|
|
141 |
lang_code = LANG_CODES[language_choice]
|
142 |
primer_weak, primer_strong = LANG_PRIMERS[language_choice]
|
143 |
|
144 |
+
# Pass 1
|
145 |
actual_text = transcribe_once(audio, lang_code, primer_weak,
|
146 |
pass1_beam, pass1_temp, pass1_condition)
|
147 |
|
148 |
+
# Pass 2 (fixed)
|
149 |
strict_prompt = f"{primer_strong}\nTarget: {intended_sentence}"
|
150 |
corrected_text = transcribe_once(audio, lang_code, strict_prompt,
|
151 |
beam_size=5, temperature=0.0, condition_on_previous_text=False)
|
|
|
154 |
wer_val = jiwer.wer(intended_sentence, actual_text)
|
155 |
cer_val = jiwer.cer(intended_sentence, actual_text)
|
156 |
|
157 |
+
# Transliteration
|
158 |
hk_translit = transliterate_to_hk(actual_text, language_choice) \
|
159 |
if is_script(actual_text, language_choice) \
|
160 |
else f"[Script mismatch: expected {language_choice}]"
|
161 |
|
162 |
+
# Highlights
|
163 |
diff_html = highlight_differences(intended_sentence, actual_text)
|
164 |
char_html = char_level_highlight(intended_sentence, actual_text)
|
165 |
|
|
|
171 |
|
172 |
# ---------------- UI ---------------- #
|
173 |
with gr.Blocks() as demo:
|
174 |
+
gr.Markdown("## 🎙 Pronunciation Comparator + IndicParler‑TTS + Error Highlighting")
|
|
|
175 |
|
176 |
with gr.Row():
|
177 |
lang_choice = gr.Dropdown(choices=list(LANG_CODES.keys()), value="Malayalam", label="Language")
|