sudhanm committed on
Commit
bc807f8
·
verified ·
1 Parent(s): 73253af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -15
app.py CHANGED
@@ -3,12 +3,10 @@ import random
3
  import difflib
4
  import re
5
  import jiwer
6
- import torch
7
- import soundfile as sf
8
  from faster_whisper import WhisperModel
9
  from indic_transliteration import sanscript
10
  from indic_transliteration.sanscript import transliterate
11
- from transformers import AutoModelForTextToSpeech, AutoTokenizer, pipeline
12
 
13
  # ---------------- CONFIG ---------------- #
14
  MODEL_NAME = "large-v2"
@@ -58,7 +56,6 @@ SENTENCE_BANK = {
58
  "मम नाम रामः।"]
59
  }
60
 
61
- # Voice/style mapping for IndicParler-TTS
62
  VOICE_STYLE = {
63
  "English": "An English female voice with a neutral Indian accent.",
64
  "Tamil": "A female speaker with a clear Tamil accent.",
@@ -71,11 +68,9 @@ VOICE_STYLE = {
71
  print("Loading Whisper model...")
72
  whisper_model = WhisperModel(MODEL_NAME, device=DEVICE)
73
 
74
- print("Loading IndicParler-TTS...")
75
  TTS_MODEL_ID = "ai4bharat/indic-parler-tts"
76
- tts_model = AutoModelForTextToSpeech.from_pretrained(TTS_MODEL_ID)
77
- tts_tokenizer = AutoTokenizer.from_pretrained(TTS_MODEL_ID)
78
- tts_pipe = pipeline("text-to-speech", model=tts_model, tokenizer=tts_tokenizer)
79
 
80
  # ---------------- HELPERS ---------------- #
81
  def get_random_sentence(language_choice):
@@ -119,7 +114,6 @@ def highlight_differences(ref, hyp):
119
  return " ".join(out_html)
120
 
121
  def char_level_highlight(ref, hyp):
122
- # Highlight correct in green, incorrect in red underline
123
  sm = difflib.SequenceMatcher(None, list(ref), list(hyp))
124
  out = []
125
  for tag, i1, i2, j1, j2 in sm.get_opcodes():
@@ -128,7 +122,6 @@ def char_level_highlight(ref, hyp):
128
  elif tag in ('replace', 'delete'):
129
  out.extend([f"<span style='color:red;text-decoration:underline'>{c}</span>" for c in ref[i1:i2]])
130
  elif tag == 'insert':
131
- # Characters only in hyp - show orange
132
  out.extend([f"<span style='color:orange'>{c}</span>" for c in hyp[j1:j2]])
133
  return "".join(out)
134
 
@@ -148,11 +141,11 @@ def compare_pronunciation(audio, language_choice, intended_sentence,
148
  lang_code = LANG_CODES[language_choice]
149
  primer_weak, primer_strong = LANG_PRIMERS[language_choice]
150
 
151
- # Pass 1 - actual speech
152
  actual_text = transcribe_once(audio, lang_code, primer_weak,
153
  pass1_beam, pass1_temp, pass1_condition)
154
 
155
- # Pass 2 - target biased (fixed)
156
  strict_prompt = f"{primer_strong}\nTarget: {intended_sentence}"
157
  corrected_text = transcribe_once(audio, lang_code, strict_prompt,
158
  beam_size=5, temperature=0.0, condition_on_previous_text=False)
@@ -161,11 +154,12 @@ def compare_pronunciation(audio, language_choice, intended_sentence,
161
  wer_val = jiwer.wer(intended_sentence, actual_text)
162
  cer_val = jiwer.cer(intended_sentence, actual_text)
163
 
164
- # Transliteration - pass1
165
  hk_translit = transliterate_to_hk(actual_text, language_choice) \
166
  if is_script(actual_text, language_choice) \
167
  else f"[Script mismatch: expected {language_choice}]"
168
 
 
169
  diff_html = highlight_differences(intended_sentence, actual_text)
170
  char_html = char_level_highlight(intended_sentence, actual_text)
171
 
@@ -177,8 +171,7 @@ def compare_pronunciation(audio, language_choice, intended_sentence,
177
 
178
  # ---------------- UI ---------------- #
179
  with gr.Blocks() as demo:
180
- gr.Markdown("## 🎙 Pronunciation Comparator + IndicParler‑TTS + Error Highlighting\n"
181
- "Generate sentence → Listen to TTS → Read aloud → See errors → Listen to your transcription")
182
 
183
  with gr.Row():
184
  lang_choice = gr.Dropdown(choices=list(LANG_CODES.keys()), value="Malayalam", label="Language")
 
3
  import difflib
4
  import re
5
  import jiwer
 
 
6
  from faster_whisper import WhisperModel
7
  from indic_transliteration import sanscript
8
  from indic_transliteration.sanscript import transliterate
9
+ from transformers import pipeline # only pipeline is needed for TTS
10
 
11
  # ---------------- CONFIG ---------------- #
12
  MODEL_NAME = "large-v2"
 
56
  "मम नाम रामः।"]
57
  }
58
 
 
59
  VOICE_STYLE = {
60
  "English": "An English female voice with a neutral Indian accent.",
61
  "Tamil": "A female speaker with a clear Tamil accent.",
 
68
  print("Loading Whisper model...")
69
  whisper_model = WhisperModel(MODEL_NAME, device=DEVICE)
70
 
71
+ print("Loading IndicParler-TTS via pipeline...")
72
  TTS_MODEL_ID = "ai4bharat/indic-parler-tts"
73
+ tts_pipe = pipeline("text-to-speech", model=TTS_MODEL_ID)
 
 
74
 
75
  # ---------------- HELPERS ---------------- #
76
  def get_random_sentence(language_choice):
 
114
  return " ".join(out_html)
115
 
116
  def char_level_highlight(ref, hyp):
 
117
  sm = difflib.SequenceMatcher(None, list(ref), list(hyp))
118
  out = []
119
  for tag, i1, i2, j1, j2 in sm.get_opcodes():
 
122
  elif tag in ('replace', 'delete'):
123
  out.extend([f"<span style='color:red;text-decoration:underline'>{c}</span>" for c in ref[i1:i2]])
124
  elif tag == 'insert':
 
125
  out.extend([f"<span style='color:orange'>{c}</span>" for c in hyp[j1:j2]])
126
  return "".join(out)
127
 
 
141
  lang_code = LANG_CODES[language_choice]
142
  primer_weak, primer_strong = LANG_PRIMERS[language_choice]
143
 
144
+ # Pass 1
145
  actual_text = transcribe_once(audio, lang_code, primer_weak,
146
  pass1_beam, pass1_temp, pass1_condition)
147
 
148
+ # Pass 2 (fixed)
149
  strict_prompt = f"{primer_strong}\nTarget: {intended_sentence}"
150
  corrected_text = transcribe_once(audio, lang_code, strict_prompt,
151
  beam_size=5, temperature=0.0, condition_on_previous_text=False)
 
154
  wer_val = jiwer.wer(intended_sentence, actual_text)
155
  cer_val = jiwer.cer(intended_sentence, actual_text)
156
 
157
+ # Transliteration
158
  hk_translit = transliterate_to_hk(actual_text, language_choice) \
159
  if is_script(actual_text, language_choice) \
160
  else f"[Script mismatch: expected {language_choice}]"
161
 
162
+ # Highlights
163
  diff_html = highlight_differences(intended_sentence, actual_text)
164
  char_html = char_level_highlight(intended_sentence, actual_text)
165
 
 
171
 
172
  # ---------------- UI ---------------- #
173
  with gr.Blocks() as demo:
174
+ gr.Markdown("## 🎙 Pronunciation Comparator + IndicParler‑TTS + Error Highlighting")
 
175
 
176
  with gr.Row():
177
  lang_choice = gr.Dropdown(choices=list(LANG_CODES.keys()), value="Malayalam", label="Language")