sudhanm committed on
Commit
fbcc894
·
verified ·
1 Parent(s): 39de6da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -13
app.py CHANGED
@@ -134,7 +134,13 @@ def load_model_for_language(language_choice):
134
 
135
  # ---------------- HELPERS ---------------- #
136
  def get_random_sentence(language_choice):
137
- return random.choice(SENTENCE_BANK[language_choice])
 
 
 
 
 
 
138
 
139
  def is_script(text, lang_name):
140
  pattern = SCRIPT_PATTERNS.get(lang_name)
@@ -229,19 +235,29 @@ def transcribe_once(audio_path, language_choice, beam_size, temperature):
229
  model_dtype = next(model.parameters()).dtype
230
  input_features = input_features.to(device=DEVICE, dtype=model_dtype)
231
 
232
- # Generate forced decoder ids for the language
233
- forced_decoder_ids = processor.get_decoder_prompt_ids(language=lang_code, task="transcribe")
234
-
235
- # Generate transcription
236
  with torch.no_grad():
237
- predicted_ids = model.generate(
238
- input_features,
239
- forced_decoder_ids=forced_decoder_ids,
240
- max_length=448,
241
- num_beams=beam_size,
242
- temperature=temperature if temperature > 0 else None,
243
- do_sample=temperature > 0,
244
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  # Decode the transcription
247
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
 
134
 
135
  # ---------------- HELPERS ---------------- #
136
  def get_random_sentence(language_choice):
137
+ sentence = random.choice(SENTENCE_BANK[language_choice])
138
+ # Add simple transliteration for Tamil and Malayalam
139
+ if language_choice in ["Tamil", "Malayalam"]:
140
+ simple_roman = transliterate_to_simple_roman(sentence, language_choice)
141
+ return f"{sentence}\n\n📖 Read as: {simple_roman}"
142
+ else:
143
+ return sentence
144
 
145
  def is_script(text, lang_name):
146
  pattern = SCRIPT_PATTERNS.get(lang_name)
 
235
  model_dtype = next(model.parameters()).dtype
236
  input_features = input_features.to(device=DEVICE, dtype=model_dtype)
237
 
238
+ # Generate transcription with fallback for different model capabilities
 
 
 
239
  with torch.no_grad():
240
+ try:
241
+ # Try with forced decoder ids first (standard Whisper models)
242
+ forced_decoder_ids = processor.get_decoder_prompt_ids(language=lang_code, task="transcribe")
243
+ predicted_ids = model.generate(
244
+ input_features,
245
+ forced_decoder_ids=forced_decoder_ids,
246
+ max_length=448,
247
+ num_beams=beam_size,
248
+ temperature=temperature if temperature > 0 else None,
249
+ do_sample=temperature > 0,
250
+ )
251
+ except (TypeError, ValueError) as e:
252
+ # Fallback for models that don't support forced_decoder_ids (like some fine-tuned models)
253
+ print(f"Fallback generation for {language_choice}: {e}")
254
+ predicted_ids = model.generate(
255
+ input_features,
256
+ max_length=448,
257
+ num_beams=beam_size,
258
+ temperature=temperature if temperature > 0 else None,
259
+ do_sample=temperature > 0,
260
+ )
261
 
262
  # Decode the transcription
263
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]