Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -134,7 +134,13 @@ def load_model_for_language(language_choice):
|
|
134 |
|
135 |
# ---------------- HELPERS ---------------- #
|
136 |
def get_random_sentence(language_choice):
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
def is_script(text, lang_name):
|
140 |
pattern = SCRIPT_PATTERNS.get(lang_name)
|
@@ -229,19 +235,29 @@ def transcribe_once(audio_path, language_choice, beam_size, temperature):
|
|
229 |
model_dtype = next(model.parameters()).dtype
|
230 |
input_features = input_features.to(device=DEVICE, dtype=model_dtype)
|
231 |
|
232 |
-
# Generate
|
233 |
-
forced_decoder_ids = processor.get_decoder_prompt_ids(language=lang_code, task="transcribe")
|
234 |
-
|
235 |
-
# Generate transcription
|
236 |
with torch.no_grad():
|
237 |
-
|
238 |
-
|
239 |
-
forced_decoder_ids=
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
|
246 |
# Decode the transcription
|
247 |
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
|
|
134 |
|
135 |
# ---------------- HELPERS ---------------- #
|
136 |
def get_random_sentence(language_choice):
    """Pick a random practice sentence for the chosen language.

    For Tamil and Malayalam the sentence is returned with a simple
    romanized reading hint appended; every other language gets the
    bare sentence unchanged.
    """
    sentence = random.choice(SENTENCE_BANK[language_choice])
    # Only Tamil/Malayalam scripts get the transliteration aid.
    if language_choice not in ["Tamil", "Malayalam"]:
        return sentence
    simple_roman = transliterate_to_simple_roman(sentence, language_choice)
    return f"{sentence}\n\n📖 Read as: {simple_roman}"
144 |
|
145 |
def is_script(text, lang_name):
|
146 |
pattern = SCRIPT_PATTERNS.get(lang_name)
|
|
|
235 |
model_dtype = next(model.parameters()).dtype
|
236 |
input_features = input_features.to(device=DEVICE, dtype=model_dtype)
|
237 |
|
238 |
+
# Generate transcription with fallback for different model capabilities
|
|
|
|
|
|
|
239 |
with torch.no_grad():
|
240 |
+
try:
|
241 |
+
# Try with forced decoder ids first (standard Whisper models)
|
242 |
+
forced_decoder_ids = processor.get_decoder_prompt_ids(language=lang_code, task="transcribe")
|
243 |
+
predicted_ids = model.generate(
|
244 |
+
input_features,
|
245 |
+
forced_decoder_ids=forced_decoder_ids,
|
246 |
+
max_length=448,
|
247 |
+
num_beams=beam_size,
|
248 |
+
temperature=temperature if temperature > 0 else None,
|
249 |
+
do_sample=temperature > 0,
|
250 |
+
)
|
251 |
+
except (TypeError, ValueError) as e:
|
252 |
+
# Fallback for models that don't support forced_decoder_ids (like some fine-tuned models)
|
253 |
+
print(f"Fallback generation for {language_choice}: {e}")
|
254 |
+
predicted_ids = model.generate(
|
255 |
+
input_features,
|
256 |
+
max_length=448,
|
257 |
+
num_beams=beam_size,
|
258 |
+
temperature=temperature if temperature > 0 else None,
|
259 |
+
do_sample=temperature > 0,
|
260 |
+
)
|
261 |
|
262 |
# Decode the transcription
|
263 |
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|