Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -134,7 +134,13 @@ def load_model_for_language(language_choice):
|
|
134 |
|
135 |
# ---------------- HELPERS ---------------- #
|
136 |
def get_random_sentence(language_choice):
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
def is_script(text, lang_name):
|
140 |
pattern = SCRIPT_PATTERNS.get(lang_name)
|
@@ -229,19 +235,29 @@ def transcribe_once(audio_path, language_choice, beam_size, temperature):
|
|
229 |
model_dtype = next(model.parameters()).dtype
|
230 |
input_features = input_features.to(device=DEVICE, dtype=model_dtype)
|
231 |
|
232 |
-
# Generate
|
233 |
-
forced_decoder_ids = processor.get_decoder_prompt_ids(language=lang_code, task="transcribe")
|
234 |
-
|
235 |
-
# Generate transcription
|
236 |
with torch.no_grad():
|
237 |
-
|
238 |
-
|
239 |
-
forced_decoder_ids=
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
|
246 |
# Decode the transcription
|
247 |
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
|
|
134 |
|
135 |
# ---------------- HELPERS ---------------- #
|
136 |
def get_random_sentence(language_choice):
    """Pick a random practice sentence for the chosen language.

    For Tamil and Malayalam the sentence is returned with a simple
    romanized reading hint appended; every other language gets the
    bare sentence unchanged.
    """
    sentence = random.choice(SENTENCE_BANK[language_choice])
    # Only Tamil/Malayalam scripts get the transliteration aid.
    if language_choice not in ["Tamil", "Malayalam"]:
        return sentence
    simple_roman = transliterate_to_simple_roman(sentence, language_choice)
    return f"{sentence}\n\n📖 Read as: {simple_roman}"
144 |
|
145 |
def is_script(text, lang_name):
|
146 |
pattern = SCRIPT_PATTERNS.get(lang_name)
|
|
|
235 |
model_dtype = next(model.parameters()).dtype
|
236 |
input_features = input_features.to(device=DEVICE, dtype=model_dtype)
|
237 |
|
238 |
+
# Generate transcription with fallback for different model capabilities
|
|
|
|
|
|
|
239 |
with torch.no_grad():
|
240 |
+
try:
|
241 |
+
# Try with forced decoder ids first (standard Whisper models)
|
242 |
+
forced_decoder_ids = processor.get_decoder_prompt_ids(language=lang_code, task="transcribe")
|
243 |
+
predicted_ids = model.generate(
|
244 |
+
input_features,
|
245 |
+
forced_decoder_ids=forced_decoder_ids,
|
246 |
+
max_length=448,
|
247 |
+
num_beams=beam_size,
|
248 |
+
temperature=temperature if temperature > 0 else None,
|
249 |
+
do_sample=temperature > 0,
|
250 |
+
)
|
251 |
+
except (TypeError, ValueError) as e:
|
252 |
+
# Fallback for models that don't support forced_decoder_ids (like some fine-tuned models)
|
253 |
+
print(f"Fallback generation for {language_choice}: {e}")
|
254 |
+
predicted_ids = model.generate(
|
255 |
+
input_features,
|
256 |
+
max_length=448,
|
257 |
+
num_beams=beam_size,
|
258 |
+
temperature=temperature if temperature > 0 else None,
|
259 |
+
do_sample=temperature > 0,
|
260 |
+
)
|
261 |
|
262 |
# Decode the transcription
|
263 |
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|