nareauow committed on
Commit
b2a6006
·
verified ·
1 Parent(s): a52cf03

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
@@ -144,11 +144,23 @@ def recognize_speech(audio_path):
144
  inputs = speech_processor(audio_data, sampling_rate=sr, return_tensors="pt")
145
  inputs = {k: v.to(device) for k, v in inputs.items()}
146
 
147
- # Generate transcription
148
- generated_ids = speech_recognizer.generate(**inputs)
149
- transcription = speech_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
 
 
 
150
 
151
- return transcription
 
 
 
 
 
 
 
 
152
  except Exception as e:
153
  return f"Speech recognition error: {str(e)}"
154
 
 
144
  inputs = speech_processor(audio_data, sampling_rate=sr, return_tensors="pt")
145
  inputs = {k: v.to(device) for k, v in inputs.items()}
146
 
147
+ # Generate transcription with specific parameters to prevent repetition
148
+ generated_ids = speech_recognizer.generate(
149
+ **inputs,
150
+ max_length=100, # Limit output length
151
+ num_beams=1, # Use greedy search instead of beam search
152
+ no_repeat_ngram_size=2, # Prevent repeating n-grams
153
+ )
154
 
155
+ # Decode with skip special tokens
156
+ transcription = speech_processor.batch_decode(
157
+ generated_ids,
158
+ skip_special_tokens=True,
159
+ clean_up_tokenization_spaces=True
160
+ )[0]
161
+
162
+ return transcription.strip() # Remove any extra whitespace
163
+
164
  except Exception as e:
165
  return f"Speech recognition error: {str(e)}"
166