bluenevus committed on
Commit
29f0e9b
·
verified ·
1 Parent(s): fd1e8cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -72,7 +72,17 @@ def transcribe_audio(audio_file):
72
  print(f"Audio duration: {len(audio) / 1000:.2f} seconds")
73
  print("Starting transcription...")
74
  input_features = whisper_processor(audio_array, sampling_rate=16000, return_tensors="pt").input_features.to(device)
75
- predicted_ids = whisper_model.generate(input_features)
 
 
 
 
 
 
 
 
 
 
76
  transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)
77
 
78
  print(f"Transcription complete. Length: {len(transcription[0])} characters")
 
72
  print(f"Audio duration: {len(audio) / 1000:.2f} seconds")
73
  print("Starting transcription...")
74
  input_features = whisper_processor(audio_array, sampling_rate=16000, return_tensors="pt").input_features.to(device)
75
+
76
+ # Create attention mask
77
+ attention_mask = torch.ones_like(input_features)
78
+
79
+ # Generate with specific parameters
80
+ predicted_ids = whisper_model.generate(
81
+ input_features,
82
+ attention_mask=attention_mask,
83
+ language='en',
84
+ task='translate'
85
+ )
86
  transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)
87
 
88
  print(f"Transcription complete. Length: {len(transcription[0])} characters")