Update app.py
app.py
CHANGED
@@ -72,7 +72,17 @@ def transcribe_audio(audio_file):
     print(f"Audio duration: {len(audio) / 1000:.2f} seconds")
     print("Starting transcription...")
     input_features = whisper_processor(audio_array, sampling_rate=16000, return_tensors="pt").input_features.to(device)
-
+
+    # Create attention mask
+    attention_mask = torch.ones_like(input_features)
+
+    # Generate with specific parameters
+    predicted_ids = whisper_model.generate(
+        input_features,
+        attention_mask=attention_mask,
+        language='en',
+        task='translate'
+    )
     transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)
 
     print(f"Transcription complete. Length: {len(transcription[0])} characters")
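For context, here is a minimal, self-contained sketch of how the surrounding transcribe_audio function might be wired up, assuming the Hugging Face transformers Whisper classes, pydub for audio loading, and numpy for the array conversion. Only the lines visible in the diff come from the commit; the checkpoint name (openai/whisper-small), the model/processor loading, and the preprocessing steps are assumptions, and the all-ones attention mask added by the commit is omitted from the sketch.

```python
# Hypothetical surrounding context for the diff above; checkpoint name,
# loading code, and preprocessing are assumptions, not part of the commit.
import numpy as np
import torch
from pydub import AudioSegment
from transformers import WhisperForConditionalGeneration, WhisperProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-small")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)


def transcribe_audio(audio_file):
    # Load the file and force 16 kHz mono, 16-bit samples (what Whisper expects)
    audio = (
        AudioSegment.from_file(audio_file)
        .set_frame_rate(16000)
        .set_channels(1)
        .set_sample_width(2)
    )
    print(f"Audio duration: {len(audio) / 1000:.2f} seconds")

    # Convert pydub's int16 samples to a float32 array in [-1, 1]
    audio_array = np.array(audio.get_array_of_samples()).astype(np.float32) / 32768.0

    print("Starting transcription...")
    input_features = whisper_processor(
        audio_array, sampling_rate=16000, return_tensors="pt"
    ).input_features.to(device)

    # Generate token ids with the language/task settings from the commit
    # (the commit also passes an all-ones attention mask, left out here)
    predicted_ids = whisper_model.generate(input_features, language="en", task="translate")

    transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)
    print(f"Transcription complete. Length: {len(transcription[0])} characters")
    return transcription[0]
```

One note on the committed parameters: task='translate' asks Whisper to output English, while task='transcribe' keeps the spoken language; with language='en' on English-language audio the two produce the same result.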