bluenevus committed on
Commit
ffd5e97
·
verified ·
1 Parent(s): 1df2592

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -16
app.py CHANGED
@@ -69,33 +69,41 @@ def download_audio_from_url(url):
69
 
70
  def transcribe_audio(audio_file):
71
  try:
72
- print("Loading audio file...")
73
  audio = AudioSegment.from_file(audio_file)
74
  audio = audio.set_channels(1).set_frame_rate(16000)
75
  audio_array = torch.tensor(audio.get_array_of_samples()).float()
76
 
77
- print(f"Audio duration: {len(audio) / 1000:.2f} seconds")
78
- print("Starting transcription...")
79
  input_features = whisper_processor(audio_array, sampling_rate=16000, return_tensors="pt").input_features.to(device)
80
 
81
  # Create attention mask
82
  attention_mask = torch.ones_like(input_features)
83
 
84
- # Generate with specific parameters
85
- predicted_ids = whisper_model.generate(
86
- input_features,
87
- attention_mask=attention_mask,
88
- language='en',
89
- task='translate'
90
- )
91
- transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- print(f"Transcription complete. Length: {len(transcription[0])} characters")
94
- if len(transcription[0]) < 10:
95
- raise ValueError(f"Transcription too short: {transcription[0]}")
96
- return transcription[0]
97
  except Exception as e:
98
- print(f"Error in transcribe_audio: {str(e)}")
99
  raise
100
 
101
  def separate_speakers(transcription):
 
69
 
70
  def transcribe_audio(audio_file):
71
  try:
72
+ logger.info("Loading audio file...")
73
  audio = AudioSegment.from_file(audio_file)
74
  audio = audio.set_channels(1).set_frame_rate(16000)
75
  audio_array = torch.tensor(audio.get_array_of_samples()).float()
76
 
77
+ logger.info(f"Audio duration: {len(audio) / 1000:.2f} seconds")
78
+ logger.info("Starting transcription...")
79
  input_features = whisper_processor(audio_array, sampling_rate=16000, return_tensors="pt").input_features.to(device)
80
 
81
  # Create attention mask
82
  attention_mask = torch.ones_like(input_features)
83
 
84
+ max_retries = 3
85
+ for attempt in range(max_retries):
86
+ # Generate with specific parameters
87
+ predicted_ids = whisper_model.generate(
88
+ input_features,
89
+ attention_mask=attention_mask,
90
+ language='en',
91
+ task='translate',
92
+ temperature=0.7, # Adjust temperature for potentially better results
93
+ num_beams=5, # Increase beam search for potentially better results
94
+ max_length=448, # Increase max length to allow for longer transcriptions
95
+ )
96
+ transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)
97
+
98
+ logger.info(f"Transcription attempt {attempt + 1} complete. Length: {len(transcription[0])} characters")
99
+ if len(transcription[0]) >= 10:
100
+ return transcription[0]
101
+ else:
102
+ logger.warning(f"Transcription too short on attempt {attempt + 1}: {transcription[0]}")
103
 
104
+ raise ValueError(f"Failed to generate a valid transcription after {max_retries} attempts")
 
 
 
105
  except Exception as e:
106
+ logger.error(f"Error in transcribe_audio: {str(e)}")
107
  raise
108
 
109
  def separate_speakers(transcription):