bluenevus committed on
Commit
b0c825b
·
verified ·
1 Parent(s): 1b493d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -70,22 +70,27 @@ def format_transcript(transcript):
70
 
71
  def transcribe_audio(audio_file):
72
  try:
73
- # Load and resample the audio to 16000 Hz
74
- audio_input, _ = librosa.load(audio_file, sr=16000)
75
 
76
  # Convert to float32 numpy array
77
  audio_input = audio_input.astype(np.float32)
78
 
79
- # Prepare the input features
80
- input_features = processor(audio_input, sampling_rate=16000, return_tensors="pt").input_features.to(device)
 
81
 
82
- # Generate token ids
83
- predicted_ids = model.generate(input_features)
 
 
 
 
84
 
85
- # Decode token ids to text
86
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
87
 
88
- return transcription[0]
89
  except Exception as e:
90
  print(f"Error in transcribe_audio: {str(e)}")
91
  raise
@@ -118,8 +123,6 @@ def transcribe_video(url):
118
  return transcript
119
  except Exception as e:
120
  error_message = f"An error occurred: {str(e)}"
121
- print(error_message)
122
- return error_message
123
 
124
  def download_transcript(transcript):
125
  with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as temp_file:
 
70
 
71
  def transcribe_audio(audio_file):
72
  try:
73
+ # Load the entire audio file
74
+ audio_input, sr = librosa.load(audio_file, sr=16000)
75
 
76
  # Convert to float32 numpy array
77
  audio_input = audio_input.astype(np.float32)
78
 
79
+ # Process in chunks of 30 seconds
80
+ chunk_length = 30 * sr
81
+ transcriptions = []
82
 
83
+ for i in range(0, len(audio_input), chunk_length):
84
+ chunk = audio_input[i:i+chunk_length]
85
+ input_features = processor(chunk, sampling_rate=16000, return_tensors="pt").input_features.to(device)
86
+ predicted_ids = model.generate(input_features)
87
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
88
+ transcriptions.extend(transcription)
89
 
90
+ # Join all transcriptions
91
+ full_transcription = " ".join(transcriptions)
92
 
93
+ return full_transcription
94
  except Exception as e:
95
  print(f"Error in transcribe_audio: {str(e)}")
96
  raise
 
123
  return transcript
124
  except Exception as e:
125
  error_message = f"An error occurred: {str(e)}"
 
 
126
 
127
  def download_transcript(transcript):
128
  with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as temp_file: