Yago Bolivar committed on
Commit
2ed87be
·
1 Parent(s): 65437c6

refactor: improve ASR pipeline initialization and transcription handling

Browse files
Files changed (1) hide show
  1. src/speech_to_text.py +8 -13
src/speech_to_text.py CHANGED
@@ -4,33 +4,28 @@ import os
4
 
5
  # Initialize the ASR pipeline with a specific model
6
  # Using a smaller Whisper model for quicker setup, but larger models offer better accuracy
7
- asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en")
 
 
 
8
 
9
  def transcribe_audio(audio_filepath):
10
  """
11
  Transcribes an audio file using the Hugging Face ASR pipeline.
12
  """
13
  try:
14
- transcription = asr_pipeline(audio_filepath)
15
  return transcription["text"]
16
  except Exception as e:
17
  return f"Error during transcription: {e}"
18
 
19
  # Example usage:
20
  if __name__ == "__main__":
21
- audio_file = "./downloaded_files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3"
22
 
23
  if os.path.exists(audio_file): # Check if the (placeholder or real) file exists
24
  print(f"Attempting to transcribe: {audio_file}")
25
  transcribed_text = transcribe_audio(audio_file)
26
- print(f"Transcription: {transcribed_text}")
27
  else:
28
- print(f"File not found: {audio_file}. Please provide a valid audio file.")
29
-
30
- # To integrate this into your FileIdentifier or a subsequent processing step:
31
- # identified_file_info = identifier.identify_file("path/to/your/audio.mp3")
32
- # if identified_file_info.get("determined_type") == "audio":
33
- # text_from_audio = transcribe_audio(identified_file_info["filepath"])
34
- # print(f"Processed audio: {text_from_audio}")
35
- # else:
36
- # print("Not an audio file or error in identification.")
 
4
 
5
  # Initialize the ASR pipeline with a specific model
6
  # Using a smaller Whisper model for quicker setup, but larger models offer better accuracy
7
+ asr_pipeline = pipeline(
8
+ "automatic-speech-recognition",
9
+ model="openai/whisper-tiny.en",
10
+ )
11
 
12
  def transcribe_audio(audio_filepath):
13
  """
14
  Transcribes an audio file using the Hugging Face ASR pipeline.
15
  """
16
  try:
17
+ transcription = asr_pipeline(audio_filepath, return_timestamps=True)
18
  return transcription["text"]
19
  except Exception as e:
20
  return f"Error during transcription: {e}"
21
 
22
  # Example usage:
23
  if __name__ == "__main__":
24
+ audio_file = "./downloaded_files/1f975693-876d-457b-a649-393859e79bf3.mp3"
25
 
26
  if os.path.exists(audio_file): # Check if the (placeholder or real) file exists
27
  print(f"Attempting to transcribe: {audio_file}")
28
  transcribed_text = transcribe_audio(audio_file)
29
+ print(f"Transcription:\n{transcribed_text}")
30
  else:
31
+ print(f"File not found: {audio_file}. Please provide a valid audio file.")