Yago Bolivar committed on
Commit
d0584cc
·
1 Parent(s): abfabef

feat: add speech-to-text transcription functionality using Hugging Face ASR pipeline

Browse files
Files changed (1) hide show
  1. src/speech_to_text.py +36 -0
src/speech_to_text.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ import librosa # Or soundfile
3
+ import os
4
+
5
# Shared ASR pipeline, constructed once when this module is imported.
# A small Whisper checkpoint keeps setup quick; larger models offer better
# accuracy at the cost of a slower start.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-tiny.en",
)
8
+
9
def transcribe_audio(audio_filepath):
    """
    Run the module-level ASR pipeline on an audio file and return its text.

    Args:
        audio_filepath: Location of the audio file; handed to
            ``asr_pipeline`` unchanged.

    Returns:
        The ``"text"`` entry of the pipeline result. If anything raises
        along the way, a string of the form
        ``"Error during transcription: <exception>"`` is returned instead,
        so callers never see an exception from this function.
    """
    try:
        return asr_pipeline(audio_filepath)["text"]
    except Exception as exc:
        # Failures are reported through the return value, not raised.
        return f"Error during transcription: {exc}"
18
+
19
# Manual demo: transcribe one sample file when the module is run as a script.
if __name__ == "__main__":
    audio_file = "./downloaded_files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3"

    if not os.path.exists(audio_file):
        # Nothing to transcribe: report the path that was looked up.
        print(f"File not found: {audio_file}. Please provide a valid audio file.")
    else:
        print(f"Attempting to transcribe: {audio_file}")
        transcribed_text = transcribe_audio(audio_file)
        print(f"Transcription: {transcribed_text}")
29
+
30
+ # To integrate this into your FileIdentifier or a subsequent processing step:
31
+ # identified_file_info = identifier.identify_file("path/to/your/audio.mp3")
32
+ # if identified_file_info.get("determined_type") == "audio":
33
+ # text_from_audio = transcribe_audio(identified_file_info["filepath"])
34
+ # print(f"Processed audio: {text_from_audio}")
35
+ # else:
36
+ # print("Not an audio file or error in identification.")