Yago Bolivar
committed on
Commit
·
2ed87be
1
Parent(s):
65437c6
refactor: improve ASR pipeline initialization and transcription handling
Browse files - src/speech_to_text.py +8 -13
src/speech_to_text.py
CHANGED
@@ -4,33 +4,28 @@ import os
|
|
4 |
|
5 |
# Initialize the ASR pipeline with a specific model
|
6 |
# Using a smaller Whisper model for quicker setup, but larger models offer better accuracy
|
7 |
-
asr_pipeline = pipeline(
|
|
|
|
|
|
|
8 |
|
9 |
def transcribe_audio(audio_filepath):
|
10 |
"""
|
11 |
Transcribes an audio file using the Hugging Face ASR pipeline.
|
12 |
"""
|
13 |
try:
|
14 |
-
transcription = asr_pipeline(audio_filepath)
|
15 |
return transcription["text"]
|
16 |
except Exception as e:
|
17 |
return f"Error during transcription: {e}"
|
18 |
|
19 |
# Example usage:
|
20 |
if __name__ == "__main__":
|
21 |
-
audio_file = "./downloaded_files/
|
22 |
|
23 |
if os.path.exists(audio_file): # Check if the (placeholder or real) file exists
|
24 |
print(f"Attempting to transcribe: {audio_file}")
|
25 |
transcribed_text = transcribe_audio(audio_file)
|
26 |
-
print(f"Transcription
|
27 |
else:
|
28 |
-
print(f"File not found: {audio_file}. Please provide a valid audio file.")
|
29 |
-
|
30 |
-
# To integrate this into your FileIdentifier or a subsequent processing step:
|
31 |
-
# identified_file_info = identifier.identify_file("path/to/your/audio.mp3")
|
32 |
-
# if identified_file_info.get("determined_type") == "audio":
|
33 |
-
# text_from_audio = transcribe_audio(identified_file_info["filepath"])
|
34 |
-
# print(f"Processed audio: {text_from_audio}")
|
35 |
-
# else:
|
36 |
-
# print("Not an audio file or error in identification.")
|
|
|
4 |
|
5 |
# Initialize the ASR pipeline with a specific model
|
6 |
# Using a smaller Whisper model for quicker setup, but larger models offer better accuracy
|
7 |
+
asr_pipeline = pipeline(
|
8 |
+
"automatic-speech-recognition",
|
9 |
+
model="openai/whisper-tiny.en",
|
10 |
+
)
|
11 |
|
12 |
def transcribe_audio(audio_filepath):
|
13 |
"""
|
14 |
Transcribes an audio file using the Hugging Face ASR pipeline.
|
15 |
"""
|
16 |
try:
|
17 |
+
transcription = asr_pipeline(audio_filepath, return_timestamps=True)
|
18 |
return transcription["text"]
|
19 |
except Exception as e:
|
20 |
return f"Error during transcription: {e}"
|
21 |
|
22 |
# Example usage:
|
23 |
if __name__ == "__main__":
|
24 |
+
audio_file = "./downloaded_files/1f975693-876d-457b-a649-393859e79bf3.mp3"
|
25 |
|
26 |
if os.path.exists(audio_file): # Check if the (placeholder or real) file exists
|
27 |
print(f"Attempting to transcribe: {audio_file}")
|
28 |
transcribed_text = transcribe_audio(audio_file)
|
29 |
+
print(f"Transcription:\n{transcribed_text}")
|
30 |
else:
|
31 |
+
print(f"File not found: {audio_file}. Please provide a valid audio file.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|