from transformers import pipeline
import librosa  # NOTE(review): imported but unused in this file as shown — the
                # pipeline decodes audio itself; keep only if another chunk uses it.
import os

# Initialize the ASR pipeline once at module load with a specific model.
# "whisper-tiny.en" is chosen for quick setup/download; larger Whisper
# checkpoints (base/small/medium/large) trade startup time for accuracy.
# NOTE: this triggers a model download on first run (network side effect).
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny.en",
)
def transcribe_audio(audio_filepath):
    """Transcribe an audio file using the module-level ASR pipeline.

    Parameters
    ----------
    audio_filepath : str
        Path to an audio file the pipeline can decode (e.g. .mp3, .wav).

    Returns
    -------
    str
        The transcribed text on success, or a human-readable
        "Error during transcription: ..." message on failure
        (best-effort contract: this function never raises).
    """
    try:
        # return_timestamps=True lets Whisper handle audio longer than its
        # 30-second window; we still return only the plain text here.
        result = asr_pipeline(audio_filepath, return_timestamps=True)
        return result["text"]
    except Exception as e:
        # Deliberate broad catch: callers expect a string either way.
        return f"Error during transcription: {e}"
# Example usage: transcribe a local file if it exists.
if __name__ == "__main__":
    audio_file = "./downloaded_files/1f975693-876d-457b-a649-393859e79bf3.mp3"
    # Check existence up front so we can print a friendly message instead of
    # surfacing a pipeline decode error for a missing path.
    if os.path.exists(audio_file):
        print(f"Attempting to transcribe: {audio_file}")
        transcribed_text = transcribe_audio(audio_file)
        print(f"Transcription:\n{transcribed_text}")
    else:
        print(f"File not found: {audio_file}. Please provide a valid audio file.")