AGAZO_Final_Assignment / audio_to_text_tool.py
Alexandre Gazola
codigo agente
f66d8b7
raw
history blame
1.48 kB
import whisper
from langchain_core.tools import tool
#@tool
import whisper
import os
import os
import whisper
import subprocess
import tempfile
import os
import whisper
import subprocess
import tempfile
# Whisper models that have already been loaded, keyed by model name, so that
# repeated transcriptions reuse the weights instead of reloading them per call.
_WHISPER_MODELS = {}


def _get_whisper_model(name):
    """Return the Whisper model called *name*, loading it on first use only."""
    try:
        return _WHISPER_MODELS[name]
    except KeyError:
        model = _WHISPER_MODELS[name] = whisper.load_model(name)
        return model


def audio_to_text(file_path: str) -> str:
    """
    Convert an MP3 file to WAV with ffmpeg and transcribe it using Whisper.

    The audio is converted into a temporary WAV file, which is always removed
    before the function returns or raises.

    Args:
        file_path (str): Path to the MP3 file.

    Returns:
        str: Transcribed text.

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file
            (``subprocess`` raises the same type when the ``ffmpeg``
            executable itself cannot be found).
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status;
            the exception's ``stderr`` attribute carries ffmpeg's error output.
        ValueError: If Whisper returns no result or a result without ``"text"``.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    # An absolute path can never start with "-", so ffmpeg cannot mistake an
    # oddly named input file (e.g. "-track.mp3") for a command-line option.
    source_path = os.path.abspath(file_path)

    # Reserve a unique temporary file name; the handle is closed right away so
    # that ffmpeg can overwrite the file (hence "-y") on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
        tmp_wav_path = tmp_wav.name

    try:
        # Convert to WAV using ffmpeg. stderr is captured (and limited to real
        # errors) so a failed conversion can be diagnosed from the exception;
        # stdin is detached so ffmpeg never consumes the caller's input.
        subprocess.run(
            [
                "ffmpeg",
                "-hide_banner",
                "-loglevel", "error",
                "-y",
                "-i", source_path,
                tmp_wav_path,
            ],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=True,
        )

        model = _get_whisper_model("base")
        result = model.transcribe(tmp_wav_path)

        if result is None or "text" not in result:
            raise ValueError("Transcription failed or result is invalid.")

        return result["text"]
    finally:
        # Clean up the temporary WAV file; it may already be gone, which is fine.
        try:
            os.remove(tmp_wav_path)
        except FileNotFoundError:
            pass
if __name__ == "__main__":
    import sys

    # Manual smoke test: transcribe the file given as the first command-line
    # argument, falling back to the original sample path when none is given.
    audio_path = sys.argv[1] if len(sys.argv) > 1 else "C:\\tmp\\ibm\\audio.mp3"
    try:
        print(audio_to_text(audio_path))
    except Exception as e:
        # Top-level boundary: report the failure on stderr and signal it through
        # the exit status instead of exiting successfully.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)