Final_Assignment_Template / audio_transcriber.py
dlaima's picture
Update audio_transcriber.py
86758ba verified
raw
history blame
1.61 kB
import os
import requests
from smolagents import Tool
class AudioTranscriptionTool(Tool):
    """Tool that transcribes a local audio file through the Hugging Face Inference API.

    The file is read from disk and its raw bytes are POSTed to the Whisper
    model endpoint stored in ``self.api_url``. Failures are never raised to
    the caller: every error path returns a human-readable string instead.

    The API token is read from the ``HF_API_TOKEN`` environment variable when
    the tool is constructed.
    """

    name = "audio_transcriber"
    description = "Transcribe a given audio file in mp3 or wav format to text using Whisper via Hugging Face API."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the audio file (must be .mp3 or .wav)"
        }
    }
    output_type = "string"

    def __init__(self):
        """Set the endpoint URL and build the request headers from the environment."""
        super().__init__()
        self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-large"
        # Only attach the Authorization header when a token is configured;
        # otherwise the request would carry the literal value "Bearer None".
        token = os.getenv("HF_API_TOKEN")
        self.headers = {"Authorization": f"Bearer {token}"} if token else {}

    def forward(self, file_path: str) -> str:
        """Transcribe the audio file at ``file_path`` and return the text.

        Args:
            file_path: Path to the audio file to upload (.mp3 or .wav).

        Returns:
            The stripped transcription on success, otherwise a string that
            starts with ``"Error"`` describing what went wrong (unreadable
            file, network failure, non-200 response, or a response without a
            usable ``"text"`` field).
        """
        try:
            with open(file_path, "rb") as audio_file:
                audio_bytes = audio_file.read()

            # Copy so the per-request Content-Type never leaks into the
            # shared instance headers.
            headers = dict(self.headers)
            # Declare the audio format explicitly for the two documented
            # extensions; any other extension is sent without a Content-Type,
            # exactly as before.
            extension = os.path.splitext(str(file_path))[1].lower()
            content_type = {".mp3": "audio/mpeg", ".wav": "audio/wav"}.get(extension)
            if content_type:
                headers["Content-Type"] = content_type

            response = requests.post(
                self.api_url,
                headers=headers,
                data=audio_bytes,
                timeout=60,
            )
            if response.status_code != 200:
                return f"Error transcribing audio: {response.status_code} {response.text}"

            result = response.json()
            # A JSON payload that is not an object (e.g. a list) has no
            # "text" key to read; report it as a missing transcription.
            if not isinstance(result, dict):
                return "Error: No transcription found in the response."

            # The exact key depends on the model; usually 'text' for whisper
            transcription = result.get("text", None)
            if not transcription:
                return "Error: No transcription found in the response."
            return transcription.strip()
        except Exception as e:
            # Tool boundary: the caller expects a string result, so any
            # failure (I/O, network, JSON decoding) is reported as text
            # rather than propagated.
            return f"Error transcribing audio: {e}"