# Spaces: Sleeping
import os
from pathlib import Path

import requests
from smolagents import Tool
class AudioTranscriptionTool(Tool):
    """Tool that transcribes an audio file to text through a hosted Whisper model.

    The file is read from disk and its raw bytes are POSTed to the Hugging
    Face Inference API endpoint stored in ``self.api_url``. ``forward`` never
    raises: every failure is reported as a string starting with ``"Error"``.
    """

    name = "audio_transcriber"
    description = "Transcribe a given audio file in mp3 or wav format to text using Whisper via Hugging Face API."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the audio file (must be .mp3 or .wav)",
        }
    }
    output_type = "string"

    # MIME types for the two formats the tool advertises, keyed by lowercase
    # file suffix. Other suffixes are still uploaded, just without an explicit
    # Content-Type (the original behaviour).
    _CONTENT_TYPES = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
    }

    def __init__(self):
        """Set the endpoint URL and build the request headers.

        The bearer token is read from the ``HF_API_TOKEN`` environment
        variable. When it is unset or empty no ``Authorization`` header is
        sent, instead of the invalid literal ``"Bearer None"``.
        """
        super().__init__()
        self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-large"
        token = os.getenv("HF_API_TOKEN")
        self.headers = {"Authorization": f"Bearer {token}"} if token else {}

    def forward(self, file_path: str) -> str:
        """Transcribe the audio file at ``file_path``.

        Args:
            file_path: Path to the audio file to upload.

        Returns:
            The stripped transcription on success; otherwise a message
            starting with ``"Error"`` that describes what went wrong
            (unreadable or empty file, non-200 response, missing ``text``
            field, or any exception raised along the way).
        """
        # Deliberately broad: this is the tool boundary and callers expect an
        # error string rather than an exception.
        try:
            with open(file_path, "rb") as audio_file:
                audio_bytes = audio_file.read()

            if not audio_bytes:
                # Nothing to transcribe; skip the pointless network round trip.
                return "Error transcribing audio: audio file is empty"

            # Copy so the per-request Content-Type never leaks into self.headers.
            headers = dict(self.headers)
            content_type = self._CONTENT_TYPES.get(Path(file_path).suffix.lower())
            if content_type:
                headers.setdefault("Content-Type", content_type)

            response = requests.post(
                self.api_url,
                headers=headers,
                data=audio_bytes,
                timeout=60,
            )
            if response.status_code != 200:
                return f"Error transcribing audio: {response.status_code} {response.text}"

            result = response.json()
            # The exact key depends on the model; usually 'text' for whisper.
            # Guard the payload shape so a list/str body yields the "no
            # transcription" message instead of an AttributeError.
            transcription = result.get("text") if isinstance(result, dict) else None
            if isinstance(transcription, str) and transcription:
                return transcription.strip()
            return "Error: No transcription found in the response."
        except Exception as e:
            return f"Error transcribing audio: {e}"