# Spaces: Sleeping
import os | |
import requests | |
from smolagents import Tool | |
class AudioTranscriptionTool(Tool):
    """Tool that transcribes a local .mp3/.wav file through the Hugging Face Inference API.

    The tool reads the audio file from disk, posts its raw bytes to the hosted
    ``openai/whisper-large`` model, and returns the ``"text"`` field of the JSON
    response. Failures inside ``forward`` are reported as ``"Error..."`` strings
    instead of exceptions.
    """

    name = "audio_transcriber"
    description = "Transcribe a given audio file in .mp3 or .wav format using Whisper via Hugging Face API."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the audio file (.mp3 or .wav)"
        }
    }
    output_type = "string"

    def __init__(self):
        """Read the API token from the environment and prepare request settings.

        Raises:
            EnvironmentError: If the ``HF_API_TOKEN`` environment variable is
                unset or empty.
        """
        super().__init__()
        api_token = os.getenv("HF_API_TOKEN")
        if not api_token:
            raise EnvironmentError("HF_API_TOKEN not found in environment variables.")
        # The Inference API is served from huggingface.co (not .com).
        self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-large"
        self.headers = {
            "Authorization": f"Bearer {api_token}"
        }

    def forward(self, file_path: str) -> str:
        """Transcribe the audio file at ``file_path``.

        Args:
            file_path: Path to a ``.mp3`` or ``.wav`` file (extension match is
                case-insensitive).

        Returns:
            The stripped transcription text on success, otherwise a string
            starting with ``"Error"`` that describes what went wrong.
        """
        # Accepted extensions and the MIME type sent for each one.
        content_types = {".mp3": "audio/mpeg", ".wav": "audio/wav"}
        lowered_path = file_path.lower()
        content_type = next(
            (mime for ext, mime in content_types.items() if lowered_path.endswith(ext)),
            None,
        )
        if content_type is None:
            return "Error: File must be .mp3 or .wav format."

        # Broad catch is deliberate: this method reports every failure (file
        # access, network, JSON decoding) as an error string, never by raising.
        try:
            with open(file_path, "rb") as audio_file:
                audio_bytes = audio_file.read()

            response = requests.post(
                self.api_url,
                # Declare the payload type explicitly; the body is raw audio bytes.
                headers={**self.headers, "Content-Type": content_type},
                data=audio_bytes,
                timeout=60,
            )
            if response.status_code != 200:
                return f"Error transcribing audio: {response.status_code} - {response.text}"

            result = response.json()
            # Guard against a non-object JSON body (e.g. a list), which has no .get().
            transcription = result.get("text") if isinstance(result, dict) else None
            if not transcription:
                return "Error: No transcription found in API response."
            return transcription.strip()
        except Exception as e:
            return f"Error transcribing audio: {e}"