Spaces:
Sleeping
Sleeping
File size: 1,607 Bytes
2606dba 02a5e73 2606dba 86758ba 2606dba 86758ba 2606dba 86758ba 2606dba 86758ba 2606dba 86758ba 02a5e73 86758ba 2606dba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import os
import requests
from smolagents import Tool
class AudioTranscriptionTool(Tool):
    """Agent tool that transcribes a local audio file with a hosted Whisper model.

    The raw bytes of the file are POSTed to the Hugging Face Inference API
    endpoint stored in ``self.api_url``. The bearer token is read from the
    ``HF_API_TOKEN`` environment variable when the tool is constructed.

    Failures are reported as strings beginning with ``"Error"`` instead of
    being raised, so the caller always receives a string back.
    """

    name = "audio_transcriber"
    description = "Transcribe a given audio file in mp3 or wav format to text using Whisper via Hugging Face API."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the audio file (must be .mp3 or .wav)"
        }
    }
    output_type = "string"

    def __init__(self):
        """Set the endpoint URL and build the request headers from the environment."""
        super().__init__()
        self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-large"
        token = os.getenv("HF_API_TOKEN")
        # Only send an Authorization header when a token is actually configured;
        # formatting a missing token would send the literal string "Bearer None".
        self.headers = {"Authorization": f"Bearer {token}"} if token else {}

    def forward(self, file_path: str) -> str:
        """Transcribe the audio file at ``file_path``.

        Args:
            file_path: Path to the audio file to upload.

        Returns:
            The transcription with surrounding whitespace stripped, or an
            error message string if the file cannot be read, the request
            fails, the API answers with a non-200 status, or the response
            carries no usable ``"text"`` entry.
        """
        try:
            with open(file_path, "rb") as audio_file:
                audio_bytes = audio_file.read()

            response = requests.post(
                self.api_url,
                headers=self.headers,
                data=audio_bytes,
                timeout=60,
            )
            if response.status_code != 200:
                return f"Error transcribing audio: {response.status_code} {response.text}"

            result = response.json()
            # The exact key depends on the model; usually 'text' for whisper.
            # Guard against non-object payloads (e.g. a JSON list), which have
            # no .get() and would otherwise surface as an AttributeError message.
            transcription = result.get("text") if isinstance(result, dict) else None
            if not transcription:
                return "Error: No transcription found in the response."
            return transcription.strip()
        except Exception as e:
            # Tool boundary: report any failure (I/O, network, JSON decoding)
            # to the caller as text rather than raising.
            return f"Error transcribing audio: {e}"
|