Spaces:
Sleeping
Sleeping
File size: 1,780 Bytes
2606dba 02a5e73 2606dba 87850bb 2606dba 86758ba 2606dba 87850bb 2606dba 86758ba 87850bb 86758ba 87850bb 86758ba 87850bb 2606dba 86758ba 02a5e73 86758ba 87850bb 86758ba 87850bb 86758ba 87850bb 86758ba 87850bb 2606dba 87850bb 5331296 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import os
import requests
from smolagents import Tool
class AudioTranscriptionTool(Tool):
    """Tool that transcribes an .mp3/.wav file via the Hugging Face Inference API.

    The audio file is read from disk and its raw bytes are POSTed to the
    hosted ``openai/whisper-large`` model. Failures are reported as
    ``"Error ..."`` strings returned from :meth:`forward` rather than raised.
    """

    name = "audio_transcriber"
    description = "Transcribe a given audio file in .mp3 or .wav format using Whisper via Hugging Face API."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the audio file (.mp3 or .wav)"
        }
    }
    output_type = "string"

    def __init__(self):
        """Read the API token from the environment and prepare the request settings.

        Raises:
            EnvironmentError: If ``HF_API_TOKEN`` is unset or empty.
        """
        super().__init__()
        api_token = os.getenv("HF_API_TOKEN")
        if not api_token:
            raise EnvironmentError("HF_API_TOKEN not found in environment variables.")
        # The hosted Inference API is served from huggingface.co (not .com).
        self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-large"
        self.headers = {
            "Authorization": f"Bearer {api_token}"
        }

    def forward(self, file_path: str) -> str:
        """Transcribe the audio file at ``file_path``.

        Args:
            file_path: Path to an audio file whose name ends in ``.mp3`` or
                ``.wav`` (case-insensitive).

        Returns:
            The stripped transcription text on success, otherwise a
            human-readable string starting with ``"Error"``.
        """
        # Validate the type first so a non-string argument yields the usual
        # error string instead of an AttributeError from ``.lower()``.
        if not isinstance(file_path, str):
            return "Error: File must be .mp3 or .wav format."
        if not file_path.lower().endswith((".mp3", ".wav")):
            return "Error: File must be .mp3 or .wav format."

        try:
            with open(file_path, "rb") as audio_file:
                audio_bytes = audio_file.read()

            response = requests.post(
                self.api_url,
                headers=self.headers,
                data=audio_bytes,
                timeout=60,
            )
            if response.status_code != 200:
                return f"Error transcribing audio: {response.status_code} - {response.text}"

            result = response.json()
        except Exception as e:
            # Deliberate catch-all: file, network and JSON-decoding failures
            # are all reported to the caller as a string, never raised.
            return f"Error transcribing audio: {e}"

        # A 200 response is not guaranteed to be a JSON object with a string
        # "text" field; treat any other shape as "no transcription".
        transcription = result.get("text") if isinstance(result, dict) else None
        if isinstance(transcription, str) and transcription:
            return transcription.strip()
        return "Error: No transcription found in API response."
|