File size: 1,780 Bytes
2606dba
02a5e73
2606dba
 
 
 
87850bb
2606dba
86758ba
2606dba
87850bb
2606dba
 
 
 
86758ba
 
87850bb
 
 
 
86758ba
87850bb
86758ba
 
 
87850bb
 
 
2606dba
86758ba
 
02a5e73
86758ba
 
 
 
 
 
87850bb
86758ba
 
87850bb
86758ba
 
 
87850bb
86758ba
87850bb
 
2606dba
 
87850bb
5331296
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
import requests
from smolagents import Tool

class AudioTranscriptionTool(Tool):
    """Tool that transcribes an .mp3/.wav file via a hosted Whisper model.

    The audio file is read from disk and its raw bytes are POSTed to the
    Hugging Face Inference API. Failures are reported as human-readable
    ``"Error ..."`` strings returned from :meth:`forward` rather than raised.
    """

    name = "audio_transcriber"
    description = "Transcribe a given audio file in .mp3 or .wav format using Whisper via Hugging Face API."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the audio file (.mp3 or .wav)"
        }
    }
    output_type = "string"

    def __init__(self):
        """Read the API token from the environment and prepare the request settings.

        Raises:
            EnvironmentError: If ``HF_API_TOKEN`` is unset or empty.
        """
        super().__init__()
        api_token = os.getenv("HF_API_TOKEN")
        if not api_token:
            raise EnvironmentError("HF_API_TOKEN not found in environment variables.")
        # The Inference API is served from the ".co" domain (not ".com").
        self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-large"
        self.headers = {
            "Authorization": f"Bearer {api_token}"
        }

    def forward(self, file_path: str) -> str:
        """Transcribe the audio file at ``file_path``.

        Args:
            file_path: Path to a file whose name ends in ``.mp3`` or ``.wav``
                (case-insensitive).

        Returns:
            The stripped transcription text on success, otherwise a string
            starting with ``"Error"`` that describes what went wrong.
        """
        if not file_path.lower().endswith((".mp3", ".wav")):
            return "Error: File must be .mp3 or .wav format."

        try:
            with open(file_path, "rb") as audio_file:
                audio_bytes = audio_file.read()

            response = requests.post(
                self.api_url,
                headers=self.headers,
                data=audio_bytes,
                timeout=60
            )

            if response.status_code != 200:
                return f"Error transcribing audio: {response.status_code} - {response.text}"

            result = response.json()
            # Guard against a non-dict payload (e.g. a JSON list), which has
            # no ``.get`` and would otherwise surface as an AttributeError.
            transcription = result.get("text") if isinstance(result, dict) else None
            if not transcription:
                return "Error: No transcription found in API response."
            return transcription.strip()

        except Exception as e:
            # Deliberate catch-all: any file, network or decoding failure is
            # reported to the caller as text instead of propagating.
            return f"Error transcribing audio: {e}"