Final_Assignment_Template

Sleeping

App Files Files Community

dlaima committed on Jun 2

Commit

86758ba

verified ·

1 Parent(s): 9571928

Update audio_transcriber.py

Browse files

Files changed (1) hide show

audio_transcriber.py +29 -19

audio_transcriber.py CHANGED Viewed

@@ -1,35 +1,45 @@
 import os
 import requests
-import openai
 from smolagents import Tool
-openai.api_key = os.getenv("OPENAI_API_KEY")
 class AudioTranscriptionTool(Tool):
     """Tool that downloads an audio file from a URL and transcribes it.

     Transcription is delegated to ``openai.audio.transcriptions.create`` with
     the ``whisper-1`` model. Failures are reported as an error string rather
     than raised, so the calling agent always receives text back.
     """

     name = "audio_transcriber"
     description = "Transcribe a given audio file in mp3 or wav format to text using Whisper."
     inputs = {
         "url": {
             "type": "string",
             "description": "URL to the audio file (.mp3 or .wav)"
         }
     }
     output_type = "string"

     def forward(self, url: str) -> str:
         """Download the audio at *url* and return its transcription.

         Args:
             url: Location of the audio file (.mp3 or .wav).

         Returns:
             The stripped transcription text, or a message starting with
             ``"Error transcribing audio:"`` if any step fails.
         """
         # Local imports keep this block self-contained; the file's import
         # header does not provide these modules.
         import tempfile
         from urllib.parse import urlparse

         try:
             # Bound the download time and fail on HTTP errors so an error
             # page is never forwarded to the transcription endpoint.
             response = requests.get(url, timeout=60)
             response.raise_for_status()

             # Keep the real extension when it is a supported one; fall back
             # to .mp3, which is what the fixed file name used previously.
             suffix = os.path.splitext(urlparse(url).path)[1].lower()
             if suffix not in (".mp3", ".wav"):
                 suffix = ".mp3"

             # A unique temporary file avoids concurrent calls overwriting
             # each other's download.
             fd, filename = tempfile.mkstemp(prefix="audio_input_", suffix=suffix)
             try:
                 with os.fdopen(fd, "wb") as f:
                     f.write(response.content)

                 # Transcribe with Whisper
                 with open(filename, "rb") as audio_file:
                     transcript = openai.audio.transcriptions.create(
                         model="whisper-1",
                         file=audio_file
                     )
             finally:
                 # Always remove the downloaded copy, even on failure.
                 os.remove(filename)

             return transcript.text.strip()
         except Exception as e:
             # Tool boundary: report the failure as text instead of raising.
             return f"Error transcribing audio: {e}"

 import os
 import requests
 from smolagents import Tool
 class AudioTranscriptionTool(Tool):
     """Tool that transcribes a local audio file via the Hugging Face API.

     The raw file bytes are POSTed to the hosted ``openai/whisper-large``
     model endpoint. Failures are reported as an error string rather than
     raised, so the calling agent always receives text back.
     """

     name = "audio_transcriber"
     description = "Transcribe a given audio file in mp3 or wav format to text using Whisper via Hugging Face API."
     inputs = {
         "file_path": {
             "type": "string",
             "description": "Path to the audio file (must be .mp3 or .wav)"
         }
     }
     output_type = "string"

     def __init__(self):
         """Set the endpoint URL and build request headers from ``HF_API_TOKEN``."""
         super().__init__()
         self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-large"
         token = os.getenv("HF_API_TOKEN")
         # Only send an Authorization header when a token is configured;
         # otherwise the header would carry the literal string "Bearer None".
         self.headers = {"Authorization": f"Bearer {token}"} if token else {}

     def forward(self, file_path: str) -> str:
         """Read the audio file at *file_path* and return its transcription.

         Args:
             file_path: Path to the audio file (must be .mp3 or .wav).

         Returns:
             The stripped transcription text, or an error message if the file
             cannot be read, the request fails, or the response has no text.
         """
         try:
             with open(file_path, "rb") as audio_file:
                 audio_bytes = audio_file.read()

             response = requests.post(
                 self.api_url,
                 headers=self.headers,
                 data=audio_bytes,
                 timeout=60
             )

             if response.status_code != 200:
                 return f"Error transcribing audio: {response.status_code} {response.text}"

             result = response.json()
             # The exact key depends on the model; usually 'text' for whisper.
             # Guard against payloads that are not a JSON object (e.g. a list),
             # which have no .get() method.
             transcription = result.get("text") if isinstance(result, dict) else None
             if isinstance(transcription, str) and transcription:
                 return transcription.strip()
             return "Error: No transcription found in the response."
         except Exception as e:
             # Tool boundary: report the failure as text instead of raising.
             return f"Error transcribing audio: {e}"