Final_Assignment_Template

Sleeping

App Files Files Community

dlaima committed on Jun 3

Commit

87850bb

verified ·

1 Parent(s): 0a0ae08

Update audio_transcriber.py

Browse files

Files changed (1) hide show

audio_transcriber.py +16 -8

audio_transcriber.py CHANGED Viewed

@@ -4,23 +4,29 @@ from smolagents import Tool
 class AudioTranscriptionTool(Tool):
     name = "audio_transcriber"
-    description = "Transcribe a given audio file in mp3 or wav format to text using Whisper via Hugging Face API."
     inputs = {
         "file_path": {
             "type": "string",
-            "description": "Path to the audio file (must be .mp3 or .wav)"
         }
     }
     output_type = "string"
     def __init__(self):
         super().__init__()
-        self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-large"
         self.headers = {
-            "Authorization": f"Bearer {os.getenv('HF_API_TOKEN')}"
         }
     def forward(self, file_path: str) -> str:
         try:
             with open(file_path, "rb") as audio_file:
                 audio_bytes = audio_file.read()
@@ -31,15 +37,17 @@ class AudioTranscriptionTool(Tool):
                 data=audio_bytes,
                 timeout=60
             )
             if response.status_code == 200:
                 result = response.json()
-                # The exact key depends on the model; usually 'text' for whisper
-                transcription = result.get("text", None)
                 if transcription:
                     return transcription.strip()
                 else:
-                    return "Error: No transcription found in the response."
             else:
-                return f"Error transcribing audio: {response.status_code} {response.text}"
         except Exception as e:
             return f"Error transcribing audio: {e}"

 class AudioTranscriptionTool(Tool):
     name = "audio_transcriber"
+    description = "Transcribe a given audio file in .mp3 or .wav format using Whisper via Hugging Face API."
     inputs = {
         "file_path": {
             "type": "string",
+            "description": "Path to the audio file (.mp3 or .wav)"
         }
     }
     output_type = "string"
     def __init__(self):
         super().__init__()
+        api_token = os.getenv("HF_API_TOKEN")
+        if not api_token:
+            raise EnvironmentError("HF_API_TOKEN not found in environment variables.")
+        self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-large"
         self.headers = {
+            "Authorization": f"Bearer {api_token}"
         }
     def forward(self, file_path: str) -> str:
+        if not file_path.lower().endswith((".mp3", ".wav")):
+            return "Error: File must be .mp3 or .wav format."
         try:
             with open(file_path, "rb") as audio_file:
                 audio_bytes = audio_file.read()
                 data=audio_bytes,
                 timeout=60
             )
             if response.status_code == 200:
                 result = response.json()
+                transcription = result.get("text")
                 if transcription:
                     return transcription.strip()
                 else:
+                    return "Error: No transcription found in API response."
             else:
+                return f"Error transcribing audio: {response.status_code} - {response.text}"
         except Exception as e:
             return f"Error transcribing audio: {e}"