dlaima committed on
Commit
87850bb
·
verified ·
1 Parent(s): 0a0ae08

Update audio_transcriber.py

Browse files
Files changed (1) hide show
  1. audio_transcriber.py +16 -8
audio_transcriber.py CHANGED
@@ -4,23 +4,29 @@ from smolagents import Tool
4
 
5
  class AudioTranscriptionTool(Tool):
6
  name = "audio_transcriber"
7
- description = "Transcribe a given audio file in mp3 or wav format to text using Whisper via Hugging Face API."
8
  inputs = {
9
  "file_path": {
10
  "type": "string",
11
- "description": "Path to the audio file (must be .mp3 or .wav)"
12
  }
13
  }
14
  output_type = "string"
15
 
16
  def __init__(self):
17
  super().__init__()
18
- self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-large"
 
 
 
19
  self.headers = {
20
- "Authorization": f"Bearer {os.getenv('HF_API_TOKEN')}"
21
  }
22
 
23
  def forward(self, file_path: str) -> str:
 
 
 
24
  try:
25
  with open(file_path, "rb") as audio_file:
26
  audio_bytes = audio_file.read()
@@ -31,15 +37,17 @@ class AudioTranscriptionTool(Tool):
31
  data=audio_bytes,
32
  timeout=60
33
  )
 
34
  if response.status_code == 200:
35
  result = response.json()
36
- # The exact key depends on the model; usually 'text' for whisper
37
- transcription = result.get("text", None)
38
  if transcription:
39
  return transcription.strip()
40
  else:
41
- return "Error: No transcription found in the response."
42
  else:
43
- return f"Error transcribing audio: {response.status_code} {response.text}"
 
44
  except Exception as e:
45
  return f"Error transcribing audio: {e}"
 
 
4
 
5
  class AudioTranscriptionTool(Tool):
6
  name = "audio_transcriber"
7
+ description = "Transcribe a given audio file in .mp3 or .wav format using Whisper via Hugging Face API."
8
  inputs = {
9
  "file_path": {
10
  "type": "string",
11
+ "description": "Path to the audio file (.mp3 or .wav)"
12
  }
13
  }
14
  output_type = "string"
15
 
16
  def __init__(self):
17
  super().__init__()
18
+ api_token = os.getenv("HF_API_TOKEN")
19
+ if not api_token:
20
+ raise EnvironmentError("HF_API_TOKEN not found in environment variables.")
21
+ self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-large"
22
  self.headers = {
23
+ "Authorization": f"Bearer {api_token}"
24
  }
25
 
26
  def forward(self, file_path: str) -> str:
27
+ if not file_path.lower().endswith((".mp3", ".wav")):
28
+ return "Error: File must be .mp3 or .wav format."
29
+
30
  try:
31
  with open(file_path, "rb") as audio_file:
32
  audio_bytes = audio_file.read()
 
37
  data=audio_bytes,
38
  timeout=60
39
  )
40
+
41
  if response.status_code == 200:
42
  result = response.json()
43
+ transcription = result.get("text")
 
44
  if transcription:
45
  return transcription.strip()
46
  else:
47
+ return "Error: No transcription found in API response."
48
  else:
49
+ return f"Error transcribing audio: {response.status_code} - {response.text}"
50
+
51
  except Exception as e:
52
  return f"Error transcribing audio: {e}"
53
+