l3xv committed on
Commit
0d95f74
·
1 Parent(s): 03efb95

add whisper

Browse files
Files changed (1) hide show
  1. app.py +59 -5
app.py CHANGED
@@ -2,16 +2,70 @@ import os
2
  import gradio as gr
3
  import requests
4
  import inspect
 
5
  import pandas as pd
6
- from smolagents import OpenAIServerModel, DuckDuckGoSearchTool, PythonInterpreterTool, CodeAgent, WikipediaSearchTool, \
7
- SpeechToTextTool
8
  from pathlib import Path
9
  import tempfile
 
10
 
11
  # (Keep Constants as is)
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def download_file_if_any(base_api_url: str, task_id: str) -> str | None:
17
  """
@@ -55,7 +109,7 @@ class BasicAgent:
55
  model=OpenAIServerModel(model_id="gpt-4o"),
56
  tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), SpeechToTextTool()],
57
  add_base_tools=True,
58
- additional_authorized_imports=['pandas','numpy','csv']
59
  )
60
 
61
  print("BasicAgent initialized.")
@@ -72,7 +126,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
72
  and displays the results.
73
  """
74
  # --- Determine HF Space Runtime URL and Repo URL ---
75
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
76
 
77
  if profile:
78
  username= f"{profile.username}"
@@ -242,7 +296,7 @@ if __name__ == "__main__":
242
  print("\n" + "-"*30 + " App Starting " + "-"*30)
243
  # Check for SPACE_HOST and SPACE_ID at startup for information
244
  space_host_startup = os.getenv("SPACE_HOST")
245
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
246
 
247
  if space_host_startup:
248
  print(f"βœ… SPACE_HOST found: {space_host_startup}")
 
2
  import gradio as gr
3
  import requests
4
  import inspect
5
+ import openai
6
  import pandas as pd
7
+ from smolagents import OpenAIServerModel, DuckDuckGoSearchTool, PythonInterpreterTool, CodeAgent, WikipediaSearchTool
 
8
  from pathlib import Path
9
  import tempfile
10
+ from smolagents.tools import PipelineTool
11
 
12
  # (Keep Constants as is)
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
class SpeechToTextTool(PipelineTool):
    """Speech-to-text tool backed by the OpenAI transcription endpoint.

    The tool takes the path of an audio file on the local filesystem,
    uploads the file through ``openai.audio.transcriptions.create`` using
    the ``whisper-1`` model, and returns whatever that call yields for
    ``response_format="text"``.

    NOTE(review): ``__call__`` is overridden and goes straight to
    ``_transcribe``; it does not delegate to the base class's ``__call__``.
    """

    # Not used by any code in this class; kept as a label only.
    default_checkpoint = "openai/whisper-1"
    description = (
        "This tool sends an audio file to OpenAI Whisper and returns the "
        "transcribed text."
    )
    name = "transcriber"
    inputs = {
        "audio": {
            "type": "string",
            "description": "Absolute or relative path to a local audio file.",
        }
    }
    output_type = "string"

    # ---- public entry point ------------------------------------------------
    def __call__(self, audio: str) -> str:
        """Transcribe the audio file located at ``audio``.

        Lets an instance be invoked like a plain function, e.g.
        ``SpeechToTextTool()(path_to_audio)``.

        Args:
            audio: Path to a local audio file.

        Returns:
            The value produced by ``_transcribe`` for that file.
        """
        transcript = self._transcribe(audio)
        return transcript

    # ---- implementation ----------------------------------------------------
    @staticmethod
    def _transcribe(audio_path: str) -> str:
        """Validate ``audio_path`` and send the file to the OpenAI API.

        Args:
            audio_path: Path to a local audio file; ``~`` is expanded and the
                path is resolved before it is checked.

        Returns:
            The object returned by ``openai.audio.transcriptions.create``.
            With ``response_format="text"`` this is expected to be the plain
            transcript rather than a JSON payload.

        Raises:
            TypeError: If ``audio_path`` is not a ``str``.
            FileNotFoundError: If the resolved path is not an existing file.
        """
        # Reject non-string input before touching the filesystem.
        if not isinstance(audio_path, str):
            raise TypeError(
                "Parameter 'audio' must be a string containing the file path."
            )

        audio_file = Path(audio_path).expanduser().resolve()
        if not audio_file.is_file():
            raise FileNotFoundError(f"No such audio file: {audio_file}")

        # The handle is closed as soon as the request has completed.
        with audio_file.open("rb") as stream:
            return openai.audio.transcriptions.create(
                file=stream,
                model="whisper-1",
                response_format="text",
            )
69
 
70
  def download_file_if_any(base_api_url: str, task_id: str) -> str | None:
71
  """
 
109
  model=OpenAIServerModel(model_id="gpt-4o"),
110
  tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), SpeechToTextTool()],
111
  add_base_tools=True,
112
+ additional_authorized_imports=['pandas','numpy','csv','subprocess']
113
  )
114
 
115
  print("BasicAgent initialized.")
 
126
  and displays the results.
127
  """
128
  # --- Determine HF Space Runtime URL and Repo URL ---
129
+ space_id = "l3xv/Final_Assignment_Template"
130
 
131
  if profile:
132
  username= f"{profile.username}"
 
296
  print("\n" + "-"*30 + " App Starting " + "-"*30)
297
  # Check for SPACE_HOST and SPACE_ID at startup for information
298
  space_host_startup = os.getenv("SPACE_HOST")
299
+ space_id_startup = "l3xv/Final_Assignment_Template"
300
 
301
  if space_host_startup:
302
  print(f"βœ… SPACE_HOST found: {space_host_startup}")