Spaces:
Sleeping
Sleeping
add whisper
Browse files
app.py
CHANGED
@@ -2,16 +2,70 @@ import os
|
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
import inspect
|
|
|
5 |
import pandas as pd
|
6 |
-
from smolagents import OpenAIServerModel, DuckDuckGoSearchTool, PythonInterpreterTool, CodeAgent, WikipediaSearchTool
|
7 |
-
SpeechToTextTool
|
8 |
from pathlib import Path
|
9 |
import tempfile
|
|
|
10 |
|
11 |
# (Keep Constants as is)
|
12 |
# --- Constants ---
|
13 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def download_file_if_any(base_api_url: str, task_id: str) -> str | None:
|
17 |
"""
|
@@ -55,7 +109,7 @@ class BasicAgent:
|
|
55 |
model=OpenAIServerModel(model_id="gpt-4o"),
|
56 |
tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), SpeechToTextTool()],
|
57 |
add_base_tools=True,
|
58 |
-
additional_authorized_imports=['pandas','numpy','csv']
|
59 |
)
|
60 |
|
61 |
print("BasicAgent initialized.")
|
@@ -72,7 +126,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
72 |
and displays the results.
|
73 |
"""
|
74 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
75 |
-
space_id =
|
76 |
|
77 |
if profile:
|
78 |
username= f"{profile.username}"
|
@@ -242,7 +296,7 @@ if __name__ == "__main__":
|
|
242 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
243 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
244 |
space_host_startup = os.getenv("SPACE_HOST")
|
245 |
-
space_id_startup =
|
246 |
|
247 |
if space_host_startup:
|
248 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
import inspect
|
5 |
+
import openai
|
6 |
import pandas as pd
|
7 |
+
from smolagents import OpenAIServerModel, DuckDuckGoSearchTool, PythonInterpreterTool, CodeAgent, WikipediaSearchTool
|
|
|
8 |
from pathlib import Path
|
9 |
import tempfile
|
10 |
+
from smolagents.tools import PipelineTool
|
11 |
|
12 |
# (Keep Constants as is)
|
13 |
# --- Constants ---
|
14 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
15 |
|
16 |
+
class SpeechToTextTool(PipelineTool):
    """
    Speech-to-text tool that forwards a local audio file to the OpenAI
    transcription endpoint and hands back the resulting text.

    The only input is a filesystem path; the file is opened locally and
    uploaded using the ``whisper-1`` model.
    """

    name = "transcriber"
    description = (
        "This tool sends an audio file to OpenAI Whisper and returns the "
        "transcribed text."
    )
    inputs = {
        "audio": {
            "type": "string",
            "description": "Absolute or relative path to a local audio file.",
        }
    }
    output_type = "string"
    # Not referenced by any code in this class; the model actually requested
    # is the one named in the API call inside `_transcribe`.
    default_checkpoint = "openai/whisper-1"

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------
    def __call__(self, audio: str) -> str:
        """
        Transcribe the audio file located at ``audio`` and return its text.

        Allows an instance to be invoked like a plain function, e.g.
        ``SpeechToTextTool()(path_to_audio)``.
        """
        transcript = self._transcribe(audio)
        return transcript

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _transcribe(audio_path: str) -> str:
        """
        Validate ``audio_path`` and submit the file for transcription.

        Raises:
            TypeError: if ``audio_path`` is not a ``str``.
            FileNotFoundError: if the expanded, resolved path is not an
                existing regular file.
        """
        # Guard: only string paths are accepted.
        if not isinstance(audio_path, str):
            raise TypeError(
                "Parameter 'audio' must be a string containing the file path."
            )

        # Expand "~" first, then resolve to an absolute path so the error
        # message below shows exactly which location was checked.
        expanded = Path(audio_path).expanduser()
        resolved = expanded.resolve()
        if not resolved.is_file():
            raise FileNotFoundError(f"No such audio file: {resolved}")

        # NOTE(review): per the original author's comment, with
        # response_format="text" the call returns the plain transcript rather
        # than a JSON object - confirm against the OpenAI client in use.
        request_options = {"model": "whisper-1", "response_format": "text"}
        with resolved.open("rb") as audio_stream:
            return openai.audio.transcriptions.create(
                file=audio_stream,
                **request_options,
            )
|
69 |
|
70 |
def download_file_if_any(base_api_url: str, task_id: str) -> str | None:
|
71 |
"""
|
|
|
109 |
model=OpenAIServerModel(model_id="gpt-4o"),
|
110 |
tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), SpeechToTextTool()],
|
111 |
add_base_tools=True,
|
112 |
+
additional_authorized_imports=['pandas','numpy','csv','subprocess']
|
113 |
)
|
114 |
|
115 |
print("BasicAgent initialized.")
|
|
|
126 |
and displays the results.
|
127 |
"""
|
128 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
129 |
+
space_id = "l3xv/Final_Assignment_Template"
|
130 |
|
131 |
if profile:
|
132 |
username= f"{profile.username}"
|
|
|
296 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
297 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
298 |
space_host_startup = os.getenv("SPACE_HOST")
|
299 |
+
space_id_startup = "l3xv/Final_Assignment_Template"
|
300 |
|
301 |
if space_host_startup:
|
302 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|