Spaces:
Sleeping
Sleeping
IngoTB303
committed on
Commit
·
9e58d03
1
Parent(s):
4046252
Enhance agent functionality: add new agents for audio, visual, and Python tasks; update question fetching to handle attachments; improve tool initialization and requirements.
Browse files- .gitignore +2 -0
- TODOs.txt +6 -3
- agent.py +47 -9
- agent_tools.py +44 -69
- app-test.py +45 -36
- requirements.txt +8 -1
.gitignore
CHANGED
@@ -94,3 +94,5 @@ Desktop.ini
|
|
94 |
app_tokens.py
|
95 |
questions.json
|
96 |
answers.json
|
|
|
|
|
|
94 |
app_tokens.py
|
95 |
questions.json
|
96 |
answers.json
|
97 |
+
cache/
|
98 |
+
cache/*.*
|
TODOs.txt
CHANGED
@@ -1,5 +1,8 @@
|
|
1 |
Notes:
|
2 |
-
-
|
3 |
-
-
|
4 |
-
-
|
5 |
- checkout https://huggingface.co/blog/open-deep-research#using-a-codeagent and https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research
|
|
|
|
|
|
|
|
1 |
Notes:
|
2 |
+
- check out different quantized models locally, e.g. Gemini-flash 2.5, Deepseek-R1
|
3 |
+
- update the agent.run() call so that it passes the attachments via the additional_args or images parameter
|
4 |
+
see https://smolagents.org/docs/tools-of-smolagents-in-depth-guide/ and https://github.com/huggingface/smolagents/blob/main/src/smolagents/agents.py
|
5 |
- checkout https://huggingface.co/blog/open-deep-research#using-a-codeagent and https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research
|
6 |
+
- add more tools:
|
7 |
+
- video q&a: https://huggingface.co/spaces/lixin4ever/VideoLLaMA2,
|
8 |
+
|
agent.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import agent_tools
|
2 |
-
from smolagents import CodeAgent,
|
3 |
import app_tokens
|
4 |
|
5 |
model = AzureOpenAIServerModel(
|
@@ -13,22 +13,60 @@ class BasicAgent:
|
|
13 |
def __init__(self):
|
14 |
self.web_agent = CodeAgent(
|
15 |
model=model,
|
16 |
-
tools=[agent_tools.
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
max_steps=8,
|
18 |
name="web_agent",
|
19 |
-
description="Runs web searches for you."
|
|
|
20 |
)
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
self.manager_agent = CodeAgent(
|
23 |
model=model,
|
24 |
tools=[],
|
25 |
-
managed_agents=[self.web_agent],
|
26 |
-
|
27 |
-
planning_interval=5,
|
28 |
verbosity_level=2,
|
29 |
max_steps=12,
|
|
|
30 |
)
|
31 |
|
32 |
-
def forward(self, question: str) -> str:
|
33 |
-
|
|
|
|
|
|
|
34 |
return result
|
|
|
1 |
import agent_tools
|
2 |
+
from smolagents import CodeAgent, AzureOpenAIServerModel
|
3 |
import app_tokens
|
4 |
|
5 |
model = AzureOpenAIServerModel(
|
|
|
13 |
def __init__(self):
|
14 |
self.web_agent = CodeAgent(
|
15 |
model=model,
|
16 |
+
tools=[#agent_tools.tavily_search,
|
17 |
+
#agent_tools.duck_search,
|
18 |
+
agent_tools.visit_page,
|
19 |
+
agent_tools.wiki_search,
|
20 |
+
agent_tools.google_search,
|
21 |
+
agent_tools.final_answer
|
22 |
+
],
|
23 |
max_steps=8,
|
24 |
name="web_agent",
|
25 |
+
description="Runs web searches for you. Can use Google, DuckDuckGo and Wikipedia for search.",
|
26 |
+
add_base_tools = True
|
27 |
)
|
28 |
+
|
29 |
+
self.audio_agent = CodeAgent(
|
30 |
+
model=model,
|
31 |
+
tools=[agent_tools.speech_to_text_tool, agent_tools.final_answer],
|
32 |
+
max_steps=4,
|
33 |
+
name="audio_agent",
|
34 |
+
description="This agent can help you with converting audio to text.",
|
35 |
+
add_base_tools = True
|
36 |
+
)
|
37 |
+
|
38 |
+
self.py_agent = CodeAgent(
|
39 |
+
model=model,
|
40 |
+
tools=[agent_tools.do_python, agent_tools.final_answer],
|
41 |
+
additional_authorized_imports=["json","pandas","numpy", "regex"],
|
42 |
+
max_steps=8,
|
43 |
+
name="python_code_agent",
|
44 |
+
description="This agent can help you with executing and validating python code.",
|
45 |
+
add_base_tools = True
|
46 |
+
)
|
47 |
+
|
48 |
+
self.visual_agent = CodeAgent(
|
49 |
+
model=model,
|
50 |
+
tools=[agent_tools.visual_qa_tool, agent_tools.final_answer],
|
51 |
+
max_steps=4,
|
52 |
+
name="visual_qa_agent",
|
53 |
+
description="This agent can help you with answering questions about pictures.",
|
54 |
+
add_base_tools = True
|
55 |
+
)
|
56 |
+
|
57 |
self.manager_agent = CodeAgent(
|
58 |
model=model,
|
59 |
tools=[],
|
60 |
+
managed_agents=[self.web_agent, self.audio_agent, self.py_agent, self.visual_agent],
|
61 |
+
planning_interval=8,
|
|
|
62 |
verbosity_level=2,
|
63 |
max_steps=12,
|
64 |
+
add_base_tools = True
|
65 |
)
|
66 |
|
67 |
+
def forward(self, question: str, attachment: str = None) -> str:
    """Run the manager agent on a question and return its result.

    Args:
        question: The task text handed to ``self.manager_agent.run``.
        attachment: Optional attachment reference. When it is truthy it is
            forwarded to the agent as ``additional_args={"attachment": ...}``;
            when it is ``None`` or empty the agent is run with the question
            alone.

    Returns:
        Whatever ``self.manager_agent.run`` returns for the question.
    """
    # Guard clause: no attachment means a plain run without extra arguments.
    if not attachment:
        return self.manager_agent.run(question)

    extra = {"attachment": attachment}
    return self.manager_agent.run(question, additional_args=extra)
|
agent_tools.py
CHANGED
@@ -1,71 +1,46 @@
|
|
1 |
from smolagents import tool, Tool
|
2 |
from tavily import TavilyClient
|
3 |
-
import
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
from smolagents.utils import truncate_content
|
49 |
-
except ImportError as e:
|
50 |
-
raise ImportError(
|
51 |
-
"You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
|
52 |
-
) from e
|
53 |
-
try:
|
54 |
-
if proxy:
|
55 |
-
response = requests.get(url, timeout=20, proxies={'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, verify=False)
|
56 |
-
else:
|
57 |
-
response = requests.get(url, timeout=20)
|
58 |
-
response.raise_for_status() # Raise an exception for bad status codes
|
59 |
-
markdown_content = markdownify(response.text).strip()
|
60 |
-
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
61 |
-
return truncate_content(markdown_content, 40000)
|
62 |
-
|
63 |
-
except requests.exceptions.Timeout:
|
64 |
-
return "The request timed out. Please try again later or check the URL."
|
65 |
-
except RequestException as e:
|
66 |
-
return f"Error fetching the webpage: {str(e)}"
|
67 |
-
except Exception as e:
|
68 |
-
return f"An unexpected error occurred: {str(e)}"
|
69 |
-
|
70 |
-
# test = VisitWebpageTool()
|
71 |
-
# print(test.forward("https://www.wikipedia.de", proxy=True))
|
|
|
1 |
from smolagents import tool, Tool
|
2 |
from tavily import TavilyClient
|
3 |
+
from smolagents import DuckDuckGoSearchTool, GoogleSearchTool, VisitWebpageTool, WikipediaSearchTool, PythonInterpreterTool, FinalAnswerTool
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import os
|
6 |
+
|
7 |
+
load_dotenv() # take environment variables
|
8 |
+
|
9 |
+
# init tools
|
10 |
+
duck_search = DuckDuckGoSearchTool()
|
11 |
+
google_search = GoogleSearchTool()
|
12 |
+
visit_page = VisitWebpageTool()
|
13 |
+
wiki_search = WikipediaSearchTool()
|
14 |
+
do_python = PythonInterpreterTool()
|
15 |
+
final_answer = FinalAnswerTool()
|
16 |
+
tavily_search = TavilyClient()
|
17 |
+
# Speech-to-text tool built from the "hf-audio/whisper-large-v3-turbo" Space.
# The description doubles as usage instructions, so it must spell the tool
# name exactly as registered in `name=` and show a well-formed example call
# (the previous text said "speed_to_text_tool" and left the example unclosed).
speech_to_text_tool = Tool.from_space(
    "hf-audio/whisper-large-v3-turbo",
    name="speech_to_text_tool",
    description="""Converts audio to text by providing a file or url.
    Use it with the command speech_to_text_tool(filename).
    Example: 'speech_to_text_tool("cache\\audio.mp3")'""",
    api_name="/predict",
)
|
23 |
+
visual_qa_tool = Tool.from_space("sitammeur/PicQ",
|
24 |
+
name="visual_qa_tool",
|
25 |
+
description="""Can answer question about a provided image.
|
26 |
+
Use it with visual_qa_tool(question=<question>, image=<image filename>).
|
27 |
+
Example visual_qa_tool(question='How many items are in the image?', image='cache\\image.png').""",
|
28 |
+
api_name="/predict")
|
29 |
+
|
30 |
+
# image_generation_tool = Tool.from_space(
|
31 |
+
# "black-forest-labs/FLUX.1-schnell",
|
32 |
+
# name="image_generator",
|
33 |
+
# description="Generate an image from a prompt"
|
34 |
+
# )
|
35 |
+
# image = image_generation_tool("A sunny beach")
|
36 |
+
# print(image)
|
37 |
+
|
38 |
+
## Testing the tools
|
39 |
+
# print(duck_search.forward("smolagents")) # connect error, does not take proxy
|
40 |
+
# print(google_search.forward("smolagents"))
|
41 |
+
# print(visit_page.forward("https://www.wikipedia.de"))
|
42 |
+
# print(wiki_search.forward("Python_(programming_language)"))
|
43 |
+
# print(do_python('print("hello world")'))
|
44 |
+
# print(tavily_search.search("smolagents"))
|
45 |
+
# print(speech_to_text_tool("cache\\1f975693-876d-457b-a649-393859e79bf3.mp3"))
|
46 |
+
# print(visual_qa_tool(question="Name all black and white figures with each coordinates, e. g. black king on g8", image="cache\\cca530fc-4052-43b2-b130-b30968d8aa44.png"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app-test.py
CHANGED
@@ -2,7 +2,7 @@ import requests
|
|
2 |
from huggingface_hub import login
|
3 |
import agent
|
4 |
import json
|
5 |
-
import
|
6 |
|
7 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
8 |
SPACE_ID = "https://huggingface.co/spaces/IngoTB303/Final_Assignment_Template/tree/main"
|
@@ -13,35 +13,37 @@ submit_url = f"{api_url}/submit"
|
|
13 |
|
14 |
agent = agent.BasicAgent()
|
15 |
|
16 |
-
def fetch_questions(
|
17 |
"""Fetch questions from the API endpoint."""
|
18 |
print(f"Fetching questions from: {questions_url}")
|
|
|
19 |
try:
|
20 |
-
|
21 |
-
response = requests.get(questions_url, timeout=30, proxies={'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, verify=False)
|
22 |
-
else:
|
23 |
-
response = requests.get(questions_url, timeout=30)
|
24 |
response.raise_for_status()
|
25 |
questions_data = response.json()
|
26 |
if not questions_data:
|
27 |
return None
|
28 |
-
|
|
|
29 |
# Fetch attachments for questions with file_name
|
30 |
for question in questions_data:
|
31 |
file_name = question.get("file_name", "")
|
32 |
task_id = question.get("task_id")
|
33 |
if file_name and task_id:
|
34 |
try:
|
35 |
-
|
36 |
-
att_response = requests.get(f"{attachments_url}{task_id}", timeout=15, proxies={'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, verify=False)
|
37 |
-
else:
|
38 |
-
att_response = requests.get(f"{attachments_url}{task_id}", timeout=15)
|
39 |
att_response.raise_for_status()
|
40 |
-
#
|
41 |
-
|
|
|
|
|
|
|
|
|
42 |
except Exception as e:
|
43 |
print(f"Error fetching attachment for task {task_id}: {e}")
|
44 |
-
question["
|
|
|
|
|
45 |
return questions_data
|
46 |
except Exception as e:
|
47 |
print(f"Error fetching questions: {e}")
|
@@ -56,15 +58,15 @@ def run_agent(questions_data):
|
|
56 |
for item in questions_data:
|
57 |
task_id = item.get("task_id")
|
58 |
question_text = item.get("question", "")
|
59 |
-
|
60 |
-
# Concatenate question and attachment_b64 if present
|
61 |
-
if attachment_b64:
|
62 |
-
question_text = f"{question_text}\n\n[ATTACHMENT:]\n{attachment_b64}"
|
63 |
if not task_id or question_text is None:
|
64 |
print(f"Skipping item with missing task_id or question: {item}")
|
65 |
continue
|
66 |
try:
|
67 |
-
|
|
|
|
|
|
|
68 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
69 |
except Exception as e:
|
70 |
print(f"Error running agent on task {task_id}: {e}")
|
@@ -116,24 +118,31 @@ def submit_answers_to_hf(username, agent_code, answers_payload):
|
|
116 |
print(status_message)
|
117 |
return status_message
|
118 |
|
119 |
-
#
|
120 |
-
|
121 |
|
122 |
-
#
|
123 |
-
questions = load_questions("questions.json")
|
124 |
-
|
125 |
-
# # test print the questions to verify, if attachments were loaded
|
126 |
# for question in questions:
|
127 |
-
# print(question["question"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
-
#
|
130 |
-
# answers =
|
131 |
-
#
|
132 |
-
#
|
133 |
-
# with open("answers.json", "w", encoding="utf-8") as f:
|
134 |
-
# json.dump(answers, f, ensure_ascii=False, indent=2)
|
135 |
|
136 |
-
#
|
137 |
-
|
138 |
-
|
139 |
-
print(
|
|
|
|
|
|
2 |
from huggingface_hub import login
|
3 |
import agent
|
4 |
import json
|
5 |
+
import os
|
6 |
|
7 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
8 |
SPACE_ID = "https://huggingface.co/spaces/IngoTB303/Final_Assignment_Template/tree/main"
|
|
|
13 |
|
14 |
agent = agent.BasicAgent()
|
15 |
|
16 |
+
def fetch_questions():
|
17 |
"""Fetch questions from the API endpoint."""
|
18 |
print(f"Fetching questions from: {questions_url}")
|
19 |
+
questions_data = None
|
20 |
try:
|
21 |
+
response = requests.get(questions_url, timeout=30)
|
|
|
|
|
|
|
22 |
response.raise_for_status()
|
23 |
questions_data = response.json()
|
24 |
if not questions_data:
|
25 |
return None
|
26 |
+
# Ensure cache directory exists
|
27 |
+
os.makedirs("cache", exist_ok=True)
|
28 |
# Fetch attachments for questions with file_name
|
29 |
for question in questions_data:
|
30 |
file_name = question.get("file_name", "")
|
31 |
task_id = question.get("task_id")
|
32 |
if file_name and task_id:
|
33 |
try:
|
34 |
+
att_response = requests.get(f"{attachments_url}{task_id}", timeout=15)
|
|
|
|
|
|
|
35 |
att_response.raise_for_status()
|
36 |
+
# Save binary content to file in cache folder
|
37 |
+
file_path = os.path.join("cache", file_name)
|
38 |
+
with open(file_path, "wb") as f:
|
39 |
+
f.write(att_response.content)
|
40 |
+
# Store the local file path (as built by os.path.join) in the question dict
|
41 |
+
question["attachment_file"] = file_path
|
42 |
except Exception as e:
|
43 |
print(f"Error fetching attachment for task {task_id}: {e}")
|
44 |
+
question["attachment_file"] = None
|
45 |
+
else:
|
46 |
+
question["attachment_file"] = ""
|
47 |
return questions_data
|
48 |
except Exception as e:
|
49 |
print(f"Error fetching questions: {e}")
|
|
|
58 |
for item in questions_data:
|
59 |
task_id = item.get("task_id")
|
60 |
question_text = item.get("question", "")
|
61 |
+
attachment_file = item.get("attachment_file", None)
|
|
|
|
|
|
|
62 |
if not task_id or question_text is None:
|
63 |
print(f"Skipping item with missing task_id or question: {item}")
|
64 |
continue
|
65 |
try:
|
66 |
+
if attachment_file not in (None, ""):
|
67 |
+
submitted_answer = agent.forward(question=question_text, attachment=attachment_file)
|
68 |
+
else:
|
69 |
+
submitted_answer = agent.forward(question=question_text)
|
70 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
71 |
except Exception as e:
|
72 |
print(f"Error running agent on task {task_id}: {e}")
|
|
|
118 |
print(status_message)
|
119 |
return status_message
|
120 |
|
121 |
+
# --- fetch questions from server ---
|
122 |
+
questions = fetch_questions()
|
123 |
|
124 |
+
# --- load cached questions ---
|
125 |
+
# questions = load_questions("questions.json")
|
126 |
+
# test print the questions to verify, if attachments were loaded
|
|
|
127 |
# for question in questions:
|
128 |
+
# print(question["question"])
|
129 |
+
# print(question["attachment_file"])
|
130 |
+
|
131 |
+
# --- generate answers ---
|
132 |
+
answers = run_agent(questions)
|
133 |
+
# save answers to publish them later
|
134 |
+
if answers:
|
135 |
+
with open("answers.json", "w", encoding="utf-8") as f:
|
136 |
+
json.dump(answers, f, ensure_ascii=False, indent=2)
|
137 |
|
138 |
+
# --- submit results to Huggingface ---
|
139 |
+
# answers = load_answers("answers.json")
|
140 |
+
# assignment_results = submit_answers_to_hf("IngoTB303", SPACE_ID, answers)
|
141 |
+
# print(assignment_results)
|
|
|
|
|
142 |
|
143 |
+
# # Find the question with the specified task_id
|
144 |
+
# question_text = next((q["question"] for q in questions if q.get("task_id") == "7bd855d8-463d-4ed5-93ca-5fe35145f733"), None)
|
145 |
+
# attachment_file = next((q["attachment_file"] for q in questions if q.get("task_id") == "7bd855d8-463d-4ed5-93ca-5fe35145f733"), None)
|
146 |
+
# print(question_text)
|
147 |
+
# print(attachment_file)
|
148 |
+
# print(agent.forward(question=question_text, attachment=attachment_file))
|
requirements.txt
CHANGED
@@ -1,7 +1,14 @@
|
|
1 |
gradio
|
|
|
2 |
requests
|
3 |
tavily-python
|
4 |
smolagents
|
|
|
|
|
5 |
regex
|
6 |
pandas
|
7 |
-
numpy
|
|
|
|
|
|
|
|
|
|
1 |
gradio
|
2 |
+
gradio_client
|
3 |
requests
|
4 |
tavily-python
|
5 |
smolagents
|
6 |
+
smolagents[transformers]
|
7 |
+
smolagents[audio]
|
8 |
regex
|
9 |
pandas
|
10 |
+
numpy
|
11 |
+
wikipedia-api
|
12 |
+
duckduckgo_search
|
13 |
+
markdownify
|
14 |
+
python-dotenv
|