IngoTB303 committed on
Commit
9e58d03
·
1 Parent(s): 4046252

Enhance agent functionality: add new agents for audio, visual, and Python tasks; update question fetching to handle attachments; improve tool initialization and requirements.

Browse files
Files changed (6) hide show
  1. .gitignore +2 -0
  2. TODOs.txt +6 -3
  3. agent.py +47 -9
  4. agent_tools.py +44 -69
  5. app-test.py +45 -36
  6. requirements.txt +8 -1
.gitignore CHANGED
@@ -94,3 +94,5 @@ Desktop.ini
94
  app_tokens.py
95
  questions.json
96
  answers.json
 
 
 
94
  app_tokens.py
95
  questions.json
96
  answers.json
97
+ cache/
98
+ cache/*.*
TODOs.txt CHANGED
@@ -1,5 +1,8 @@
1
  Notes:
2
- - prompt tuning
3
- - check existing classes for tools: video/image capabilities, audio transcription, etc.
4
- - checkout different quantized models locally, e. g. Gemini-flash 2.5, Deepseek-R1
5
  - checkout https://huggingface.co/blog/open-deep-research#using-a-codeagent and https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research
 
 
 
 
1
  Notes:
2
+ - check out different quantized models locally, e.g. Gemini-flash 2.5, Deepseek-R1
3
+ - update the agent.run() call so that it passes the attachments via the additional_args or images parameter
4
+ see https://smolagents.org/docs/tools-of-smolagents-in-depth-guide/ and https://github.com/huggingface/smolagents/blob/main/src/smolagents/agents.py
5
  - checkout https://huggingface.co/blog/open-deep-research#using-a-codeagent and https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research
6
+ - add more tools:
7
+ - video Q&A: https://huggingface.co/spaces/lixin4ever/VideoLLaMA2
8
+
agent.py CHANGED
@@ -1,5 +1,5 @@
1
  import agent_tools
2
- from smolagents import CodeAgent, FinalAnswerTool, DuckDuckGoSearchTool, AzureOpenAIServerModel
3
  import app_tokens
4
 
5
  model = AzureOpenAIServerModel(
@@ -13,22 +13,60 @@ class BasicAgent:
13
  def __init__(self):
14
  self.web_agent = CodeAgent(
15
  model=model,
16
- tools=[agent_tools.VisitWebpageTool(), FinalAnswerTool(), DuckDuckGoSearchTool()],
 
 
 
 
 
 
17
  max_steps=8,
18
  name="web_agent",
19
- description="Runs web searches for you."
 
20
  )
21
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  self.manager_agent = CodeAgent(
23
  model=model,
24
  tools=[],
25
- managed_agents=[self.web_agent],
26
- additional_authorized_imports=["json","pandas","numpy", "regex"],
27
- planning_interval=5,
28
  verbosity_level=2,
29
  max_steps=12,
 
30
  )
31
 
32
- def forward(self, question: str) -> str:
33
- result = self.manager_agent.run(question)
 
 
 
34
  return result
 
1
  import agent_tools
2
+ from smolagents import CodeAgent, AzureOpenAIServerModel
3
  import app_tokens
4
 
5
  model = AzureOpenAIServerModel(
 
13
  def __init__(self):
14
  self.web_agent = CodeAgent(
15
  model=model,
16
+ tools=[#agent_tools.tavily_search,
17
+ #agent_tools.duck_search,
18
+ agent_tools.visit_page,
19
+ agent_tools.wiki_search,
20
+ agent_tools.google_search,
21
+ agent_tools.final_answer
22
+ ],
23
  max_steps=8,
24
  name="web_agent",
25
+ description="Runs web searches for you. Can use Google, DuckDuckGo and Wikipedia for search.",
26
+ add_base_tools = True
27
  )
28
+
29
+ self.audio_agent = CodeAgent(
30
+ model=model,
31
+ tools=[agent_tools.speech_to_text_tool, agent_tools.final_answer],
32
+ max_steps=4,
33
+ name="audio_agent",
34
+ description="This agent can help you with converting audio to text.",
35
+ add_base_tools = True
36
+ )
37
+
38
+ self.py_agent = CodeAgent(
39
+ model=model,
40
+ tools=[agent_tools.do_python, agent_tools.final_answer],
41
+ additional_authorized_imports=["json","pandas","numpy", "regex"],
42
+ max_steps=8,
43
+ name="python_code_agent",
44
+ description="This agent can help you with executing and validating python code.",
45
+ add_base_tools = True
46
+ )
47
+
48
+ self.visual_agent = CodeAgent(
49
+ model=model,
50
+ tools=[agent_tools.visual_qa_tool, agent_tools.final_answer],
51
+ max_steps=4,
52
+ name="visual_qa_agent",
53
+ description="This agent can help you with answering questions about pictures.",
54
+ add_base_tools = True
55
+ )
56
+
57
  self.manager_agent = CodeAgent(
58
  model=model,
59
  tools=[],
60
+ managed_agents=[self.web_agent, self.audio_agent, self.py_agent, self.visual_agent],
61
+ planning_interval=8,
 
62
  verbosity_level=2,
63
  max_steps=12,
64
+ add_base_tools = True
65
  )
66
 
67
+ def forward(self, question: str, attachment: str = None) -> str:
68
+ if attachment:
69
+ result = self.manager_agent.run(question, additional_args={"attachment": attachment})
70
+ else:
71
+ result = self.manager_agent.run(question)
72
  return result
agent_tools.py CHANGED
@@ -1,71 +1,46 @@
1
  from smolagents import tool, Tool
2
  from tavily import TavilyClient
3
- import app_tokens
4
-
5
- @tool
6
- def web_search(query: str, proxy: bool = False) -> str:
7
- """Searches the web for your query.
8
-
9
- Args:
10
- query: Your query.
11
- proxy: An optional boolean parameter, if a local proxy should be used or not.
12
-
13
- """
14
- # tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
15
- if proxy:
16
- tavily_client = TavilyClient(api_key=app_tokens.get_tavily_api_key(), proxies={'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, verify=False)
17
- else:
18
- tavily_client = TavilyClient(api_key=app_tokens.get_tavily_api_key())
19
- response = tavily_client.search(query)
20
- return str(response["results"])
21
-
22
- class VisitWebpageTool(Tool):
23
- name = "visit_webpage"
24
- description = (
25
- "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
26
- )
27
- inputs = {
28
- "url": {
29
- "type": "string",
30
- "description": "The url of the webpage to visit.",
31
- },
32
- "proxy": {
33
- "type": "boolean",
34
- "description": "An optional boolean parameter, if a local proxy should be used or not. Should be True, if the request timed out.",
35
- "nullable": "True",
36
- },
37
- }
38
- output_type = "string"
39
-
40
- def forward(self, url: str, proxy: bool = True) -> str:
41
- try:
42
- import re
43
-
44
- import requests
45
- from markdownify import markdownify
46
- from requests.exceptions import RequestException
47
-
48
- from smolagents.utils import truncate_content
49
- except ImportError as e:
50
- raise ImportError(
51
- "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
52
- ) from e
53
- try:
54
- if proxy:
55
- response = requests.get(url, timeout=20, proxies={'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, verify=False)
56
- else:
57
- response = requests.get(url, timeout=20)
58
- response.raise_for_status() # Raise an exception for bad status codes
59
- markdown_content = markdownify(response.text).strip()
60
- markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
61
- return truncate_content(markdown_content, 40000)
62
-
63
- except requests.exceptions.Timeout:
64
- return "The request timed out. Please try again later or check the URL."
65
- except RequestException as e:
66
- return f"Error fetching the webpage: {str(e)}"
67
- except Exception as e:
68
- return f"An unexpected error occurred: {str(e)}"
69
-
70
- # test = VisitWebpageTool()
71
- # print(test.forward("https://www.wikipedia.de", proxy=True))
 
1
  from smolagents import tool, Tool
2
  from tavily import TavilyClient
3
+ from smolagents import DuckDuckGoSearchTool, GoogleSearchTool, VisitWebpageTool, WikipediaSearchTool, PythonInterpreterTool, FinalAnswerTool
4
+ from dotenv import load_dotenv
5
+ import os
6
+
7
+ load_dotenv() # take environment variables
8
+
9
+ # init tools
10
+ duck_search = DuckDuckGoSearchTool()
11
+ google_search = GoogleSearchTool()
12
+ visit_page = VisitWebpageTool()
13
+ wiki_search = WikipediaSearchTool()
14
+ do_python = PythonInterpreterTool()
15
+ final_answer = FinalAnswerTool()
16
+ tavily_search = TavilyClient()
17
+ speech_to_text_tool = Tool.from_space("hf-audio/whisper-large-v3-turbo",
18
+ name="speech_to_text_tool",
19
+ description="""Converts audio to text by providing a file or url.
20
+ Use the with the command speed_to_text_tool(filename).
21
+ Example: 'speech_to_text_tool("cache\\audio.mp3'""",
22
+ api_name="/predict")
23
+ visual_qa_tool = Tool.from_space("sitammeur/PicQ",
24
+ name="visual_qa_tool",
25
+ description="""Can answer question about a provided image.
26
+ Use it with visual_qa_tool(question=<question>, image=<image filename>).
27
+ Example visual_qa_tool(question='How many items are in the image?', image='cache\\image.png').""",
28
+ api_name="/predict")
29
+
30
+ # image_generation_tool = Tool.from_space(
31
+ # "black-forest-labs/FLUX.1-schnell",
32
+ # name="image_generator",
33
+ # description="Generate an image from a prompt"
34
+ # )
35
+ # image = image_generation_tool("A sunny beach")
36
+ # print(image)
37
+
38
+ ## Testing the tools
39
+ # print(duck_search.forward("smolagents")) # connect error, does not take proxy
40
+ # print(google_search.forward("smolagents"))
41
+ # print(visit_page.forward("https://www.wikipedia.de"))
42
+ # print(wiki_search.forward("Python_(programming_language)"))
43
+ # print(do_python('print("hello world")'))
44
+ # print(tavily_search.search("smolagents"))
45
+ # print(speech_to_text_tool("cache\\1f975693-876d-457b-a649-393859e79bf3.mp3"))
46
+ # print(visual_qa_tool(question="Name all black and white figures with each coordinates, e. g. black king on g8", image="cache\\cca530fc-4052-43b2-b130-b30968d8aa44.png"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app-test.py CHANGED
@@ -2,7 +2,7 @@ import requests
2
  from huggingface_hub import login
3
  import agent
4
  import json
5
- import base64
6
 
7
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
8
  SPACE_ID = "https://huggingface.co/spaces/IngoTB303/Final_Assignment_Template/tree/main"
@@ -13,35 +13,37 @@ submit_url = f"{api_url}/submit"
13
 
14
  agent = agent.BasicAgent()
15
 
16
- def fetch_questions(proxy: bool = True):
17
  """Fetch questions from the API endpoint."""
18
  print(f"Fetching questions from: {questions_url}")
 
19
  try:
20
- if proxy:
21
- response = requests.get(questions_url, timeout=30, proxies={'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, verify=False)
22
- else:
23
- response = requests.get(questions_url, timeout=30)
24
  response.raise_for_status()
25
  questions_data = response.json()
26
  if not questions_data:
27
  return None
28
-
 
29
  # Fetch attachments for questions with file_name
30
  for question in questions_data:
31
  file_name = question.get("file_name", "")
32
  task_id = question.get("task_id")
33
  if file_name and task_id:
34
  try:
35
- if proxy:
36
- att_response = requests.get(f"{attachments_url}{task_id}", timeout=15, proxies={'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, verify=False)
37
- else:
38
- att_response = requests.get(f"{attachments_url}{task_id}", timeout=15)
39
  att_response.raise_for_status()
40
- # Encode binary content to base64 string
41
- question["attachment_b64"] = base64.b64encode(att_response.content).decode("utf-8")
 
 
 
 
42
  except Exception as e:
43
  print(f"Error fetching attachment for task {task_id}: {e}")
44
- question["attachment_b64"] = None
 
 
45
  return questions_data
46
  except Exception as e:
47
  print(f"Error fetching questions: {e}")
@@ -56,15 +58,15 @@ def run_agent(questions_data):
56
  for item in questions_data:
57
  task_id = item.get("task_id")
58
  question_text = item.get("question", "")
59
- attachment_b64 = item.get("attachment_b64", "")
60
- # Concatenate question and attachment_b64 if present
61
- if attachment_b64:
62
- question_text = f"{question_text}\n\n[ATTACHMENT:]\n{attachment_b64}"
63
  if not task_id or question_text is None:
64
  print(f"Skipping item with missing task_id or question: {item}")
65
  continue
66
  try:
67
- submitted_answer = agent.forward(question=question_text)
 
 
 
68
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
69
  except Exception as e:
70
  print(f"Error running agent on task {task_id}: {e}")
@@ -116,24 +118,31 @@ def submit_answers_to_hf(username, agent_code, answers_payload):
116
  print(status_message)
117
  return status_message
118
 
119
- # # for use without proxy
120
- # questions = fetch_questions()
121
 
122
- # for use with proxy, after questions have been fetched
123
- questions = load_questions("questions.json")
124
-
125
- # # test print the questions to verify, if attachments were loaded
126
  # for question in questions:
127
- # print(question["question"],"\n")
 
 
 
 
 
 
 
 
128
 
129
- # # for use with proxy
130
- # answers = run_agent(questions)
131
- # # save answers to publish them later without use of proxy
132
- # if answers:
133
- # with open("answers.json", "w", encoding="utf-8") as f:
134
- # json.dump(answers, f, ensure_ascii=False, indent=2)
135
 
136
- # submit results to Huggingface
137
- answers = load_answers("answers.json")
138
- assignment_results = submit_answers_to_hf("IngoTB303", SPACE_ID, answers)
139
- print(assignment_results)
 
 
 
2
  from huggingface_hub import login
3
  import agent
4
  import json
5
+ import os
6
 
7
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
8
  SPACE_ID = "https://huggingface.co/spaces/IngoTB303/Final_Assignment_Template/tree/main"
 
13
 
14
  agent = agent.BasicAgent()
15
 
16
+ def fetch_questions():
17
  """Fetch questions from the API endpoint."""
18
  print(f"Fetching questions from: {questions_url}")
19
+ questions_data = None
20
  try:
21
+ response = requests.get(questions_url, timeout=30)
 
 
 
22
  response.raise_for_status()
23
  questions_data = response.json()
24
  if not questions_data:
25
  return None
26
+ # Ensure cache directory exists
27
+ os.makedirs("cache", exist_ok=True)
28
  # Fetch attachments for questions with file_name
29
  for question in questions_data:
30
  file_name = question.get("file_name", "")
31
  task_id = question.get("task_id")
32
  if file_name and task_id:
33
  try:
34
+ att_response = requests.get(f"{attachments_url}{task_id}", timeout=15)
 
 
 
35
  att_response.raise_for_status()
36
+ # Save binary content to file in cache folder
37
+ file_path = os.path.join("cache", file_name)
38
+ with open(file_path, "wb") as f:
39
+ f.write(att_response.content)
40
+ # Store the local file path (as built by os.path.join) in the question dict
41
+ question["attachment_file"] = file_path
42
  except Exception as e:
43
  print(f"Error fetching attachment for task {task_id}: {e}")
44
+ question["attachment_file"] = None
45
+ else:
46
+ question["attachment_file"] = ""
47
  return questions_data
48
  except Exception as e:
49
  print(f"Error fetching questions: {e}")
 
58
  for item in questions_data:
59
  task_id = item.get("task_id")
60
  question_text = item.get("question", "")
61
+ attachment_file = item.get("attachment_file", None)
 
 
 
62
  if not task_id or question_text is None:
63
  print(f"Skipping item with missing task_id or question: {item}")
64
  continue
65
  try:
66
+ if attachment_file not in (None, ""):
67
+ submitted_answer = agent.forward(question=question_text, attachment=attachment_file)
68
+ else:
69
+ submitted_answer = agent.forward(question=question_text)
70
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
71
  except Exception as e:
72
  print(f"Error running agent on task {task_id}: {e}")
 
118
  print(status_message)
119
  return status_message
120
 
121
+ # --- fetch questions from server ---
122
+ questions = fetch_questions()
123
 
124
+ # --- load cached questions ---
125
+ # questions = load_questions("questions.json")
126
+ # test-print the questions to verify that attachments were loaded
 
127
  # for question in questions:
128
+ # print(question["question"])
129
+ # print(question["attachment_file"])
130
+
131
+ # --- generate answers ---
132
+ answers = run_agent(questions)
133
+ # save answers to publish them later
134
+ if answers:
135
+ with open("answers.json", "w", encoding="utf-8") as f:
136
+ json.dump(answers, f, ensure_ascii=False, indent=2)
137
 
138
+ # --- submit results to Huggingface ---
139
+ # answers = load_answers("answers.json")
140
+ # assignment_results = submit_answers_to_hf("IngoTB303", SPACE_ID, answers)
141
+ # print(assignment_results)
 
 
142
 
143
+ # # Find the question with the specified task_id
144
+ # question_text = next((q["question"] for q in questions if q.get("task_id") == "7bd855d8-463d-4ed5-93ca-5fe35145f733"), None)
145
+ # attachment_file = next((q["attachment_file"] for q in questions if q.get("task_id") == "7bd855d8-463d-4ed5-93ca-5fe35145f733"), None)
146
+ # print(question_text)
147
+ # print(attachment_file)
148
+ # print(agent.forward(question=question_text, attachment=attachment_file))
requirements.txt CHANGED
@@ -1,7 +1,14 @@
1
  gradio
 
2
  requests
3
  tavily-python
4
  smolagents
 
 
5
  regex
6
  pandas
7
- numpy
 
 
 
 
 
1
  gradio
2
+ gradio_client
3
  requests
4
  tavily-python
5
  smolagents
6
+ smolagents[transformers]
7
+ smolagents[audio]
8
  regex
9
  pandas
10
+ numpy
11
+ wikipedia-api
12
+ duckduckgo_search
13
+ markdownify
14
+ python-dotenv