Final_Assignment_Template

Sleeping

App Files Files Community

IngoTB303 committed on Apr 29

Commit

9e58d03

1 Parent(s): 4046252

Enhance agent functionality: add new agents for audio, visual, and Python tasks; update question fetching to handle attachments; improve tool initialization and requirements.

Browse files

Files changed (6) hide show

.gitignore +2 -0
TODOs.txt +6 -3
agent.py +47 -9
agent_tools.py +44 -69
app-test.py +45 -36
requirements.txt +8 -1

.gitignore CHANGED Viewed

@@ -94,3 +94,5 @@ Desktop.ini
 app_tokens.py
 questions.json
 answers.json

 app_tokens.py
 questions.json
 answers.json
+cache/
+cache/*.*

TODOs.txt CHANGED Viewed

@@ -1,5 +1,8 @@
 Notes:
-- prompt tuning
-- check existing classes for tools: video/image capabilities, audio transcription, etc.
-- checkout different quantized models locally, e. g. Gemini-flash 2.5, Deepseek-R1
 - checkout https://huggingface.co/blog/open-deep-research#using-a-codeagent and https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research

 Notes:
+- checkout different quantized models locally, e. g. Gemini-flash 2.5, Deepseek-R1
+- update the agent.run() call so that attachments are passed via the additional_args or images parameter;
+  see https://smolagents.org/docs/tools-of-smolagents-in-depth-guide/ and https://github.com/huggingface/smolagents/blob/main/src/smolagents/agents.py
 - checkout https://huggingface.co/blog/open-deep-research#using-a-codeagent and https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research
+- add more tools:
+  - video Q&A: https://huggingface.co/spaces/lixin4ever/VideoLLaMA2

agent.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import agent_tools
-from smolagents import CodeAgent, FinalAnswerTool, DuckDuckGoSearchTool, AzureOpenAIServerModel
 import app_tokens
 model = AzureOpenAIServerModel(
@@ -13,22 +13,60 @@ class BasicAgent:
     def __init__(self):
         self.web_agent = CodeAgent(
             model=model,
-            tools=[agent_tools.VisitWebpageTool(), FinalAnswerTool(), DuckDuckGoSearchTool()],
             max_steps=8,
             name="web_agent",
-            description="Runs web searches for you."
         )
         self.manager_agent = CodeAgent(
             model=model,
             tools=[],
-            managed_agents=[self.web_agent],
-            additional_authorized_imports=["json","pandas","numpy", "regex"],
-            planning_interval=5,
             verbosity_level=2,
             max_steps=12,
         )
-    def forward(self, question: str) -> str:
-        result = self.manager_agent.run(question)
         return result

 import agent_tools
+from smolagents import CodeAgent, AzureOpenAIServerModel
 import app_tokens
 model = AzureOpenAIServerModel(
     def __init__(self):
         self.web_agent = CodeAgent(
             model=model,
+            tools=[#agent_tools.tavily_search,
+                   #agent_tools.duck_search,
+                   agent_tools.visit_page,
+                   agent_tools.wiki_search,
+                   agent_tools.google_search,
+                   agent_tools.final_answer
+                   ],
             max_steps=8,
             name="web_agent",
+            description="Runs web searches for you. Can use Google, DuckDuckGo and Wikipedia for search.",
+            add_base_tools = True
         )
+        self.audio_agent = CodeAgent(
+            model=model,
+            tools=[agent_tools.speech_to_text_tool, agent_tools.final_answer],
+            max_steps=4,
+            name="audio_agent",
+            description="This agent can help you with converting audio to text.",
+            add_base_tools = True
+        )
+        self.py_agent = CodeAgent(
+            model=model,
+            tools=[agent_tools.do_python, agent_tools.final_answer],
+            additional_authorized_imports=["json","pandas","numpy", "regex"],
+            max_steps=8,
+            name="python_code_agent",
+            description="This agent can help you with executing and validating python code.",
+            add_base_tools = True
+        )
+        self.visual_agent = CodeAgent(
+            model=model,
+            tools=[agent_tools.visual_qa_tool, agent_tools.final_answer],
+            max_steps=4,
+            name="visual_qa_agent",
+            description="This agent can help you with answering questions about pictures.",
+            add_base_tools = True
+        )
         self.manager_agent = CodeAgent(
             model=model,
             tools=[],
+            managed_agents=[self.web_agent, self.audio_agent, self.py_agent, self.visual_agent],
+            planning_interval=8,
             verbosity_level=2,
             max_steps=12,
+            add_base_tools = True
         )
+    def forward(self, question: str, attachment: str = None) -> str:
+        if attachment:
+            result = self.manager_agent.run(question, additional_args={"attachment": attachment})
+        else:
+            result = self.manager_agent.run(question)
         return result

agent_tools.py CHANGED Viewed

@@ -1,71 +1,46 @@
 from smolagents import tool, Tool
 from tavily import TavilyClient
-import app_tokens
-@tool
-def web_search(query: str, proxy: bool = False) -> str:
-    """Searches the web for your query.
-    Args:
-        query: Your query.
-        proxy: An optional boolean parameter, if a local proxy should be used or not.
-    """
-    # tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
-    if proxy:
-        tavily_client = TavilyClient(api_key=app_tokens.get_tavily_api_key(), proxies={'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, verify=False)
-    else:
-        tavily_client = TavilyClient(api_key=app_tokens.get_tavily_api_key())
-    response = tavily_client.search(query)
-    return str(response["results"])
-class VisitWebpageTool(Tool):
-    name = "visit_webpage"
-    description = (
-        "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
-    )
-    inputs = {
-        "url": {
-            "type": "string",
-            "description": "The url of the webpage to visit.",
-        },
-        "proxy": {
-            "type": "boolean",
-            "description": "An optional boolean parameter, if a local proxy should be used or not. Should be True, if the request timed out.",
-            "nullable": "True",
-        },
-    }
-    output_type = "string"
-    def forward(self, url: str, proxy: bool = True) -> str:
-        try:
-            import re
-            import requests
-            from markdownify import markdownify
-            from requests.exceptions import RequestException
-            from smolagents.utils import truncate_content
-        except ImportError as e:
-            raise ImportError(
-                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
-            ) from e
-        try:
-            if proxy:
-                response = requests.get(url, timeout=20, proxies={'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, verify=False)
-            else:
-                response = requests.get(url, timeout=20)
-            response.raise_for_status()  # Raise an exception for bad status codes
-            markdown_content = markdownify(response.text).strip()
-            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
-            return truncate_content(markdown_content, 40000)
-        except requests.exceptions.Timeout:
-            return "The request timed out. Please try again later or check the URL."
-        except RequestException as e:
-            return f"Error fetching the webpage: {str(e)}"
-        except Exception as e:
-            return f"An unexpected error occurred: {str(e)}"
-# test = VisitWebpageTool()
-# print(test.forward("https://www.wikipedia.de", proxy=True))

 from smolagents import tool, Tool
 from tavily import TavilyClient
+from smolagents import DuckDuckGoSearchTool, GoogleSearchTool, VisitWebpageTool, WikipediaSearchTool, PythonInterpreterTool, FinalAnswerTool
+from dotenv import load_dotenv
+import os
+load_dotenv()  # take environment variables
+# init tools
+duck_search = DuckDuckGoSearchTool()
+google_search = GoogleSearchTool()
+visit_page = VisitWebpageTool()
+wiki_search = WikipediaSearchTool()
+do_python = PythonInterpreterTool()
+final_answer = FinalAnswerTool()
+tavily_search = TavilyClient()
+speech_to_text_tool = Tool.from_space("hf-audio/whisper-large-v3-turbo",
+                                      name="speech_to_text_tool",
+                                      description="""Converts audio to text by providing a file or url.
+                                                     Use the with the command speed_to_text_tool(filename).
+                                                     Example: 'speech_to_text_tool("cache\\audio.mp3'""",
+                                      api_name="/predict")
+visual_qa_tool = Tool.from_space("sitammeur/PicQ",
+                                 name="visual_qa_tool",
+                                 description="""Can answer question about a provided image.
+                                                Use it with visual_qa_tool(question=<question>, image=<image filename>).
+                                                Example visual_qa_tool(question='How many items are in the image?', image='cache\\image.png').""",
+                                  api_name="/predict")
+# image_generation_tool = Tool.from_space(
+#     "black-forest-labs/FLUX.1-schnell",
+#     name="image_generator",
+#     description="Generate an image from a prompt"
+# )
+# image = image_generation_tool("A sunny beach")
+# print(image)
+## Testing the tools
+# print(duck_search.forward("smolagents")) # connect error, does not take proxy
+# print(google_search.forward("smolagents"))
+# print(visit_page.forward("https://www.wikipedia.de"))
+# print(wiki_search.forward("Python_(programming_language)"))
+# print(do_python('print("hello world")'))
+# print(tavily_search.search("smolagents"))
+# print(speech_to_text_tool("cache\\1f975693-876d-457b-a649-393859e79bf3.mp3"))
+# print(visual_qa_tool(question="Name all black and white figures with each coordinates, e. g. black king on g8", image="cache\\cca530fc-4052-43b2-b130-b30968d8aa44.png"))

app-test.py CHANGED Viewed

@@ -2,7 +2,7 @@ import requests
 from huggingface_hub import login
 import agent
 import json
-import base64
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 SPACE_ID = "https://huggingface.co/spaces/IngoTB303/Final_Assignment_Template/tree/main"
@@ -13,35 +13,37 @@ submit_url = f"{api_url}/submit"
 agent = agent.BasicAgent()
-def fetch_questions(proxy: bool = True):
     """Fetch questions from the API endpoint."""
     print(f"Fetching questions from: {questions_url}")
     try:
-        if proxy:
-            response = requests.get(questions_url, timeout=30, proxies={'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, verify=False)
-        else:
-            response = requests.get(questions_url, timeout=30)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
             return None
         # Fetch attachments for questions with file_name
         for question in questions_data:
             file_name = question.get("file_name", "")
             task_id = question.get("task_id")
             if file_name and task_id:
                 try:
-                    if proxy:
-                        att_response = requests.get(f"{attachments_url}{task_id}", timeout=15, proxies={'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, verify=False)
-                    else:
-                        att_response = requests.get(f"{attachments_url}{task_id}", timeout=15)
                     att_response.raise_for_status()
-                    # Encode binary content to base64 string
-                    question["attachment_b64"] = base64.b64encode(att_response.content).decode("utf-8")
                 except Exception as e:
                     print(f"Error fetching attachment for task {task_id}: {e}")
-                    question["attachment_b64"] = None
         return questions_data
     except Exception as e:
         print(f"Error fetching questions: {e}")
@@ -56,15 +58,15 @@ def run_agent(questions_data):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question", "")
-        attachment_b64 = item.get("attachment_b64", "")
-        # Concatenate question and attachment_b64 if present
-        if attachment_b64:
-            question_text = f"{question_text}\n\n[ATTACHMENT:]\n{attachment_b64}"
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent.forward(question=question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
         except Exception as e:
              print(f"Error running agent on task {task_id}: {e}")
@@ -116,24 +118,31 @@ def submit_answers_to_hf(username, agent_code, answers_payload):
         print(status_message)
         return status_message
-# # for use without proxy
-# questions = fetch_questions()
-# for use with proxy, after questions have been fetched
-questions = load_questions("questions.json")
-# # test print the questions to verify, if attachments were loaded
 # for question in questions:
-#     print(question["question"],"\n")
-# # for use with proxy
-# answers = run_agent(questions)
-# # save answers to publish them later without use of proxy
-# if answers:
-#     with open("answers.json", "w", encoding="utf-8") as f:
-#         json.dump(answers, f, ensure_ascii=False, indent=2)
-# submit results to Huggingface
-answers = load_answers("answers.json")
-assignment_results = submit_answers_to_hf("IngoTB303", SPACE_ID, answers)
-print(assignment_results)

 from huggingface_hub import login
 import agent
 import json
+import os
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 SPACE_ID = "https://huggingface.co/spaces/IngoTB303/Final_Assignment_Template/tree/main"
 agent = agent.BasicAgent()
+def fetch_questions():
     """Fetch questions from the API endpoint."""
     print(f"Fetching questions from: {questions_url}")
+    questions_data = None
     try:
+        response = requests.get(questions_url, timeout=30)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
             return None
+        # Ensure cache directory exists
+        os.makedirs("cache", exist_ok=True)
         # Fetch attachments for questions with file_name
         for question in questions_data:
             file_name = question.get("file_name", "")
             task_id = question.get("task_id")
             if file_name and task_id:
                 try:
+                    att_response = requests.get(f"{attachments_url}{task_id}", timeout=15)
                     att_response.raise_for_status()
+                    # Save binary content to file in cache folder
+                    file_path = os.path.join("cache", file_name)
+                    with open(file_path, "wb") as f:
+                        f.write(att_response.content)
+                    # Store the local file path (as built by os.path.join) in the question dict
+                    question["attachment_file"] = file_path
                 except Exception as e:
                     print(f"Error fetching attachment for task {task_id}: {e}")
+                    question["attachment_file"] = None
+            else:
+                question["attachment_file"] = ""
         return questions_data
     except Exception as e:
         print(f"Error fetching questions: {e}")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question", "")
+        attachment_file = item.get("attachment_file", None)
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            if attachment_file not in (None, ""):
+                submitted_answer = agent.forward(question=question_text, attachment=attachment_file)
+            else:
+                submitted_answer = agent.forward(question=question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
         except Exception as e:
              print(f"Error running agent on task {task_id}: {e}")
         print(status_message)
         return status_message
+# --- fetch questions from server ---
+questions = fetch_questions()
+# --- load cached questions ---
+# questions = load_questions("questions.json")
+# test print the questions to verify, if attachments were loaded
 # for question in questions:
+#     print(question["question"])
+#     print(question["attachment_file"])
+# --- generate answers ---
+answers = run_agent(questions)
+# save answers to publish them later
+if answers:
+    with open("answers.json", "w", encoding="utf-8") as f:
+        json.dump(answers, f, ensure_ascii=False, indent=2)
+# --- submit results to Huggingface ---
+# answers = load_answers("answers.json")
+# assignment_results = submit_answers_to_hf("IngoTB303", SPACE_ID, answers)
+# print(assignment_results)
+# # Find the question with the specified task_id
+# question_text = next((q["question"] for q in questions if q.get("task_id") == "7bd855d8-463d-4ed5-93ca-5fe35145f733"), None)
+# attachment_file = next((q["attachment_file"] for q in questions if q.get("task_id") == "7bd855d8-463d-4ed5-93ca-5fe35145f733"), None)
+# print(question_text)
+# print(attachment_file)
+# print(agent.forward(question=question_text, attachment=attachment_file))

requirements.txt CHANGED Viewed

@@ -1,7 +1,14 @@
 gradio
 requests
 tavily-python
 smolagents
 regex
 pandas
-numpy

 gradio
+gradio_client
 requests
 tavily-python
 smolagents
+smolagents[transformers]
+smolagents[audio]
 regex
 pandas
+numpy
+wikipedia-api
+duckduckgo_search
+markdownify
+python-dotenv