IngoTB303 committed on
Commit
1aa16a4
·
1 Parent(s): 81917a3

Refactor app structure: add agent and tools, enhance question fetching with attachments, and update requirements

Browse files
Files changed (7) hide show
  1. .gitignore +96 -0
  2. TODOs.txt +6 -0
  3. agent.py +34 -0
  4. agent_tools.py +71 -0
  5. app-test.py +139 -0
  6. app.py +33 -12
  7. requirements.txt +6 -1
.gitignore ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ *.egg-info/
23
+ .installed.cfg
24
+ *.egg
25
+
26
+ # Installer logs
27
+ pip-log.txt
28
+ pip-delete-this-directory.txt
29
+
30
+ # Unit test / coverage reports
31
+ htmlcov/
32
+ .tox/
33
+ .nox/
34
+ .coverage
35
+ .coverage.*
36
+ .cache
37
+ nosetests.xml
38
+ coverage.xml
39
+ *.cover
40
+ .hypothesis/
41
+ .pytest_cache/
42
+
43
+ # Jupyter Notebook
44
+ .ipynb_checkpoints
45
+
46
+ # pyenv
47
+ .python-version
48
+
49
+ # mypy
50
+ .mypy_cache/
51
+ .dmypy.json
52
+
53
+ # Pyre type checker
54
+ .pyre/
55
+
56
+ # VS Code
57
+ .vscode/
58
+
59
+ # Environments
60
+ .env
61
+ .venv
62
+ env/
63
+ venv/
64
+ ENV/
65
+ env.bak/
66
+ venv.bak/
67
+
68
+ # IDEs
69
+ .idea/
70
+ *.sublime-workspace
71
+ *.sublime-project
72
+
73
+ # Mac
74
+ .DS_Store
75
+
76
+ # Windows
77
+ Thumbs.db
78
+ Desktop.ini
79
+
80
+ # Logs
81
+ *.log
82
+
83
+ # Local config
84
+ *.local
85
+
86
+ # Secret files
87
+ *.secret
88
+ *.key
89
+ *.pem
90
+
91
+ # Azure
92
+ .azure/
93
+
94
+ app_tokens.py
95
+ questions.json
96
+ answers.json
TODOs.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Notes:
2
+ - prompt tuning
3
+ - check existing agents
4
+ - check existing classes for tools: video/image capabilities, audio transcription, etc.
5
+ - check out different models
6
+ - check out https://huggingface.co/blog/open-deep-research#using-a-codeagent and https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research
agent.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import agent_tools
2
+ from smolagents import CodeAgent, FinalAnswerTool, DuckDuckGoSearchTool, AzureOpenAIServerModel
3
+ import app_tokens
4
+
5
# Module-level chat model used by the agents defined in this file.
# Every connection setting is read from the local (git-ignored) app_tokens module.
model = AzureOpenAIServerModel(
    model_id=app_tokens.AZURE_OPENAI_MODEL,
    azure_endpoint=app_tokens.AZURE_OPENAI_ENDPOINT,
    api_key=app_tokens.AZURE_OPENAI_API_KEY,
    api_version=app_tokens.OPENAI_API_VERSION,
)
11
+
12
class BasicAgent:
    """Manager/worker agent pair built on smolagents ``CodeAgent``.

    ``web_agent`` owns the web tools (page visiting and DuckDuckGo search) and
    is registered as a managed agent of ``manager_agent``, which is the agent
    that actually receives each question.

    Instances are callable: ``agent(question)`` is equivalent to
    ``agent.forward(question)``.
    """

    def __init__(self):
        # Worker agent: the only agent that is given web tools.
        self.web_agent = CodeAgent(
            model=model,
            tools=[agent_tools.VisitWebpageTool(), FinalAnswerTool(), DuckDuckGoSearchTool()],
            max_steps=8,
            name="web_agent",
            description="Runs web searches for you.",
        )

        # Manager agent: has no tools of its own and delegates to web_agent.
        self.manager_agent = CodeAgent(
            model=model,
            tools=[],
            managed_agents=[self.web_agent],
            additional_authorized_imports=["json", "pandas", "numpy", "regex"],
            planning_interval=5,
            verbosity_level=2,
            max_steps=12,
        )

    def forward(self, question: str) -> str:
        """Run the manager agent on ``question`` and return its result."""
        return self.manager_agent.run(question)

    def __call__(self, question: str) -> str:
        """Alias for :meth:`forward`.

        The ``BasicAgent`` previously defined in app.py was used through
        ``__call__``; keeping the instance callable lets such call sites keep
        working. NOTE(review): the app.py call site is not visible here -
        confirm it still calls ``agent(question_text)``.
        """
        return self.forward(question)
agent_tools.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import tool, Tool
2
+ from tavily import TavilyClient
3
+ import app_tokens
4
+
5
@tool
def web_search(query: str, proxy: bool = False) -> str:
    """Searches the web for your query.

    Args:
        query: Your query.
        proxy: An optional boolean parameter, if a local proxy should be used or not.

    """
    # Alternative key source kept from the original as a note:
    # TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
    client_options = {"api_key": app_tokens.get_tavily_api_key()}
    if proxy:
        # Route through the local proxy on port 3128; certificate checks are disabled.
        client_options["proxies"] = {
            'http': 'http://localhost:3128',
            'https': 'http://localhost:3128',
        }
        client_options["verify"] = False
    search_response = TavilyClient(**client_options).search(query)
    return str(search_response["results"])
21
+
22
class VisitWebpageTool(Tool):
    # Tool that downloads a page with `requests`, converts the HTML to markdown
    # and returns the (truncated) text. Failures are reported as strings rather
    # than raised, so the calling agent receives them as the tool's output.
    name = "visit_webpage"
    description = (
        "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
    )
    inputs = {
        "url": {
            "type": "string",
            "description": "The url of the webpage to visit.",
        },
        "proxy": {
            "type": "boolean",
            "description": "An optional boolean parameter, if a local proxy should be used or not. Should be True, if the request timed out.",
            # Boolean flag (was the string "True").
            "nullable": True,
        },
    }
    output_type = "string"

    def forward(self, url: str, proxy: bool = True) -> str:
        """Fetch ``url`` and return its content as markdown.

        Args:
            url: Address of the page to download.
            proxy: When truthy, send the request through the local proxy at
                ``localhost:3128`` with certificate verification disabled.
                NOTE(review): defaults to True although the input description
                suggests enabling it only after a timeout - confirm intent.

        Returns:
            The page as markdown, truncated to 40000 characters, or a
            human-readable error message if the request failed.

        Raises:
            ImportError: If ``markdownify``/``requests`` are not installed.
        """
        # Imports stay inside forward(), as in the original implementation.
        try:
            import re

            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException

            from smolagents.utils import truncate_content
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e

        request_kwargs = {"timeout": 20}
        if proxy:
            request_kwargs["proxies"] = {'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}
            request_kwargs["verify"] = False

        try:
            response = requests.get(url, **request_kwargs)
            response.raise_for_status()  # Raise an exception for bad status codes
            markdown_content = markdownify(response.text).strip()
            # Collapse runs of three or more newlines into a single blank line.
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
            return truncate_content(markdown_content, 40000)

        except requests.exceptions.Timeout:
            return "The request timed out. Please try again later or check the URL."
        except RequestException as e:
            return f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            return f"An unexpected error occurred: {str(e)}"
69
+
70
+ # test = VisitWebpageTool()
71
+ # print(test.forward("https://www.wikipedia.de", proxy=True))
app-test.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from huggingface_hub import login
3
+ import agent
4
+ import json
5
+ import base64
6
+
7
# --- Endpoints of the scoring service ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Link to this Space's code; passed as `agent_code` when submitting.
SPACE_ID = "https://huggingface.co/spaces/IngoTB303/Final_Assignment_Template/tree/main"
api_url = DEFAULT_API_URL
questions_url = api_url + "/questions"
attachments_url = api_url + "/files/"
submit_url = api_url + "/submit"

# Single agent instance shared by the functions below.
# Note: this rebinds the name `agent` from the imported module to the instance.
agent = agent.BasicAgent()
15
+
16
def fetch_questions(proxy: bool = True):
    """Fetch questions (and their attachments) from the API endpoint.

    For every question that has both a ``file_name`` and a ``task_id`` the
    attachment is downloaded and stored base64-encoded under the key
    ``attachment_b64`` (``None`` if that download failed). Whatever question
    data was obtained is also written to ``questions.json``.

    Args:
        proxy: When true, all requests go through the local proxy at
            ``localhost:3128`` with certificate verification disabled.

    Returns:
        The list of question dicts, or ``None`` if the list was empty or an
        error occurred.
    """
    print(f"Fetching questions from: {questions_url}")
    # Extra request options that only apply when the local proxy is used.
    proxy_kwargs = (
        {"proxies": {'http': 'http://localhost:3128', 'https': 'http://localhost:3128'}, "verify": False}
        if proxy
        else {}
    )
    # Bind the name before the try block: the finally clause reads it even when
    # the very first request fails, which previously raised UnboundLocalError.
    questions_data = None
    try:
        response = requests.get(questions_url, timeout=30, **proxy_kwargs)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return None

        # Fetch attachments for questions with file_name
        for question in questions_data:
            file_name = question.get("file_name", "")
            task_id = question.get("task_id")
            if not (file_name and task_id):
                continue
            try:
                att_response = requests.get(f"{attachments_url}{task_id}", timeout=15, **proxy_kwargs)
                att_response.raise_for_status()
                # Encode binary content to base64 string
                question["attachment_b64"] = base64.b64encode(att_response.content).decode("utf-8")
            except Exception as e:
                # Best effort: a failed attachment must not abort the whole fetch.
                print(f"Error fetching attachment for task {task_id}: {e}")
                question["attachment_b64"] = None
        return questions_data
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return None
    finally:
        # Persist whatever was fetched so later runs can use load_questions().
        if questions_data:
            with open("questions.json", "w", encoding="utf-8") as f:
                json.dump(questions_data, f, ensure_ascii=False, indent=2)
52
+
53
def run_agent(questions_data):
    """Run the module-level ``agent`` on every question and collect the answers.

    Args:
        questions_data: Iterable of question dicts with the keys ``task_id``,
            ``question`` and optionally ``attachment_b64``. ``None`` or an
            empty list yields an empty result.

    Returns:
        A list of ``{"task_id": ..., "submitted_answer": ...}`` dicts. Items
        that are skipped or whose agent run raised are left out.
    """
    answers_payload = []
    if not questions_data:
        # load_questions() returns None on failure; nothing to do in that case.
        print("No questions to run the agent on.")
        return answers_payload

    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question", "")
        # Validate BEFORE appending the attachment: once concatenated, a None
        # question would turn into the string "None..." and slip past the check.
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        attachment_b64 = item.get("attachment_b64", "")
        # Concatenate question and attachment_b64 if present
        if attachment_b64:
            question_text = f"{question_text}\n\n[ATTACHMENT:]\n{attachment_b64}"
        try:
            submitted_answer = agent.forward(question=question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        except Exception as e:
            # Keep going: one failing question must not lose the other answers.
            print(f"Error running agent on task {task_id}: {e}")

    return answers_payload
73
+
74
def load_questions(filename):
    """Read a JSON file of questions and return the parsed content.

    Any error while opening or parsing the file is printed and ``None`` is
    returned instead.
    """
    try:
        with open(filename, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except Exception as err:
        print(f"Error loading questions from {filename}: {err}")
    return None
83
+
84
def load_answers(filename):
    """Read a JSON file of answers and return the parsed content.

    Any error while opening or parsing the file is printed and ``None`` is
    returned instead.
    """
    try:
        with open(filename, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except Exception as err:
        print(f"Error loading answers from {filename}: {err}")
    return None
93
+
94
def submit_answers_to_hf(username, agent_code, answers_payload):
    """POST the collected answers to the scoring endpoint (``submit_url``).

    Args:
        username: Account name the submission is made for; surrounding
            whitespace is stripped before sending.
        agent_code: Value sent as ``agent_code`` in the submission.
        answers_payload: List of ``{"task_id", "submitted_answer"}`` dicts.
            ``None`` or an empty list is not submitted.

    Returns:
        A status string: the formatted score on success, otherwise a message
        describing why nothing was submitted or what went wrong.
    """
    # load_answers() returns None on failure; len(None) used to crash here.
    if not answers_payload:
        status_message = "No answers to submit."
        print(status_message)
        return status_message

    # Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)
    # Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        return status_message
118
+
119
# Script driver. The individual steps are switched on and off by
# (un)commenting them; the guard keeps them from running on import.
if __name__ == "__main__":
    # # for use without proxy
    # questions = fetch_questions()

    # for use with proxy, after questions have been fetched
    questions = load_questions("questions.json")

    # # test print the questions to verify, if attachments were loaded
    # for question in questions:
    #     print(question["question"], "\n")

    # # for use with proxy
    # answers = run_agent(questions)
    # # save answers to publish them later without use of proxy
    # if answers:
    #     with open("answers.json", "w", encoding="utf-8") as f:
    #         json.dump(answers, f, ensure_ascii=False, indent=2)

    # submit results to Huggingface
    answers = load_answers("answers.json")
    if answers:
        assignment_results = submit_answers_to_hf("IngoTB303", SPACE_ID, answers)
        print(assignment_results)
    else:
        # load_answers() returns None when the file is missing or invalid.
        print("No answers loaded from answers.json; nothing to submit.")
app.py CHANGED
@@ -3,21 +3,25 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
@@ -36,11 +40,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
36
 
37
  api_url = DEFAULT_API_URL
38
  questions_url = f"{api_url}/questions"
 
39
  submit_url = f"{api_url}/submit"
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
@@ -58,6 +63,18 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
58
  print("Fetched questions list is empty.")
59
  return "Fetched questions list is empty or invalid format.", None
60
  print(f"Fetched {len(questions_data)} questions.")
 
 
 
 
 
 
 
 
 
 
 
 
61
  except requests.exceptions.RequestException as e:
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
@@ -75,7 +92,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
75
  print(f"Running agent on {len(questions_data)} questions...")
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
- question_text = item.get("question")
 
 
 
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from smolagents import CodeAgent, HfApiModel, FinalAnswerTool, DuckDuckGoSearchTool, AzureOpenAIServerModel
7
+ from huggingface_hub import login
8
+
9
+ import app_tokens
10
+
11
+ login(os.getenv('HF_API_KEY'))
12
+
13
+
14
 
15
  # (Keep Constants as is)
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
+ model = AzureOpenAIServerModel(
20
+ model_id = app_tokens.AZURE_OPENAI_MODEL,
21
+ azure_endpoint = app_tokens.AZURE_OPENAI_ENDPOINT,
22
+ api_key = app_tokens.AZURE_OPENAI_API_KEY,
23
+ api_version = app_tokens.OPENAI_API_VERSION
24
+ )
 
 
 
 
25
 
26
  def run_and_submit_all( profile: gr.OAuthProfile | None):
27
  """
 
40
 
41
  api_url = DEFAULT_API_URL
42
  questions_url = f"{api_url}/questions"
43
+ attachments_url = f"{api_url}/files/"
44
  submit_url = f"{api_url}/submit"
45
 
46
  # 1. Instantiate Agent ( modify this part to create your agent)
47
  try:
48
+ agent = agent.BasicAgent()
49
  except Exception as e:
50
  print(f"Error instantiating agent: {e}")
51
  return f"Error initializing agent: {e}", None
 
63
  print("Fetched questions list is empty.")
64
  return "Fetched questions list is empty or invalid format.", None
65
  print(f"Fetched {len(questions_data)} questions.")
66
+ # Fetch attachments for questions with file_name
67
+ for q in questions_data:
68
+ file_name = q.get("file_name", "")
69
+ task_id = q.get("task_id")
70
+ if file_name and task_id:
71
+ try:
72
+ att_response = requests.get(f"{attachments_url}{task_id}", timeout=15)
73
+ att_response.raise_for_status()
74
+ q["attachment_b64"] = att_response.text
75
+ except Exception as e:
76
+ print(f"Error fetching attachment for task {task_id}: {e}")
77
+ q["attachment_b64"] = None
78
  except requests.exceptions.RequestException as e:
79
  print(f"Error fetching questions: {e}")
80
  return f"Error fetching questions: {e}", None
 
92
  print(f"Running agent on {len(questions_data)} questions...")
93
  for item in questions_data:
94
  task_id = item.get("task_id")
95
+ question_text = item.get("question", "")
96
+ attachment_b64 = item.get("attachment_b64", "")
97
+ # Concatenate question and attachment_b64 if present
98
+ if attachment_b64:
99
+ question_text = f"{question_text}\n\n[ATTACHMENT:]\n{attachment_b64}"
100
  if not task_id or question_text is None:
101
  print(f"Skipping item with missing task_id or question: {item}")
102
  continue
requirements.txt CHANGED
@@ -1,2 +1,7 @@
1
  gradio
2
- requests
 
 
 
 
 
 
1
  gradio
2
+ requests
3
+ tavily-python
4
+ smolagents
5
+ regex
6
+ pandas
7
+ numpy