dawid-lorek committed on
Commit
c8bf6ed
·
verified ·
1 Parent(s): b61460e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -103
app.py CHANGED
@@ -4,14 +4,13 @@ import gradio as gr
4
  import requests
5
  import pandas as pd
6
 
7
- from smolagents import CodeAgent, OpenAIServerModel
8
- from smolagents.tools import DuckDuckGoSearchTool
9
 
10
  # Constants
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
  MAX_QUESTION_LENGTH = 4000
13
 
14
- # --- Reliable DuckDuckGo Tool using smolagents ---
15
  class ReliableDuckDuckGoTool(DuckDuckGoSearchTool):
16
  def run(self, query: str) -> str:
17
  for attempt in range(3):
@@ -19,136 +18,79 @@ class ReliableDuckDuckGoTool(DuckDuckGoSearchTool):
19
  return super().run(query)
20
  except Exception as e:
21
  if "rate" in str(e).lower():
22
- print(f"[DuckDuckGo] Rate limit hit. Retrying ({attempt + 1}/3)...")
23
  time.sleep(2 * (attempt + 1))
24
  else:
25
- raise e
26
  raise RuntimeError("DuckDuckGo search failed after retries")
27
 
28
- # --- Main Agent ---
29
  class SmartGAIAAgent:
30
  def __init__(self):
31
- self.api_key = os.getenv("OPENAI_API_KEY")
32
- if not self.api_key:
33
  raise ValueError("Missing OPENAI_API_KEY")
34
- self.model = OpenAIServerModel(model_id="gpt-4", api_key=self.api_key)
35
-
36
  self.agent = CodeAgent(
37
  tools=[ReliableDuckDuckGoTool()],
38
- model=self.model,
39
  add_base_tools=True
40
  )
41
 
42
- def truncate_question(self, question: str) -> str:
43
- return question[:MAX_QUESTION_LENGTH]
44
-
45
  def __call__(self, question: str) -> str:
 
46
  try:
47
- question = self.truncate_question(question)
48
- return self.agent.run(question).strip()
49
  except Exception as e:
50
- print(f"Agent error: {e}")
51
  return "error"
52
 
53
- # --- Evaluation and Submission Logic ---
54
  def run_and_submit_all(profile: gr.OAuthProfile | None):
55
- space_id = os.getenv("SPACE_ID")
56
- if profile:
57
- username = f"{profile.username}"
58
- print(f"User logged in: {username}")
59
- else:
60
- return "Please Login to Hugging Face with the button.", None
61
-
62
- questions_url = f"{DEFAULT_API_URL}/questions"
63
- submit_url = f"{DEFAULT_API_URL}/submit"
64
 
 
65
  try:
66
  agent = SmartGAIAAgent()
67
  except Exception as e:
68
  return f"Error initializing agent: {e}", None
69
 
70
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
71
-
72
- try:
73
- response = requests.get(questions_url, timeout=15)
74
- response.raise_for_status()
75
- questions_data = response.json()
76
- except Exception as e:
77
- return f"Error fetching questions: {e}", None
78
-
79
- answers_payload = []
80
- results_log = []
81
-
82
- for item in questions_data:
83
- task_id = item.get("task_id")
84
- question_text = item.get("question", "")
85
-
86
- if not task_id or not question_text:
87
- continue
88
- if len(question_text) > MAX_QUESTION_LENGTH:
89
- print(f"Skipping long question: {task_id}")
90
- continue
91
- if any(keyword in question_text.lower() for keyword in [
92
- 'attached', '.mp3', '.wav', '.png', '.jpg', '.jpeg',
93
- 'youtube', '.mp4', 'video', 'listen', 'watch'
94
- ]):
95
- print(f"Skipping unsupported question: {task_id}")
96
  continue
 
 
 
97
 
98
- try:
99
- submitted_answer = agent(question_text)
100
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
101
- results_log.append({
102
- "Task ID": task_id,
103
- "Question": question_text,
104
- "Submitted Answer": submitted_answer
105
- })
106
- except Exception as e:
107
- results_log.append({
108
- "Task ID": task_id,
109
- "Question": question_text,
110
- "Submitted Answer": f"ERROR: {e}"
111
- })
112
 
113
- if not answers_payload:
114
- return "No answers were submitted.", pd.DataFrame(results_log)
 
 
 
 
115
 
116
- submission_data = {
117
- "username": username,
118
- "agent_code": agent_code,
119
- "answers": answers_payload
120
- }
121
-
122
- try:
123
- response = requests.post(submit_url, json=submission_data, timeout=60)
124
- response.raise_for_status()
125
- result_data = response.json()
126
- final_status = (
127
- f"Submission Successful!\n"
128
- f"User: {result_data.get('username')}\n"
129
- f"Score: {result_data.get('score')}% "
130
- f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\n"
131
- f"Message: {result_data.get('message')}"
132
- )
133
- return final_status, pd.DataFrame(results_log)
134
- except Exception as e:
135
- return f"Submission failed: {e}", pd.DataFrame(results_log)
136
-
137
- # --- Gradio UI ---
138
  with gr.Blocks() as demo:
139
- gr.Markdown("# 🧠 GAIA Agent Evaluation")
140
- gr.Markdown("""
141
- 1. Log in to Hugging Face
142
- 2. Click 'Run Evaluation & Submit All Answers'
143
- 3. View your score on the leaderboard
144
- """)
145
  gr.LoginButton()
146
- run_button = gr.Button("Run Evaluation & Submit All Answers")
147
- status_output = gr.Textbox(label="Submission Status", lines=5)
148
- results_table = gr.DataFrame(label="Evaluation Results")
149
-
150
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
151
 
152
  if __name__ == "__main__":
153
- print("Launching Gradio Interface...")
154
  demo.launch(debug=True, share=False)
 
4
  import requests
5
  import pandas as pd
6
 
7
+ from smolagents import CodeAgent, OpenAIServerModel, DuckDuckGoSearchTool
 
8
 
9
  # Constants
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
  MAX_QUESTION_LENGTH = 4000
12
 
13
+ # Reliable search tool with retry
14
  class ReliableDuckDuckGoTool(DuckDuckGoSearchTool):
15
  def run(self, query: str) -> str:
16
  for attempt in range(3):
 
18
  return super().run(query)
19
  except Exception as e:
20
  if "rate" in str(e).lower():
21
+ print(f"[DuckDuckGo] Rate limit, retry {attempt+1}/3")
22
  time.sleep(2 * (attempt + 1))
23
  else:
24
+ raise
25
  raise RuntimeError("DuckDuckGo search failed after retries")
26
 
27
+ # Main agent using GPT-4 & tools
28
  class SmartGAIAAgent:
29
  def __init__(self):
30
+ key = os.getenv("OPENAI_API_KEY")
31
+ if not key:
32
  raise ValueError("Missing OPENAI_API_KEY")
33
+ model = OpenAIServerModel(model_id="gpt-4", api_key=key)
 
34
  self.agent = CodeAgent(
35
  tools=[ReliableDuckDuckGoTool()],
36
+ model=model,
37
  add_base_tools=True
38
  )
39
 
 
 
 
40
  def __call__(self, question: str) -> str:
41
+ q = question[:MAX_QUESTION_LENGTH]
42
  try:
43
+ return self.agent.run(q).strip()
 
44
  except Exception as e:
45
+ print("Agent error:", e)
46
  return "error"
47
 
48
+ # Fetch, filter, run, and submit answers
49
  def run_and_submit_all(profile: gr.OAuthProfile | None):
50
+ username = profile.username if profile else None
51
+ if not username:
52
+ return "Please Login to Hugging Face", None
 
 
 
 
 
 
53
 
54
+ # instantiate agent
55
  try:
56
  agent = SmartGAIAAgent()
57
  except Exception as e:
58
  return f"Error initializing agent: {e}", None
59
 
60
+ # get questions
61
+ resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
62
+ resp.raise_for_status()
63
+ questions = resp.json()
64
+
65
+ payload, logs = [], []
66
+ skip_kw = ['.mp3','.wav','.png','.jpg','youtube','video','watch','listen']
67
+ for item in questions:
68
+ tid = item.get("task_id")
69
+ q = item.get("question","")
70
+ if not tid or not q or len(q)>MAX_QUESTION_LENGTH or any(k in q.lower() for k in skip_kw):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  continue
72
+ ans = agent(q)
73
+ payload.append({"task_id": tid, "submitted_answer": ans})
74
+ logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
75
 
76
+ if not payload:
77
+ return "No valid questions to submit.", pd.DataFrame(logs)
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ sub = {"username": username, "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main", "answers": payload}
80
+ resp = requests.post(f"{DEFAULT_API_URL}/submit", json=sub, timeout=60)
81
+ resp.raise_for_status()
82
+ result = resp.json()
83
+ status = f"Score: {result.get('score')}% ({result.get('correct_count')}/{result.get('total_attempted')})"
84
+ return status, pd.DataFrame(logs)
85
 
86
+ # Gradio UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  with gr.Blocks() as demo:
88
+ gr.Markdown("# GAIA Agent")
 
 
 
 
 
89
  gr.LoginButton()
90
+ run_btn = gr.Button("Run & Submit")
91
+ status = gr.Textbox(lines=5)
92
+ table = gr.DataFrame()
93
+ run_btn.click(run_and_submit_all, outputs=[status, table])
 
94
 
95
  if __name__ == "__main__":
 
96
  demo.launch(debug=True, share=False)