dlaima committed on
Commit
2d05710
·
verified ·
1 Parent(s): 4cb05ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -53
app.py CHANGED
@@ -6,7 +6,7 @@ import pandas as pd
6
  from smolagents import CodeAgent, DuckDuckGoSearchTool
7
  from smolagents.models import OpenAIServerModel
8
 
9
- # System prompt as per your instructions
10
  SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question.
11
  Report your thoughts, and finish your answer with the following template:
12
  FINAL ANSWER: [YOUR FINAL ANSWER].
@@ -15,38 +15,21 @@ of numbers and/or strings. If you are asked for a number, don't use comma to wri
15
 
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
 
18
- # Custom model wrapper that injects system message
19
- #class PatchedOpenAIServerModel(OpenAIServerModel):
20
- # def generate(self, messages, *args, **kwargs):
21
- # messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages
22
- # return super().generate(messages, *args, **kwargs)
23
-
24
  class PatchedOpenAIServerModel(OpenAIServerModel):
25
- def generate(self, messages, *args, **kwargs):
26
- # Handle string inputs (raw prompts)
27
- if isinstance(messages, str):
28
- messages = [
29
- {"role": "system", "content": SYSTEM_PROMPT},
30
- {"role": "user", "content": messages}
31
- ]
32
- elif isinstance(messages, list):
33
- # Assume already in proper format
34
- messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages
35
  else:
36
- raise TypeError("Expected messages to be a string or list of dicts")
37
-
38
- return super().generate(messages, *args, **kwargs)
39
 
 
40
 
41
  class MyAgent:
42
  def __init__(self):
43
- self.model = PatchedOpenAIServerModel(
44
- model_id="gpt-4" # system_message removed; injected manually
45
- )
46
- self.agent = CodeAgent(
47
- tools=[DuckDuckGoSearchTool()],
48
- model=self.model
49
- )
50
 
51
  def __call__(self, question: str) -> str:
52
  return self.agent.run(question)
@@ -80,11 +63,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
80
  response.raise_for_status()
81
  questions_data = response.json()
82
  if not questions_data:
83
- print("Fetched questions list is empty.")
84
  return "Fetched questions list is empty or invalid format.", None
85
  print(f"Fetched {len(questions_data)} questions.")
86
  except Exception as e:
87
- print(f"Error fetching questions: {e}")
88
  return f"Error fetching questions: {e}", None
89
 
90
  results_log = []
@@ -94,18 +75,16 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
94
  task_id = item.get("task_id")
95
  question_text = item.get("question")
96
  if not task_id or question_text is None:
97
- print(f"Skipping invalid item: {item}")
98
  continue
99
  try:
100
  submitted_answer = agent(question_text)
101
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
102
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
103
  except Exception as e:
104
- print(f"Error running agent on task {task_id}: {e}")
105
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
106
 
107
  if not answers_payload:
108
- print("Agent did not produce any answers to submit.")
109
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
110
 
111
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
@@ -121,40 +100,29 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
121
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
122
  f"Message: {result_data.get('message', 'No message received.')}"
123
  )
124
- print("Submission successful.")
125
  results_df = pd.DataFrame(results_log)
126
  return final_status, results_df
127
  except requests.exceptions.HTTPError as e:
128
- error_detail = f"Server responded with status {e.response.status_code}."
129
  try:
130
- error_json = e.response.json()
131
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
132
  except Exception:
133
- error_detail += f" Response: {e.response.text[:500]}"
134
- status_message = f"Submission Failed: {error_detail}"
135
- print(status_message)
136
- return status_message, pd.DataFrame(results_log)
137
  except requests.exceptions.Timeout:
138
- status_message = "Submission Failed: The request timed out."
139
- print(status_message)
140
- return status_message, pd.DataFrame(results_log)
141
  except Exception as e:
142
- status_message = f"An unexpected error occurred during submission: {e}"
143
- print(status_message)
144
- return status_message, pd.DataFrame(results_log)
145
 
 
146
  with gr.Blocks() as demo:
147
  gr.Markdown("# Basic Agent Evaluation Runner")
148
- gr.Markdown(
149
- """
150
  **Instructions:**
151
  1. Clone this space, modify code to define your agent's logic, tools, and packages.
152
  2. Log in to your Hugging Face account using the button below.
153
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see your score.
154
-
155
  **Note:** Submitting can take some time.
156
- """
157
- )
158
 
159
  gr.LoginButton()
160
  run_button = gr.Button("Run Evaluation & Submit All Answers")
@@ -183,6 +151,5 @@ if __name__ == "__main__":
183
  print("ℹ️ SPACE_ID environment variable not found (running locally?).")
184
 
185
  print("-"*(60 + len(" App Starting ")) + "\n")
186
-
187
  print("Launching Gradio Interface for Basic Agent Evaluation...")
188
  demo.launch(debug=True, share=False)
 
6
  from smolagents import CodeAgent, DuckDuckGoSearchTool
7
  from smolagents.models import OpenAIServerModel
8
 
9
+ # Define the system prompt
10
  SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question.
11
  Report your thoughts, and finish your answer with the following template:
12
  FINAL ANSWER: [YOUR FINAL ANSWER].
 
15
 
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
 
18
+ # Patched model to prepend system prompt correctly
 
 
 
 
 
19
  class PatchedOpenAIServerModel(OpenAIServerModel):
20
+ def generate(self, messages, stop_sequences=None, **kwargs):
21
+ if isinstance(messages, list):
22
+ if not any(m["role"] == "system" for m in messages):
23
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages
 
 
 
 
 
 
24
  else:
25
+ raise TypeError("Expected 'messages' to be a list of message dicts")
 
 
26
 
27
+ return super().generate(messages=messages, stop_sequences=stop_sequences, **kwargs)
28
 
29
  class MyAgent:
30
  def __init__(self):
31
+ self.model = PatchedOpenAIServerModel(model_id="gpt-4")
32
+ self.agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=self.model)
 
 
 
 
 
33
 
34
  def __call__(self, question: str) -> str:
35
  return self.agent.run(question)
 
63
  response.raise_for_status()
64
  questions_data = response.json()
65
  if not questions_data:
 
66
  return "Fetched questions list is empty or invalid format.", None
67
  print(f"Fetched {len(questions_data)} questions.")
68
  except Exception as e:
 
69
  return f"Error fetching questions: {e}", None
70
 
71
  results_log = []
 
75
  task_id = item.get("task_id")
76
  question_text = item.get("question")
77
  if not task_id or question_text is None:
 
78
  continue
79
  try:
80
  submitted_answer = agent(question_text)
81
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
82
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
83
  except Exception as e:
84
+ error_msg = f"AGENT ERROR: {e}"
85
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_msg})
86
 
87
  if not answers_payload:
 
88
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
89
 
90
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
100
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
101
  f"Message: {result_data.get('message', 'No message received.')}"
102
  )
 
103
  results_df = pd.DataFrame(results_log)
104
  return final_status, results_df
105
  except requests.exceptions.HTTPError as e:
 
106
  try:
107
+ detail = e.response.json().get("detail", e.response.text)
 
108
  except Exception:
109
+ detail = e.response.text[:500]
110
+ return f"Submission Failed: {detail}", pd.DataFrame(results_log)
 
 
111
  except requests.exceptions.Timeout:
112
+ return "Submission Failed: The request timed out.", pd.DataFrame(results_log)
 
 
113
  except Exception as e:
114
+ return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
 
 
115
 
116
+ # Gradio UI setup
117
  with gr.Blocks() as demo:
118
  gr.Markdown("# Basic Agent Evaluation Runner")
119
+ gr.Markdown("""
 
120
  **Instructions:**
121
  1. Clone this space, modify code to define your agent's logic, tools, and packages.
122
  2. Log in to your Hugging Face account using the button below.
123
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see your score.
 
124
  **Note:** Submitting can take some time.
125
+ """)
 
126
 
127
  gr.LoginButton()
128
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
151
  print("ℹ️ SPACE_ID environment variable not found (running locally?).")
152
 
153
  print("-"*(60 + len(" App Starting ")) + "\n")
 
154
  print("Launching Gradio Interface for Basic Agent Evaluation...")
155
  demo.launch(debug=True, share=False)