dawid-lorek commited on
Commit
27383b9
·
verified ·
1 Parent(s): 2a7c7e6

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +114 -15
agent.py CHANGED
@@ -1,3 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  from openai import OpenAI
3
 
@@ -6,40 +80,65 @@ class GaiaAgent:
6
  self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
7
  self.instructions = (
8
  "You are a research assistant solving GAIA benchmark questions using 2022 English Wikipedia knowledge.\n"
9
- "For each question, reason step-by-step and only return the final answer in exact format."
10
  )
11
  self.task_templates = {
12
- "8e867cd7-cff9-4e6c-867a-ff5ddc2550be": self.q_mercedes_sosa_albums
 
 
13
  }
14
 
15
  def __call__(self, question: str, task_id: str = None) -> str:
16
  if task_id in self.task_templates:
17
- return self.task_templates[task_id](question)
 
18
  else:
19
- return "[SKIPPED: Task not yet implemented in Agent V9]"
20
 
21
  def q_mercedes_sosa_albums(self, question: str) -> str:
22
  prompt = (
23
- "You are a research assistant using 2022 English Wikipedia knowledge.\n"
24
- "\nQUESTION:\n"
25
- "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)?\n"
26
- "\nFollow this step-by-step reasoning chain:\n"
27
- "Step 1: Identify all studio albums by Mercedes Sosa.\n"
28
- "Step 2: Filter the albums published between 2000 and 2009 inclusive.\n"
29
  "Step 3: Count them.\n"
30
- "Step 4: Return only the count as a number (no text, no explanation).\n"
31
- "\nANSWER:"
32
  )
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  try:
35
  response = self.client.chat.completions.create(
36
  model="gpt-4-turbo",
37
  messages=[
38
  {"role": "system", "content": self.instructions},
39
- {"role": "user", "content": prompt}
40
  ],
41
  temperature=0.0
42
  )
43
- return response.choices[0].message.content.strip()
44
  except Exception as e:
45
- return f"[AGENT ERROR: {e}]"
 
1
+ Agent V9
2
+ 1
3
+ 2
4
+ 3
5
+ 4
6
+ 5
7
+ 6
8
+ 7
9
+ 8
10
+ 9
11
+ 10
12
+ 11
13
+ 12
14
+ 13
15
+ 14
16
+ 15
17
+ 16
18
+ 17
19
+ 18
20
+ 19
21
+ 20
22
+ 21
23
+ 22
24
+ 23
25
+ 24
26
+ 25
27
+ 26
28
+ 27
29
+ 28
30
+ 29
31
+ 30
32
+ 31
33
+ 32
34
+ 33
35
+ 34
36
+ 35
37
+ 36
38
+ 37
39
+ 38
40
+ 39
41
+ 40
42
+ 41
43
+ 42
44
+ 43
45
+ 44
46
+ 45
47
+ 46
48
+ 47
49
+ 48
50
+ 49
51
+ 50
52
+ 51
53
+ 52
54
+ 53
55
+ 54
56
+ 55
57
+ 56
58
+ 57
59
+ 58
60
+ 59
61
+ 60
62
+ 61
63
+ 62
64
+ 63
65
+ 64
66
+ 65
67
+ 66
68
+ 67
69
+ 68
70
+ 69
71
+ 70
72
+ 71
73
+ 72
74
+ # agent_v9.py
75
  import os
76
  from openai import OpenAI
77
 
 
80
  self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
81
  self.instructions = (
82
  "You are a research assistant solving GAIA benchmark questions using 2022 English Wikipedia knowledge.\n"
83
+ "For each question, reason step-by-step and only return the final answer in exact format (no explanation, no punctuation, no text)."
84
  )
85
  self.task_templates = {
86
+ "8e867cd7-cff9-4e6c-867a-ff5ddc2550be": self.q_mercedes_sosa_albums,
87
+ "2d83110e-a098-4ebb-9987-066c06fa42d0": self.q_reversed_text,
88
+ "3cef3a44-215e-4aed-8e3b-b1e3f08063b7": self.q_botanical_vegetables
89
  }
90
 
91
  def __call__(self, question: str, task_id: str = None) -> str:
92
  if task_id in self.task_templates:
93
+ raw = self.task_templates[task_id](question)
94
+ return raw.strip().replace(".\n", "").replace("\n", "").strip()
95
  else:
96
+ return "[SKIPPED: Task not yet implemented in Agent V9.1]"
97
 
98
  def q_mercedes_sosa_albums(self, question: str) -> str:
99
  prompt = (
100
+ "QUESTION: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)?\n"
101
+ "\nScratchpad reasoning:\n"
102
+ "Step 1: List all studio albums of Mercedes Sosa from Wikipedia (2022).\n"
103
+ "Step 2: Filter albums released between 2000 and 2009 inclusive.\n"
 
 
104
  "Step 3: Count them.\n"
105
+ "\nFinal Answer (number only):"
 
106
  )
107
+ return self.query_llm(prompt)
108
 
109
+ def q_reversed_text(self, question: str) -> str:
110
+ prompt = (
111
+ "QUESTION: .rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI\n"
112
+ "\nScratchpad reasoning:\n"
113
+ "Step 1: Reverse the question.\n"
114
+ "Step 2: Understand it.\n"
115
+ "Step 3: The opposite of the word \"left\" is \"right\".\n"
116
+ "\nFinal Answer (word only):"
117
+ )
118
+ return self.query_llm(prompt)
119
+
120
+ def q_botanical_vegetables(self, question: str) -> str:
121
+ prompt = (
122
+ "QUESTION: Classify each item botanically and return only the vegetables from the list.\n"
123
+ "milk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts\n"
124
+ "\nScratchpad reasoning:\n"
125
+ "Step 1: Identify botanical vegetables (roots, stems, leaves).\n"
126
+ "Step 2: Exclude botanical fruits and seeds.\n"
127
+ "Step 3: Sort alphabetically.\n"
128
+ "\nFinal Answer (comma-separated list):"
129
+ )
130
+ return self.query_llm(prompt)
131
+
132
+ def query_llm(self, prompt: str) -> str:
133
  try:
134
  response = self.client.chat.completions.create(
135
  model="gpt-4-turbo",
136
  messages=[
137
  {"role": "system", "content": self.instructions},
138
+ {"role": "user", "content": prompt.strip()}
139
  ],
140
  temperature=0.0
141
  )
142
+ return response.choices[0].message.content
143
  except Exception as e:
144
+ return f"[LLM ERROR: {e}]"