Final_Assignment_Template

Sleeping

App Files Files Community

dawid-lorek commited on 7 days ago

Commit

0afb7b8

verified ·

1 Parent(s): e95dfeb

Update agent.py

Browse files

Files changed (1) hide show

agent.py +36 -25

agent.py CHANGED Viewed

@@ -8,7 +8,6 @@ def duckduckgo_search(query: str) -> str:
         with DDGS() as ddg:
             results = ddg.text(query=query, region="wt-wt", max_results=5)
             bodies = [r.get('body', '') for r in results if r.get('body')]
-            # For GAIA, prefer the first non-empty, or join a few if possible
             return "\n".join(bodies[:3])
     except Exception as e:
         return f"ERROR: {e}"
@@ -20,21 +19,24 @@ def eval_python_code(code: str) -> str:
         return f"ERROR: {e}"
 def format_gaia_answer(answer: str, question: str = "") -> str:
-    """Strictly format GAIA output and eliminate apologies or error text."""
     if not answer:
         return ""
-    # Remove apology/error phrases
     answer = re.sub(
-        r'(?i)(unfortunately|unable to|error:|not available|i cannot|i am unable|i can\'t|no file|skip|I do not have|I cannot access|I am currently unable|If you have access).*',
         '', answer).strip()
-    # Remove leading/trailing quotes/brackets
-    answer = answer.strip(' "\'[]')
     # Only numbers for count questions
     if re.search(r'how many|number of|albums|at bats|total sales|output', question, re.I):
         match = re.search(r'(\d+)', answer)
         if match:
             return match.group(1)
-    # Only the last word for "surname" or first for "first name"
     if "surname" in question:
         return answer.split()[-1]
     if "first name" in question:
@@ -48,7 +50,7 @@ def format_gaia_answer(answer: str, question: str = "") -> str:
         code = re.search(r'[A-Z0-9]{3,}', answer)
         if code:
             return code.group(0)
-    # For lists: split, merge common phrases, dedupe, alpha-sort, comma-sep
     if "list" in question or "ingredient" in question or "vegetable" in question:
         items = [x.strip(' "\'') for x in re.split(r'[,\n]', answer) if x.strip()]
         merged = []
@@ -65,59 +67,68 @@ def format_gaia_answer(answer: str, question: str = "") -> str:
         merged = [x.lower() for x in merged]
         merged = sorted(set(merged))
         return ', '.join(merged)
-    # For chess: algebraic move (like Qd1+)
     if "algebraic notation" in question or "chess" in question:
         move = re.findall(r'[KQRBN]?[a-h]?[1-8]?x?[a-h][1-8][+#]?', answer)
         if move:
             return move[-1]
-    # Remove everything after first period for single-word answers
-    answer = answer.split('\n')[0].split('.')[0].strip()
-    return answer
 class GaiaAgent:
     def __init__(self):
         self.llm = OpenAIClient(api_key=os.getenv("OPENAI_API_KEY"))
     def __call__(self, question: str, task_id: str = None) -> str:
-        # 1. Try tool-based search for all fact/list/code questions
-        ql = question.lower()
-        # Try to route every "who", "what", "number", "albums", "at bats", "surname", etc. to web search
         search_keywords = [
             "who", "when", "what", "which", "how many", "number", "name", "albums", "surname", "at bats",
             "nasa", "city", "winner", "code", "vegetable", "ingredient", "magda m.", "featured article"
         ]
-        if any(kw in ql for kw in search_keywords):
             web_result = duckduckgo_search(question)
             llm_answer = self.llm.chat.completions.create(
                 model="gpt-4o",
                 messages=[
-                    {"role": "system", "content": "You are a research assistant. Based on the following web search results and question, answer strictly and concisely for the GAIA benchmark. Only the answer, no explanations or apologies."},
                     {"role": "user", "content": f"Web search results:\n{web_result}\n\nQuestion: {question}"}
                 ],
                 temperature=0.0,
                 max_tokens=256,
             ).choices[0].message.content.strip()
-            return format_gaia_answer(llm_answer, question)
-        # 2. For code/math
-        if "output" in ql and "python" in ql:
             code_match = re.search(r'```python(.*?)```', question, re.DOTALL)
             code = code_match.group(1) if code_match else ""
             result = eval_python_code(code)
             return format_gaia_answer(result, question)
-        # 3. For lists or ingredients, always web search and format
-        if "list" in ql or "ingredient" in ql or "vegetable" in ql:
             web_result = duckduckgo_search(question)
             llm_answer = self.llm.chat.completions.create(
                 model="gpt-4o",
                 messages=[
-                    {"role": "system", "content": "You are a research assistant. Based on the following web search results and question, answer strictly and concisely for the GAIA benchmark. Only the answer, no explanations or apologies."},
                     {"role": "user", "content": f"Web search results:\n{web_result}\n\nQuestion: {question}"}
                 ],
                 temperature=0.0,
                 max_tokens=256,
             ).choices[0].message.content.strip()
             return format_gaia_answer(llm_answer, question)
-        # 4. Fallback: strict LLM answer, formatted
         llm_answer = self.llm.chat.completions.create(
             model="gpt-4o",
             messages=[
@@ -125,6 +136,6 @@ class GaiaAgent:
                 {"role": "user", "content": question}
             ],
             temperature=0.0,
-            max_tokens=256,
         ).choices[0].message.content.strip()
         return format_gaia_answer(llm_answer, question)

         with DDGS() as ddg:
             results = ddg.text(query=query, region="wt-wt", max_results=5)
             bodies = [r.get('body', '') for r in results if r.get('body')]
             return "\n".join(bodies[:3])
     except Exception as e:
         return f"ERROR: {e}"
         return f"ERROR: {e}"
 def format_gaia_answer(answer: str, question: str = "") -> str:
+    """Strict GAIA output, eliminate apologies, extract only answer value."""
     if not answer:
         return ""
+    # Remove apologies and anything after
     answer = re.sub(
+        r'(?i)(I[\' ]?m sorry.*|Unfortunately.*|I cannot.*|I am unable.*|error:.*|no file.*|but.*|however.*|unable to.*|not available.*|if you have access.*|I can\'t.*)',
         '', answer).strip()
+    # Remove everything after the first period if it's not a list
+    if not ("list" in question or "ingredient" in question or "vegetable" in question):
+        answer = answer.split('\n')[0].split('.')[0]
+    # Remove quotes/brackets
+    answer = answer.strip(' "\'[](),;:')
     # Only numbers for count questions
     if re.search(r'how many|number of|albums|at bats|total sales|output', question, re.I):
         match = re.search(r'(\d+)', answer)
         if match:
             return match.group(1)
+    # Only last word for "surname", first for "first name"
     if "surname" in question:
         return answer.split()[-1]
     if "first name" in question:
         code = re.search(r'[A-Z0-9]{3,}', answer)
         if code:
             return code.group(0)
+    # For lists: comma-separated, alpha, deduped, merged phrases
     if "list" in question or "ingredient" in question or "vegetable" in question:
         items = [x.strip(' "\'') for x in re.split(r'[,\n]', answer) if x.strip()]
         merged = []
         merged = [x.lower() for x in merged]
         merged = sorted(set(merged))
         return ', '.join(merged)
+    # For chess: algebraic move
     if "algebraic notation" in question or "chess" in question:
         move = re.findall(r'[KQRBN]?[a-h]?[1-8]?x?[a-h][1-8][+#]?', answer)
         if move:
             return move[-1]
+    return answer.strip(' "\'[](),;:')
 class GaiaAgent:
     def __init__(self):
         self.llm = OpenAIClient(api_key=os.getenv("OPENAI_API_KEY"))
     def __call__(self, question: str, task_id: str = None) -> str:
         search_keywords = [
             "who", "when", "what", "which", "how many", "number", "name", "albums", "surname", "at bats",
             "nasa", "city", "winner", "code", "vegetable", "ingredient", "magda m.", "featured article"
         ]
+        needs_search = any(kw in question.lower() for kw in search_keywords)
+        if needs_search:
             web_result = duckduckgo_search(question)
             llm_answer = self.llm.chat.completions.create(
                 model="gpt-4o",
                 messages=[
+                    {"role": "system", "content": "You are a research assistant. Based on the web search results and question, answer strictly and concisely for the GAIA benchmark. Only the answer, no explanations or apologies."},
                     {"role": "user", "content": f"Web search results:\n{web_result}\n\nQuestion: {question}"}
                 ],
                 temperature=0.0,
                 max_tokens=256,
             ).choices[0].message.content.strip()
+            formatted = format_gaia_answer(llm_answer, question)
+            # Retry if apology/empty/incorrect
+            if not formatted or "sorry" in formatted.lower() or "unable" in formatted.lower():
+                llm_answer2 = self.llm.chat.completions.create(
+                    model="gpt-4o",
+                    messages=[
+                        {"role": "system", "content": "Only answer with the value. No explanation. Do not apologize. Do not begin with 'I'm sorry', 'Unfortunately', or similar."},
+                        {"role": "user", "content": f"Web search results:\n{web_result}\n\nQuestion: {question}"}
+                    ],
+                    temperature=0.0,
+                    max_tokens=128,
+                ).choices[0].message.content.strip()
+                formatted = format_gaia_answer(llm_answer2, question)
+            return formatted
+        # For code/math output
+        if "output" in question.lower() and "python" in question.lower():
             code_match = re.search(r'```python(.*?)```', question, re.DOTALL)
             code = code_match.group(1) if code_match else ""
             result = eval_python_code(code)
             return format_gaia_answer(result, question)
+        # For lists/ingredients, always web search and format
+        if "list" in question.lower() or "ingredient" in question.lower() or "vegetable" in question.lower():
             web_result = duckduckgo_search(question)
             llm_answer = self.llm.chat.completions.create(
                 model="gpt-4o",
                 messages=[
+                    {"role": "system", "content": "You are a research assistant. Based on the web search results and question, answer strictly and concisely for the GAIA benchmark. Only the answer, no explanations or apologies."},
                     {"role": "user", "content": f"Web search results:\n{web_result}\n\nQuestion: {question}"}
                 ],
                 temperature=0.0,
                 max_tokens=256,
             ).choices[0].message.content.strip()
             return format_gaia_answer(llm_answer, question)
+        # Fallback: strict LLM answer, formatted
         llm_answer = self.llm.chat.completions.create(
             model="gpt-4o",
             messages=[
                 {"role": "user", "content": question}
             ],
             temperature=0.0,
+            max_tokens=128,
         ).choices[0].message.content.strip()
         return format_gaia_answer(llm_answer, question)