import os import re from openai import OpenAI as OpenAIClient from duckduckgo_search import DDGS def duckduckgo_search(query: str) -> str: try: with DDGS() as ddg: results = ddg.text(query=query, region="wt-wt", max_results=5) bodies = [r.get('body', '') for r in results if r.get('body')] # For GAIA, prefer the first non-empty, or join a few if possible return "\n".join(bodies[:3]) except Exception as e: return f"ERROR: {e}" def eval_python_code(code: str) -> str: try: return str(eval(code, {"__builtins__": {}})) except Exception as e: return f"ERROR: {e}" def format_gaia_answer(answer: str, question: str = "") -> str: """Strictly format GAIA output and eliminate apologies or error text.""" if not answer: return "" # Remove apology/error phrases answer = re.sub( r'(?i)(unfortunately|unable to|error:|not available|i cannot|i am unable|i can\'t|no file|skip|I do not have|I cannot access|I am currently unable|If you have access).*', '', answer).strip() # Remove leading/trailing quotes/brackets answer = answer.strip(' "\'[]') # Only numbers for count questions if re.search(r'how many|number of|albums|at bats|total sales|output', question, re.I): match = re.search(r'(\d+)', answer) if match: return match.group(1) # Only the last word for "surname" or first for "first name" if "surname" in question: return answer.split()[-1] if "first name" in question: return answer.split()[0] # For code outputs, numbers only if "output" in question and "python" in question: num = re.search(r'(\d+)', answer) return num.group(1) if num else answer # Only country code (3+ uppercase letters or digits) if re.search(r'IOC country code|award number|NASA', question, re.I): code = re.search(r'[A-Z0-9]{3,}', answer) if code: return code.group(0) # For lists: split, merge common phrases, dedupe, alpha-sort, comma-sep if "list" in question or "ingredient" in question or "vegetable" in question: items = [x.strip(' "\'') for x in re.split(r'[,\n]', answer) if x.strip()] merged = [] skip = False for i, item in enumerate(items): if skip: skip = False continue if i + 1 < len(items) and item in ['sweet', 'green', 'lemon', 'ripe', 'whole', 'fresh', 'bell']: merged.append(f"{item} {items[i+1]}") skip = True else: merged.append(item) merged = [x.lower() for x in merged] merged = sorted(set(merged)) return ', '.join(merged) # For chess: algebraic move (like Qd1+) if "algebraic notation" in question or "chess" in question: move = re.findall(r'[KQRBN]?[a-h]?[1-8]?x?[a-h][1-8][+#]?', answer) if move: return move[-1] # Remove everything after first period for single-word answers answer = answer.split('\n')[0].split('.')[0].strip() return answer class GaiaAgent: def __init__(self): self.llm = OpenAIClient(api_key=os.getenv("OPENAI_API_KEY")) def __call__(self, question: str, task_id: str = None) -> str: # 1. Try tool-based search for all fact/list/code questions ql = question.lower() # Try to route every "who", "what", "number", "albums", "at bats", "surname", etc. to web search search_keywords = [ "who", "when", "what", "which", "how many", "number", "name", "albums", "surname", "at bats", "nasa", "city", "winner", "code", "vegetable", "ingredient", "magda m.", "featured article" ] if any(kw in ql for kw in search_keywords): web_result = duckduckgo_search(question) llm_answer = self.llm.chat.completions.create( model="gpt-4o", messages=[ {"role": "system", "content": "You are a research assistant. Based on the following web search results and question, answer strictly and concisely for the GAIA benchmark. Only the answer, no explanations or apologies."}, {"role": "user", "content": f"Web search results:\n{web_result}\n\nQuestion: {question}"} ], temperature=0.0, max_tokens=256, ).choices[0].message.content.strip() return format_gaia_answer(llm_answer, question) # 2. For code/math if "output" in ql and "python" in ql: code_match = re.search(r'```python(.*?)```', question, re.DOTALL) code = code_match.group(1) if code_match else "" result = eval_python_code(code) return format_gaia_answer(result, question) # 3. For lists or ingredients, always web search and format if "list" in ql or "ingredient" in ql or "vegetable" in ql: web_result = duckduckgo_search(question) llm_answer = self.llm.chat.completions.create( model="gpt-4o", messages=[ {"role": "system", "content": "You are a research assistant. Based on the following web search results and question, answer strictly and concisely for the GAIA benchmark. Only the answer, no explanations or apologies."}, {"role": "user", "content": f"Web search results:\n{web_result}\n\nQuestion: {question}"} ], temperature=0.0, max_tokens=256, ).choices[0].message.content.strip() return format_gaia_answer(llm_answer, question) # 4. Fallback: strict LLM answer, formatted llm_answer = self.llm.chat.completions.create( model="gpt-4o", messages=[ {"role": "system", "content": "You are a research assistant. Answer strictly and concisely for the GAIA benchmark. Only the answer, no explanations or apologies."}, {"role": "user", "content": question} ], temperature=0.0, max_tokens=256, ).choices[0].message.content.strip() return format_gaia_answer(llm_answer, question)