File size: 5,775 Bytes
61e2bd8
c36ad5a
 
 
61e2bd8
c36ad5a
 
 
 
 
61e2bd8
c36ad5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61e2bd8
c36ad5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61e2bd8
c36ad5a
 
 
 
 
 
61e2bd8
c36ad5a
 
 
 
 
 
61e2bd8
c36ad5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61e2bd8
c36ad5a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import re
import google.generativeai as genai
from tools import web_search, read_file_from_api, python_interpreter

# --- The ReAct Prompt Template ---
# This master prompt is the "brain" of the agent. It tells the LLM how to behave.
# It's explicitly told that the "Final Answer:" prefix is for its internal use only.
#
# NOTE: `{question}` is the only placeholder; GeminiAgent.__call__ fills it via
# str.format. The agent's parser depends on the literal markers this template
# instructs the model to emit — `Action: tool_name[input]` and `Final Answer:` —
# so any wording change here must stay in sync with the regexes in GeminiAgent.
REACT_PROMPT = """
You are a helpful and intelligent agent designed to solve complex problems. You have access to a set of tools to help you.

Your task is to answer the user's question accurately. To do this, you must operate in a loop of Thought, Action, and Observation.

1.  **Thought:** First, reason about the problem and your strategy.
2.  **Action:** Based on your thought, choose ONE of the following tools to use. The format must be `Action: tool_name[input]`.
3.  **Observation:** After you perform an action, you will receive an observation.
4.  **Repeat:** You will repeat this process until you are certain of the final answer.

Your available tools are:
- `web_search[query]`: Searches the web to find up-to-date information or facts.
- `read_file_from_api[task_id]`: Reads a file required by the question. The `task_id` is implicitly available from the context.
- `python_interpreter[code]`: Executes Python code for calculations or complex logic.

**CRITICAL INSTRUCTION:** When you have the final answer, you MUST use the following format for your last step:
`Final Answer: [The single, exact answer]`

This `Final Answer:` prefix is a signal for the system to stop. The system will automatically extract *only the text after the prefix* for the submission. Do not add any other text, explanation, or formatting around the final answer.

---
Here is the problem:
Question: {question}
"""

class GeminiAgent:
    """ReAct-style agent that drives a Gemini model through a
    Thought -> Action -> Observation loop until it emits a final answer.

    The model's replies are parsed for two literal control markers defined in
    REACT_PROMPT: `Action: tool_name[input]` (run a tool, feed back the
    observation) and `Final Answer: ...` (stop and return the bare answer).
    """

    # Pre-compiled parsers for the two control signals, hoisted out of the
    # per-turn loop. `\s*` tolerates variable spacing after the colon; DOTALL
    # lets a tool input (e.g. multi-line Python code) span several lines.
    _FINAL_ANSWER_RE = re.compile(r"Final Answer:\s*(.*)", re.DOTALL)
    _ACTION_RE = re.compile(r"Action:\s*(\w+)\[(.*)\]", re.DOTALL)

    def __init__(self, max_turns: int = 10):
        """Configure the Gemini client and the tool registry.

        Args:
            max_turns: Upper bound on Thought/Action/Observation cycles
                before giving up (prevents runaway loops). Defaults to 10,
                preserving the original hard-coded limit.

        Raises:
            ValueError: If the GEMINI_API_KEY environment variable is unset.
        """
        print("Initializing GeminiAgent (ReAct)...")
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY secret not found! Please set it in your Space's settings.")

        genai.configure(api_key=api_key)

        # Using the state-of-the-art gemini-2.5-pro model.
        self.model = genai.GenerativeModel('gemini-2.5-pro')

        self.max_turns = max_turns
        self.tools = {
            "web_search": web_search,
            "read_file_from_api": read_file_from_api,
            "python_interpreter": python_interpreter
        }
        print("GeminiAgent initialized successfully with model 'gemini-2.5-pro'.")

    def _execute_action(self, response_text: str, task_id: str) -> str:
        """Parse the model's `Action:` line and run the matching tool.

        Returns the observation to feed back into the prompt. Parse failures
        and tool exceptions are reported as observations rather than raised,
        so the ReAct loop can self-correct on the next turn.
        """
        action_match = self._ACTION_RE.search(response_text)
        if not action_match:
            # The model produced neither a Final Answer nor an Action
            # (it may be confused); nudge it back onto the format.
            return "No valid 'Action:' or 'Final Answer:' found in your response. Please think step-by-step and select a tool or provide the final answer."

        tool_name = action_match.group(1).strip()
        tool_input = action_match.group(2).strip()

        if tool_name not in self.tools:
            return f"Error: Unknown tool '{tool_name}'. Please choose from the available tools."

        try:
            # The file-reader keys off the task_id extracted from the
            # question, not whatever the model put between the brackets.
            if tool_name == "read_file_from_api":
                return self.tools[tool_name](task_id)
            return self.tools[tool_name](tool_input)
        except Exception as e:
            return f"Error executing tool {tool_name}: {e}"

    def __call__(self, question: str) -> str:
        """Answer *question* via the ReAct loop and return the bare answer text.

        The returned string is exactly what should be submitted: the
        'Final Answer:' prefix is stripped before returning.
        """
        # GAIA-style questions embed the task id as 'gaia-id:<id>'; the
        # file-reader tool needs it to fetch the attached file.
        task_id_match = re.search(r'gaia-id:(\S+)', question)
        task_id = task_id_match.group(1) if task_id_match else "unknown"

        prompt = REACT_PROMPT.format(question=question)

        for turn in range(self.max_turns):
            print(f"\n--- Turn {turn + 1} ---\n")

            # 1. THOUGHT + ACTION: ask the model for its next step.
            response = self.model.generate_content(prompt)

            # A blocked or empty candidate leaves .parts empty; accessing
            # .text would then raise, so re-prompt instead.
            if not response.parts:
                print("Warning: Model returned an empty response.")
                prompt += "\nObservation: The model returned an empty response. Please try again."
                continue

            response_text = response.text
            print(f"LLM Response:\n{response_text}\n")

            # 2. Stop signal: extract ONLY the text after 'Final Answer:' —
            # the submitted value must never contain the prefix itself.
            final_answer_match = self._FINAL_ANSWER_RE.search(response_text)
            if final_answer_match:
                answer = final_answer_match.group(1).strip()
                print(f"Final Answer signal detected. Extracting and returning: '{answer}'")
                return answer

            # 3. ACT: run the requested tool (or produce a corrective note).
            observation = self._execute_action(response_text, task_id)
            print(f"Observation:\n{observation}\n")

            # 4. OBSERVE: append the full turn so the model sees its history.
            prompt += f"{response_text}\nObservation: {observation}\n"

        # Fallback if the agent gets stuck in a loop.
        print("Agent failed to find an answer within the turn limit.")
        return f"Agent failed to find an answer within {self.max_turns} turns."