File size: 4,814 Bytes
47c6176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# agent.py
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from tools import tool_search, tool_calculate, tool_load_file

class CustomAgent:
    """Tool-using agent driven by a text2text LLM in a Thought/Action/Observation loop.

    The LLM is prompted with a system message describing the available tools
    and the expected output format.  Each turn the agent either runs the tool
    the LLM asked for (feeding the result back as an ``Observation``) or
    returns the text following ``Final Answer:``.
    """

    def __init__(self, model_name="google/flan-t5-xl", use_gpu=False):
        """Initialize the agent with an LLM (planner) and set up tools and prompt templates.

        Args:
            model_name: Model identifier passed to ``pipeline`` as both model
                and tokenizer.
            use_gpu: When true the pipeline is placed on device ``0``;
                otherwise device ``-1`` is used.
        """
        # Load the language model pipeline for text generation (the 'planner' LLM)
        device = 0 if use_gpu else -1
        self.llm = pipeline("text2text-generation", model=model_name, tokenizer=model_name, device=device)
        # Define the system prompt describing the agent and its tools
        self.tool_descriptions = (
            "Available tools:\n"
            "1. search(query) - searches for information about 'query' and returns a summary.\n"
            "2. calculate(expression) - evaluates a mathematical expression and returns the result.\n"
            "3. load_file(task_id) - loads an attached file for the task if any (returns a description or content snippet).\n"
        )
        self.system_message = (
            "You are an AI agent that can use tools to answer questions. "
            "You have the following tools:\n"
            f"{self.tool_descriptions}\n"
            "Follow this format:\n"
            "Thought: (think about the problem step by step)\n"
            "Action: (choose one of the tools and specify input)\n"
            "Observation: (result of the tool will be given)\n"
            "… [this Thought->Action->Observation cycle can repeat] …\n"
            "Thought: (when you have enough information, think final step)\n"
            "Final Answer: (provide the answer to the user's question)\n"
            "Make sure to output the final answer directly with no extra text.\n"
        )

    @staticmethod
    def _parse_action(agent_output):
        """Extract ``(tool_name, raw_argument)`` from the first ``Action:`` in *agent_output*.

        Only the first line after ``Action:`` is considered, so text the model
        invents on later lines (e.g. its own ``Observation:``) cannot leak into
        the argument.  The argument is everything between the first ``(`` and
        the last ``)`` on that line, which keeps nested parentheses such as
        ``calculate((1+2)*3)`` intact.  A missing closing parenthesis is
        tolerated.

        Raises:
            ValueError: If there is no ``Action:`` marker or no opening
                parenthesis on the action line.
        """
        _, marker, after = agent_output.partition("Action:")
        if not marker:
            raise ValueError("no 'Action:' marker in LLM output")
        action_line = after.strip().split("\n", 1)[0].strip()
        tool_name, paren, rest = action_line.partition("(")
        if not paren:
            raise ValueError(f"action has no argument list: {action_line!r}")
        closing = rest.rfind(")")
        arg = rest if closing == -1 else rest[:closing]
        return tool_name.strip(), arg

    @staticmethod
    def _run_tool(tool_name, arg):
        """Run the tool called *tool_name* (case-insensitive) on *arg* and return its result.

        Unknown tool names do not raise; a placeholder string is returned so
        it can be shown to the LLM as the observation.
        """
        name = tool_name.lower()
        if name == "search":
            return tool_search(arg.strip().strip('"\''))
        if name == "calculate":
            return tool_calculate(arg)
        if name == "load_file":
            return tool_load_file(arg.strip().strip('"\''))
        return f"(Unknown tool: {tool_name})"

    def answer(self, question: str, max_steps: int = 10) -> str:
        """Generate an answer for the given question by reasoning and using tools as needed.

        Args:
            question: The user's question.
            max_steps: Upper bound on LLM calls, to avoid infinite loops.

        Returns:
            The text after ``Final Answer:`` when the LLM produces one;
            ``"(Parsing Error: Invalid action format)"`` when an ``Action:``
            cannot be parsed; ``"(No conclusive answer)"`` when the LLM leaves
            the expected format or the step budget runs out.
        """
        # Fixed part of the prompt: system instructions and the user question
        dialog = f"{self.system_message}\nUser Question: {question}\n"
        # Running transcript of LLM outputs and tool observations
        agent_thoughts = ""
        for _ in range(max_steps):
            # The LLM is expected to continue from "Thought:" with either an
            # "Action: tool(...)" line or a "Final Answer: ..." line.
            prompt = f"{dialog}{agent_thoughts}\nThought:"
            response = self.llm(prompt, max_new_tokens=200, do_sample=False, return_text=True)[0]['generated_text']
            agent_output = response.strip()
            agent_thoughts += agent_output + "\n"
            if "Action:" in agent_output:
                # An action takes precedence over any final answer in the same
                # output, since that answer was produced without a real observation.
                try:
                    tool_name, arg = self._parse_action(agent_output)
                except ValueError:
                    return "(Parsing Error: Invalid action format)"
                result = self._run_tool(tool_name, arg)
                # Add the observation to the transcript for the next iteration
                agent_thoughts += f"Observation: {result}\n"
            elif "Final Answer:" in agent_output:
                # Return the answer text without the "Final Answer:" prefix
                return agent_output.split("Final Answer:")[1].strip()
            else:
                # The LLM did not follow the format; stop rather than loop blindly
                break
        # Loop ended without a Final Answer
        return "(No conclusive answer)"