Upload 3 files
#44
by
Tesvia
- opened
agent.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# agent.py
|
2 |
+
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
3 |
+
from tools import tool_search, tool_calculate, tool_load_file
|
4 |
+
|
5 |
+
class CustomAgent:
    """A minimal ReAct-style agent.

    A seq2seq LLM (the "planner") is prompted in a Thought -> Action ->
    Observation loop; tool calls emitted on "Action:" lines are executed
    locally and their results fed back until a "Final Answer:" appears.
    """

    def __init__(self, model_name="google/flan-t5-xl", use_gpu=False):
        """Initialize the agent with an LLM (planner) and set up tools and prompt templates.

        Args:
            model_name: Hugging Face model id loaded into a text2text-generation pipeline.
            use_gpu: if True, run the pipeline on CUDA device 0; otherwise CPU.
        """
        # transformers convention: device -1 = CPU, 0 = first GPU.
        device = 0 if use_gpu else -1
        self.llm = pipeline("text2text-generation", model=model_name, tokenizer=model_name, device=device)
        # Tool descriptions are embedded in the system prompt so the model
        # knows the exact call syntax it may emit on an "Action:" line.
        self.tool_descriptions = (
            "Available tools:\n"
            "1. search(query) - searches for information about 'query' and returns a summary.\n"
            "2. calculate(expression) - evaluates a mathematical expression and returns the result.\n"
            "3. load_file(task_id) - loads an attached file for the task if any (returns a description or content snippet).\n"
        )
        self.system_message = (
            "You are an AI agent that can use tools to answer questions. "
            "You have the following tools:\n"
            f"{self.tool_descriptions}\n"
            "Follow this format:\n"
            "Thought: (think about the problem step by step)\n"
            "Action: (choose one of the tools and specify input)\n"
            "Observation: (result of the tool will be given)\n"
            "β¦ [this Thought->Action->Observation cycle can repeat] β¦\n"
            "Thought: (when you have enough information, think final step)\n"
            "Final Answer: (provide the answer to the user's question)\n"
            "Make sure to output the final answer directly with no extra text.\n"
        )

    def answer(self, question: str) -> str:
        """Generate an answer for the given question by reasoning and using tools as needed.

        Returns the text after "Final Answer:", a "(Parsing Error: ...)" string
        when an Action line cannot be parsed, or "(No conclusive answer)" when
        the model stops following the format or the step budget runs out.
        """
        # Conversation prefix: system instructions plus the user question.
        dialog = f"{self.system_message}\nUser Question: {question}\n"
        # Accumulated Thought/Action/Observation transcript across iterations.
        agent_thoughts = ""
        for _ in range(10):  # cap at 10 reasoning steps to avoid infinite loops
            # Re-prompt with everything so far; the model continues from "Thought:".
            prompt = f"{dialog}{agent_thoughts}\nThought:"
            response = self.llm(prompt, max_new_tokens=200, do_sample=False, return_text=True)[0]['generated_text']
            # Expected continuations:
            #   "Thought: ...\nAction: tool_name(...)" or "Thought: ...\nFinal Answer: ..."
            agent_output = response.strip()
            agent_thoughts += agent_output + "\n"
            if "Action:" in agent_output:
                try:
                    # Keep only the first line after "Action:" — the model may
                    # hallucinate extra lines (e.g. a fake Observation) below it.
                    action_line = agent_output.split("Action:")[1].strip().splitlines()[0]
                    # e.g. action_line = "search(World War 2)" or "calculate((12*7)+1)"
                    # Split on the FIRST '(' only and drop only the FINAL ')' so
                    # arguments containing parentheses survive intact.
                    tool_name, arg = action_line.split("(", 1)
                    tool_name = tool_name.strip()
                    arg = arg.rsplit(")", 1)[0]
                except (ValueError, IndexError):
                    return "(Parsing Error: Invalid action format)"
                # Dispatch to the matching tool implementation.
                if tool_name.lower() == "search":
                    result = tool_search(arg.strip().strip('"\''))
                elif tool_name.lower() == "calculate":
                    result = tool_calculate(arg)
                elif tool_name.lower() == "load_file":
                    result = tool_load_file(arg.strip().strip('"\''))
                else:
                    result = f"(Unknown tool: {tool_name})"
                # Feed the tool result back for the next iteration.
                agent_thoughts += f"Observation: {result}\n"
            elif "Final Answer:" in agent_output:
                # The agent is presenting a final answer — extract and return it.
                answer_text = agent_output.split("Final Answer:")[1].strip()
                return answer_text  # return without any "FINAL ANSWER" prefix
            else:
                # Model did not follow the format; give up on this loop.
                break
        # Loop ended without a Final Answer.
        return "(No conclusive answer)"
|
main.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# main.py
|
2 |
+
import requests
|
3 |
+
from agent import CustomAgent
|
4 |
+
from config import HF_USERNAME, QUESTIONS_ENDPOINT, SUBMIT_ENDPOINT, DEFAULT_MODEL
|
5 |
+
|
6 |
+
def get_questions():
    """Retrieve the list of evaluation questions from the GAIA Unit4 API.

    Raises for HTTP errors; raises ValueError if the payload is not a JSON list.
    """
    response = requests.get(QUESTIONS_ENDPOINT, timeout=15)
    response.raise_for_status()
    payload = response.json()
    # The scoring API is expected to return a JSON array of question records.
    if isinstance(payload, list):
        return payload
    raise ValueError("Unexpected response format for questions.")
|
14 |
+
|
15 |
+
def submit_answers(username, answers_payload):
    """Submit the answers to the GAIA API and return the parsed result data.

    Args:
        username: Hugging Face username used for scoring attribution.
        answers_payload: list of {"task_id", "submitted_answer"} dicts.
    """
    # The API wants the username, a link to the agent's code, and the answers.
    payload = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{username}/Final_Assignment_Template/tree/main",
        "answers": answers_payload,
    }
    response = requests.post(SUBMIT_ENDPOINT, json=payload, timeout=60)
    response.raise_for_status()
    return response.json()
|
25 |
+
|
26 |
+
def main():
    """Run the agent on every GAIA evaluation question and submit the answers."""
    # Initialize our custom agent (model/settings come from config.py).
    agent = CustomAgent(model_name=DEFAULT_MODEL, use_gpu=False)
    print("Agent initialized with model:", DEFAULT_MODEL)
    # Fetch evaluation questions; abort cleanly if the API is unreachable.
    try:
        questions = get_questions()
    except Exception as e:
        print("Error fetching questions:", e)
        # raise SystemExit instead of exit(): exit() is a site.py convenience
        # that is not guaranteed to exist in every interpreter run mode.
        raise SystemExit(1)
    print(f"Retrieved {len(questions)} questions for evaluation.")
    # Run the agent on each question, collecting answers for submission.
    answers_payload = []
    for item in questions:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue  # skip malformed entries
        print(f"\n=== Question {task_id} ===")
        print("Q:", question_text)
        try:
            ans = agent.answer(question_text)
        except Exception as err:
            # One bad question must not abort the whole evaluation run.
            ans = "(Agent failed to produce an answer)"
            print("Error during agent reasoning:", err)
        print("A:", ans)
        answers_payload.append({"task_id": task_id, "submitted_answer": ans})
    # All answers ready — submit them for scoring.
    try:
        result = submit_answers(HF_USERNAME, answers_payload)
    except Exception as e:
        print("Submission failed:", e)
        raise SystemExit(1)
    # Print the server's scoring summary.
    score = result.get('score', 'N/A')
    correct = result.get('correct_count', '?')
    total = result.get('total_attempted', '?')
    message = result.get('message', '')
    print(f"\nSubmission complete! Score: {score}% ({correct}/{total} correct)")
    if message:
        print("Message from server:", message)

if __name__ == "__main__":
    main()
|
tools.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tools.py
|
2 |
+
import math
|
3 |
+
import requests
|
4 |
+
import wikipedia # using Wikipedia API for a search tool
|
5 |
+
|
6 |
+
# Tool 1: Wikipedia Search
|
7 |
+
def tool_search(query: str) -> str:
    """Search the web (Wikipedia API) for the query and return a summary of results."""
    try:
        # Ask the wikipedia library for a short two-sentence topic summary.
        return wikipedia.summary(query, sentences=2)
    except Exception as e:
        # Any failure (disambiguation, missing page, network) is reported
        # inline so the agent loop can surface it as an observation.
        return f"(Search tool failed: {e})"
|
15 |
+
|
16 |
+
# Tool 2: Calculator
|
17 |
+
def tool_calculate(expression: str) -> str:
    """Evaluate a mathematical expression and return the result as a string.

    Errors are returned as "(Calculation error: ...)" strings rather than
    raised, so the agent loop can show them as observations.
    """
    # SECURITY NOTE (review): eval() on model-generated text is risky even
    # with __builtins__ stripped — attribute chains like ().__class__ can
    # still reach dangerous objects. Tolerable only because input comes from
    # our own planner LLM; never expose this to untrusted callers.
    #
    # Generalization: expose the whole math module (sin, pi, log, ...) in
    # addition to the original sqrt/pow aliases — backward compatible.
    allowed = {name: getattr(math, name) for name in dir(math) if not name.startswith("_")}
    allowed.update({"sqrt": math.sqrt, "pow": math.pow})
    try:
        result = eval(expression, {"__builtins__": None}, allowed)
        return str(result)
    except Exception as e:
        return f"(Calculation error: {e})"
|
24 |
+
|
25 |
+
# Tool 3: File loader (for image or text files from GAIA, if needed)
|
26 |
+
def tool_load_file(task_id: str) -> str:
    """Fetch the file for a given task (if any) and return its content or a description."""
    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
    except Exception as e:
        return f"(File download error: {e})"
    # Branch on the server-reported content type.
    content_type = resp.headers.get("Content-Type", "")
    if "image" in content_type:
        # An image was received (could run image captioning model here)
        return "[Image received from task]"
    if "text" in content_type or "json" in content_type:
        # Truncate to the first 500 characters to keep observations small.
        snippet = resp.text[:500]
        return f"[File content snippet: {snippet}]"
    return "(Unknown file type or binary data received)"
|