Upload 3 files
#44
by
Tesvia
- opened
agent.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# agent.py
|
2 |
+
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
3 |
+
from tools import tool_search, tool_calculate, tool_load_file
|
4 |
+
|
5 |
+
class CustomAgent:
    """A minimal ReAct-style agent.

    A seq2seq LLM (the "planner") is prompted in a Thought -> Action ->
    Observation loop; tool calls emitted on "Action:" lines are executed
    locally and their results fed back until a "Final Answer:" appears.
    """

    def __init__(self, model_name="google/flan-t5-xl", use_gpu=False):
        """Initialize the agent with an LLM (planner) and set up tools and prompt templates.

        Args:
            model_name: Hugging Face model id loaded into a text2text-generation pipeline.
            use_gpu: if True, run the pipeline on CUDA device 0; otherwise CPU.
        """
        # transformers convention: device -1 = CPU, 0 = first GPU.
        device = 0 if use_gpu else -1
        self.llm = pipeline("text2text-generation", model=model_name, tokenizer=model_name, device=device)
        # Tool descriptions are embedded in the system prompt so the model
        # knows the exact call syntax it may emit on an "Action:" line.
        self.tool_descriptions = (
            "Available tools:\n"
            "1. search(query) - searches for information about 'query' and returns a summary.\n"
            "2. calculate(expression) - evaluates a mathematical expression and returns the result.\n"
            "3. load_file(task_id) - loads an attached file for the task if any (returns a description or content snippet).\n"
        )
        self.system_message = (
            "You are an AI agent that can use tools to answer questions. "
            "You have the following tools:\n"
            f"{self.tool_descriptions}\n"
            "Follow this format:\n"
            "Thought: (think about the problem step by step)\n"
            "Action: (choose one of the tools and specify input)\n"
            "Observation: (result of the tool will be given)\n"
            "β¦ [this Thought->Action->Observation cycle can repeat] β¦\n"
            "Thought: (when you have enough information, think final step)\n"
            "Final Answer: (provide the answer to the user's question)\n"
            "Make sure to output the final answer directly with no extra text.\n"
        )

    def answer(self, question: str) -> str:
        """Generate an answer for the given question by reasoning and using tools as needed.

        Returns the text after "Final Answer:", a "(Parsing Error: ...)" string
        when an Action line cannot be parsed, or "(No conclusive answer)" when
        the model stops following the format or the step budget runs out.
        """
        # Conversation prefix: system instructions plus the user question.
        dialog = f"{self.system_message}\nUser Question: {question}\n"
        # Accumulated Thought/Action/Observation transcript across iterations.
        agent_thoughts = ""
        for _ in range(10):  # cap at 10 reasoning steps to avoid infinite loops
            # Re-prompt with everything so far; the model continues from "Thought:".
            prompt = f"{dialog}{agent_thoughts}\nThought:"
            response = self.llm(prompt, max_new_tokens=200, do_sample=False, return_text=True)[0]['generated_text']
            # Expected continuations:
            #   "Thought: ...\nAction: tool_name(...)" or "Thought: ...\nFinal Answer: ..."
            agent_output = response.strip()
            agent_thoughts += agent_output + "\n"
            if "Action:" in agent_output:
                try:
                    # Keep only the first line after "Action:" — the model may
                    # hallucinate extra lines (e.g. a fake Observation) below it.
                    action_line = agent_output.split("Action:")[1].strip().splitlines()[0]
                    # e.g. action_line = "search(World War 2)" or "calculate((12*7)+1)"
                    # Split on the FIRST '(' only and drop only the FINAL ')' so
                    # arguments containing parentheses survive intact.
                    tool_name, arg = action_line.split("(", 1)
                    tool_name = tool_name.strip()
                    arg = arg.rsplit(")", 1)[0]
                except (ValueError, IndexError):
                    return "(Parsing Error: Invalid action format)"
                # Dispatch to the matching tool implementation.
                if tool_name.lower() == "search":
                    result = tool_search(arg.strip().strip('"\''))
                elif tool_name.lower() == "calculate":
                    result = tool_calculate(arg)
                elif tool_name.lower() == "load_file":
                    result = tool_load_file(arg.strip().strip('"\''))
                else:
                    result = f"(Unknown tool: {tool_name})"
                # Feed the tool result back for the next iteration.
                agent_thoughts += f"Observation: {result}\n"
            elif "Final Answer:" in agent_output:
                # The agent is presenting a final answer — extract and return it.
                answer_text = agent_output.split("Final Answer:")[1].strip()
                return answer_text  # return without any "FINAL ANSWER" prefix
            else:
                # Model did not follow the format; give up on this loop.
                break
        # Loop ended without a Final Answer.
        return "(No conclusive answer)"
|
main.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# main.py
|
2 |
+
import requests
|
3 |
+
from agent import CustomAgent
|
4 |
+
from config import HF_USERNAME, QUESTIONS_ENDPOINT, SUBMIT_ENDPOINT, DEFAULT_MODEL
|
5 |
+
|
6 |
+
def get_questions():
    """Retrieve the list of evaluation questions from the GAIA Unit4 API.

    Raises for HTTP errors; raises ValueError if the payload is not a JSON list.
    """
    response = requests.get(QUESTIONS_ENDPOINT, timeout=15)
    response.raise_for_status()
    payload = response.json()
    # The scoring API is expected to return a JSON array of question records.
    if isinstance(payload, list):
        return payload
    raise ValueError("Unexpected response format for questions.")
|
14 |
+
|
15 |
+
def submit_answers(username, answers_payload):
    """Submit the answers to the GAIA API and return the parsed result data.

    Args:
        username: Hugging Face username used for scoring attribution.
        answers_payload: list of {"task_id", "submitted_answer"} dicts.
    """
    # The API wants the username, a link to the agent's code, and the answers.
    payload = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{username}/Final_Assignment_Template/tree/main",
        "answers": answers_payload,
    }
    response = requests.post(SUBMIT_ENDPOINT, json=payload, timeout=60)
    response.raise_for_status()
    return response.json()
|
25 |
+
|
26 |
+
def main():
    """Run the agent on every GAIA evaluation question and submit the answers."""
    # Initialize our custom agent (model/settings come from config.py).
    agent = CustomAgent(model_name=DEFAULT_MODEL, use_gpu=False)
    print("Agent initialized with model:", DEFAULT_MODEL)
    # Fetch evaluation questions; abort cleanly if the API is unreachable.
    try:
        questions = get_questions()
    except Exception as e:
        print("Error fetching questions:", e)
        # raise SystemExit instead of exit(): exit() is a site.py convenience
        # that is not guaranteed to exist in every interpreter run mode.
        raise SystemExit(1)
    print(f"Retrieved {len(questions)} questions for evaluation.")
    # Run the agent on each question, collecting answers for submission.
    answers_payload = []
    for item in questions:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue  # skip malformed entries
        print(f"\n=== Question {task_id} ===")
        print("Q:", question_text)
        try:
            ans = agent.answer(question_text)
        except Exception as err:
            # One bad question must not abort the whole evaluation run.
            ans = "(Agent failed to produce an answer)"
            print("Error during agent reasoning:", err)
        print("A:", ans)
        answers_payload.append({"task_id": task_id, "submitted_answer": ans})
    # All answers ready — submit them for scoring.
    try:
        result = submit_answers(HF_USERNAME, answers_payload)
    except Exception as e:
        print("Submission failed:", e)
        raise SystemExit(1)
    # Print the server's scoring summary.
    score = result.get('score', 'N/A')
    correct = result.get('correct_count', '?')
    total = result.get('total_attempted', '?')
    message = result.get('message', '')
    print(f"\nSubmission complete! Score: {score}% ({correct}/{total} correct)")
    if message:
        print("Message from server:", message)

if __name__ == "__main__":
    main()
|
tools.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tools.py
|
2 |
+
import math
|
3 |
+
import requests
|
4 |
+
import wikipedia # using Wikipedia API for a search tool
|
5 |
+
|
6 |
+
# Tool 1: Wikipedia Search
|
7 |
+
def tool_search(query: str) -> str:
    """Search the web (Wikipedia API) for the query and return a summary of results."""
    try:
        # Ask the wikipedia library for a short two-sentence topic summary.
        return wikipedia.summary(query, sentences=2)
    except Exception as e:
        # Any failure (disambiguation, missing page, network) is reported
        # inline so the agent loop can surface it as an observation.
        return f"(Search tool failed: {e})"
|
15 |
+
|
16 |
+
# Tool 2: Calculator
|
17 |
+
def tool_calculate(expression: str) -> str:
    """Evaluate a mathematical expression and return the result as a string.

    Errors are returned as "(Calculation error: ...)" strings rather than
    raised, so the agent loop can show them as observations.
    """
    # SECURITY NOTE (review): eval() on model-generated text is risky even
    # with __builtins__ stripped — attribute chains like ().__class__ can
    # still reach dangerous objects. Tolerable only because input comes from
    # our own planner LLM; never expose this to untrusted callers.
    #
    # Generalization: expose the whole math module (sin, pi, log, ...) in
    # addition to the original sqrt/pow aliases — backward compatible.
    allowed = {name: getattr(math, name) for name in dir(math) if not name.startswith("_")}
    allowed.update({"sqrt": math.sqrt, "pow": math.pow})
    try:
        result = eval(expression, {"__builtins__": None}, allowed)
        return str(result)
    except Exception as e:
        return f"(Calculation error: {e})"
|
24 |
+
|
25 |
+
# Tool 3: File loader (for image or text files from GAIA, if needed)
|
26 |
+
def tool_load_file(task_id: str) -> str:
    """Fetch the file for a given task (if any) and return its content or a description."""
    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
    except Exception as e:
        return f"(File download error: {e})"
    # Branch on the server-reported content type.
    content_type = resp.headers.get("Content-Type", "")
    if "image" in content_type:
        # An image was received (could run image captioning model here)
        return "[Image received from task]"
    if "text" in content_type or "json" in content_type:
        # Truncate to the first 500 characters to keep observations small.
        snippet = resp.text[:500]
        return f"[File content snippet: {snippet}]"
    return "(Unknown file type or binary data received)"
|