Update gaia_agent.py
gaia_agent.py +98 -12
gaia_agent.py
CHANGED
@@ -1,5 +1,5 @@
 """
-Enhanced GAIA Agent with
+Enhanced GAIA Agent with Strict Output Formatting for Hugging Face Course
 """

 import os
@@ -15,7 +15,7 @@ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
 class EnhancedGAIAAgent:
     """
     An enhanced agent designed to pass the GAIA evaluation by combining rule-based precision
-    with LLM-powered flexibility
+    with LLM-powered flexibility and strict output formatting.
     """

     def __init__(self, model_name="google/flan-t5-large", device=None):
@@ -64,21 +64,85 @@ class EnhancedGAIAAgent:
         self.tokenizer = None
         self.model = None

-    def __call__(self, question: str) -> str:
-        """
+    def __call__(self, question: str, task_id: str = None) -> str:
+        """
+        Process a question and return a formatted answer according to GAIA benchmark requirements.
+
+        Args:
+            question: The question to answer
+            task_id: Optional task ID for the GAIA benchmark
+
+        Returns:
+            JSON string with the required GAIA format
+        """
         print(f"Processing question: {question}")

         # Determine question type
         question_type = self._classify_question(question)
         print(f"Classified as: {question_type}")

-        #
-
+        # Generate reasoning trace if appropriate
+        reasoning_trace = self._generate_reasoning_trace(question, question_type)
+
+        # Use the appropriate handler to get the answer
+        model_answer = self.handlers[question_type](question)

         # Ensure answer is concise and specific
-
+        model_answer = self._ensure_concise_answer(model_answer, question_type)

-
+        # Format the response according to GAIA requirements
+        response = {
+            "task_id": task_id if task_id else "unknown_task",
+            "model_answer": model_answer,
+            "reasoning_trace": reasoning_trace
+        }
+
+        # Return the formatted JSON response
+        return json.dumps(response, ensure_ascii=False)
+
+    def _generate_reasoning_trace(self, question: str, question_type: str) -> str:
+        """Generate a reasoning trace for the question if appropriate."""
+        # For calculation and reasoning questions, provide a trace
+        if question_type == 'calculation':
+            # Extract numbers and operation from the question
+            numbers = re.findall(r'\d+', question)
+
+            if len(numbers) >= 2:
+                if re.search(r'(sum|add|plus|\+)', question.lower()):
+                    return f"To find the sum, I add the numbers: {' + '.join(numbers)} = {sum(int(num) for num in numbers)}"
+                elif re.search(r'(difference|subtract|minus|\-)', question.lower()) and len(numbers) >= 2:
+                    return f"To find the difference, I subtract: {numbers[0]} - {numbers[1]} = {int(numbers[0]) - int(numbers[1])}"
+                elif re.search(r'(product|multiply|times|\*)', question.lower()) and len(numbers) >= 2:
+                    return f"To find the product, I multiply: {numbers[0]} × {numbers[1]} = {int(numbers[0]) * int(numbers[1])}"
+                elif re.search(r'(divide|division|\/)', question.lower()) and len(numbers) >= 2:
+                    if int(numbers[1]) != 0:
+                        return f"To find the quotient, I divide: {numbers[0]} ÷ {numbers[1]} = {int(numbers[0]) / int(numbers[1])}"
+
+            # If we can't generate a specific trace, use a generic one
+            return "I need to identify the numbers and operations in the question, then perform the calculation step by step."
+
+        elif question_type in ['factual', 'general'] and self.llm_available:
+            # For factual and general questions, use LLM to generate a trace
+            try:
+                prompt = f"Explain your reasoning for answering this question: {question}"
+                inputs = self.tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).to(self.device)
+                outputs = self.model.generate(
+                    inputs["input_ids"],
+                    max_length=150,
+                    min_length=20,
+                    temperature=0.3,
+                    top_p=0.95,
+                    do_sample=True,
+                    num_return_sequences=1
+                )
+
+                trace = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+                return trace[:200]  # Limit trace length
+            except:
+                pass
+
+        # For other question types or if LLM fails, provide a minimal trace
+        return ""

     def _classify_question(self, question: str) -> str:
         """Determine the type of question for specialized handling."""
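With this hunk, __call__ now returns a JSON string in the GAIA response format (task_id, model_answer, reasoning_trace) instead of a bare answer, so callers need a json.loads step. A minimal usage sketch, assuming the file is importable as gaia_agent and that _classify_question routes this sample question to the calculation handler (the question and values shown are illustrative, not taken from the GAIA set):

import json
from gaia_agent import EnhancedGAIAAgent

agent = EnhancedGAIAAgent()
raw = agent("What is 12 plus 30?", task_id="task_001")

# The agent returns a JSON string, so decode it before using the answer
payload = json.loads(raw)
# Roughly: {"task_id": "task_001", "model_answer": "42",
#           "reasoning_trace": "To find the sum, I add the numbers: 12 + 30 = 42"}
print(payload["model_answer"])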
@@ -503,15 +567,25 @@ class EvaluationRunner:
                 continue

             try:
-
+                # Call agent with task_id to ensure proper formatting
+                json_response = agent(question_text, task_id)
+
+                # Parse the JSON response
+                response_obj = json.loads(json_response)
+
+                # Extract the model_answer for submission
+                submitted_answer = response_obj.get("model_answer", "")
+
                 answers_payload.append({
                     "task_id": task_id,
                     "submitted_answer": submitted_answer
                 })
+
                 results_log.append({
                     "Task ID": task_id,
                     "Question": question_text,
-                    "Submitted Answer": submitted_answer
+                    "Submitted Answer": submitted_answer,
+                    "Full Response": json_response
                 })
             except Exception as e:
                 print(f"Error running agent on task {task_id}: {e}")
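Note that the submission payload still carries only the bare model_answer; the full JSON response is kept in results_log only. If the agent ever returns something that is not valid JSON, json.loads raises and the whole task falls into the except branch and is skipped. A defensive variant, not part of this commit, could fall back to the raw string:

try:
    response_obj = json.loads(json_response)
    submitted_answer = response_obj.get("model_answer", "")
except json.JSONDecodeError:
    # Hypothetical fallback: treat the raw response as the submitted answer
    submitted_answer = json_response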
@@ -598,9 +672,21 @@ def test_agent():

     print("\n=== AGENT TEST RESULTS ===")
     for question in test_questions:
-
+        # Generate a mock task_id for testing
+        task_id = f"test_{hash(question) % 10000}"
+
+        # Get formatted JSON response
+        json_response = agent(question, task_id)
+
         print(f"\nQ: {question}")
-        print(f"
+        print(f"Response: {json_response}")
+
+        # Parse and print the model_answer for clarity
+        try:
+            response_obj = json.loads(json_response)
+            print(f"Model Answer: {response_obj.get('model_answer', '')}")
+        except:
+            print("Error parsing JSON response")

     return "Test completed successfully"
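One caveat on the mock IDs in test_agent: Python randomizes str hashes per interpreter process (PYTHONHASHSEED), so test_{hash(question) % 10000} changes between runs. If stable IDs matter for comparing test logs across runs, a deterministic variant (not in this commit) could use hashlib:

import hashlib

# Deterministic mock task_id: the same question always maps to the same id
task_id = f"test_{int(hashlib.md5(question.encode()).hexdigest(), 16) % 10000}"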