Final_Assignment_Template

Runtime error

App Files Files Community

LamiaYT commited on Jun 29

Commit

843728a

1 Parent(s): 07c53f3

Optimization

Browse files

Files changed (5) hide show

.config +0 -60
app.py +237 -389
data/knowledge.txt +0 -0
requirements.txt +32 -14
test.py +146 -0

.config DELETED Viewed

@@ -1,60 +0,0 @@
-# Configuration file for GAIA Agent
-# Model Configuration
-MODEL_CONFIG = {
-    "model_id": "microsoft/DialoGPT-medium",  # Lightweight model for resource constraints
-    "max_tokens": 512,  # Reduced for memory efficiency
-    "temperature": 0.1,  # Low temperature for factual responses
-    "fallback_model": "gpt-3.5-turbo",  # Fallback if primary model fails
-}
-# Agent Configuration
-AGENT_CONFIG = {
-    "max_iterations": 5,  # Limit iterations to prevent infinite loops
-    "verbosity_level": 1,  # Moderate verbosity for debugging
-    "timeout_seconds": 30,  # Timeout for individual operations
-    "max_retries": 2,  # Number of retries for failed operations
-}
-# Tool Configuration
-TOOL_CONFIG = {
-    "web_search": {
-        "enabled": True,
-        "max_results": 5,  # Limit search results for efficiency
-        "timeout": 10,
-    },
-    "calculator": {
-        "enabled": True,
-        "safe_mode": True,  # Only allow safe mathematical expressions
-    },
-    "image_analyzer": {
-        "enabled": True,
-        "max_image_size": 5 * 1024 * 1024,  # 5MB limit
-        "supported_formats": [".jpg", ".jpeg", ".png", ".gif", ".bmp"],
-    },
-    "file_reader": {
-        "enabled": True,
-        "max_file_size": 10 * 1024 * 1024,  # 10MB limit
-        "supported_formats": [".txt", ".csv", ".json", ".md", ".py", ".js", ".html", ".css"],
-    },
-    "data_processor": {
-        "enabled": True,
-        "max_data_points": 10000,  # Limit for large datasets
-    }
-}
-# Performance Configuration
-PERFORMANCE_CONFIG = {
-    "memory_limit_mb": 2048,  # 2GB memory limit per process
-    "cpu_limit_percent": 80,  # Maximum CPU usage
-    "garbage_collection_frequency": 10,  # Run GC every N operations
-    "cache_size": 100,  # Number of cached responses
-}
-# API Configuration
-API_CONFIG = {
-    "default_api_url": "https://agents-course-unit4-scoring.hf.space",
-    "request_timeout": 60,
-    "max_concurrent_requests": 2,  # Limit concurrent requests
-}

app.py CHANGED Viewed

@@ -3,361 +3,241 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
 import json
-import re
-import time
-from typing import List, Dict, Any, Optional
-from datetime import datetime
-import threading
-import queue
-from ctransformers import AutoModelForCausalLM
-import logging
-# Setup logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-class WebSearchTool:
-    """Web search tool using Serper API for real-time information retrieval"""
-    def __init__(self, api_key: str):
-        self.api_key = api_key
-        self.base_url = "https://google.serper.dev/search"
-    def search(self, query: str, num_results: int = 5) -> Dict[str, Any]:
-        """Perform web search and return structured results"""
         try:
             headers = {
                 'X-API-KEY': self.api_key,
                 'Content-Type': 'application/json'
             }
-            payload = {
-                'q': query,
-                'num': num_results,
-                'gl': 'us',
-                'hl': 'en'
-            }
-            response = requests.post(self.base_url, json=payload, headers=headers, timeout=10)
             response.raise_for_status()
             data = response.json()
-            # Extract and format results
             results = []
             if 'organic' in data:
-                for item in data['organic'][:num_results]:
-                    results.append({
-                        'title': item.get('title', ''),
-                        'snippet': item.get('snippet', ''),
-                        'link': item.get('link', ''),
-                        'position': item.get('position', 0)
-                    })
-            return {
-                'success': True,
-                'results': results,
-                'query': query,
-                'total_results': len(results)
-            }
         except Exception as e:
-            logger.error(f"Web search error: {e}")
-            return {
-                'success': False,
-                'error': str(e),
-                'results': [],
-                'query': query,
-                'total_results': 0
-            }
-class CalculatorTool:
-    """Enhanced calculator tool for mathematical operations"""
-    def calculate(self, expression: str) -> Dict[str, Any]:
         """Safely evaluate mathematical expressions"""
         try:
-            # Clean the expression
-            expression = expression.strip()
-            # Replace common mathematical functions
-            expression = expression.replace('^', '**')  # Power operator
-            expression = re.sub(r'\b(\d+)x(\d+)\b', r'\1*\2', expression)  # Handle multiplication like 5x3
-            # Allow only safe mathematical operations
-            allowed_chars = set('0123456789+-*/().,eE pi')
-            allowed_funcs = ['abs', 'round', 'min', 'max', 'sum', 'pow', 'sqrt']
-            # Basic safety check
-            if any(char.isalpha() and char not in 'pie' for char in expression):
-                # Check if it contains allowed function names
-                import math
-                safe_dict = {
-                    "__builtins__": {},
-                    "abs": abs, "round": round, "min": min, "max": max,
-                    "sum": sum, "pow": pow, "sqrt": math.sqrt,
-                    "pi": math.pi, "e": math.e,
-                    "sin": math.sin, "cos": math.cos, "tan": math.tan,
-                    "log": math.log, "log10": math.log10,
-                    "exp": math.exp, "floor": math.floor, "ceil": math.ceil
-                }
-                result = eval(expression, safe_dict)
-            else:
-                result = eval(expression)
-            return {
-                'success': True,
-                'result': result,
-                'expression': expression
-            }
         except Exception as e:
-            logger.error(f"Calculator error: {e}")
-            return {
-                'success': False,
-                'error': str(e),
-                'expression': expression,
-                'result': None
-            }
-class LocalLLMManager:
-    """Manages local quantized LLM for reasoning"""
-    def __init__(self):
-        self.model = None
-        self.model_loaded = False
-        self.load_lock = threading.Lock()
-    def load_model(self):
-        """Load quantized model optimized for CPU inference"""
-        with self.load_lock:
-            if self.model_loaded:
-                return
-            try:
-                logger.info("Loading quantized model...")
-                # Use Phi-3-mini for better performance on CPU with limited resources
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    "microsoft/Phi-3-mini-4k-instruct-gguf",
-                    model_file="Phi-3-mini-4k-instruct-q4.gguf",
-                    model_type="phi3",
-                    gpu_layers=0,  # CPU only
-                    context_length=3072,  # Reduced context to save memory
-                    max_new_tokens=512,
-                    temperature=0.1,
-                    top_p=0.9,
-                    repetition_penalty=1.1
-                )
-                self.model_loaded = True
-                logger.info("Model loaded successfully")
-            except Exception as e:
-                logger.error(f"Error loading model: {e}")
-                # Fallback to a smaller model if Phi-3 fails
-                try:
-                    logger.info("Trying fallback model...")
-                    self.model = AutoModelForCausalLM.from_pretrained(
-                        "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-                        model_file="tinyllama-1.1b-chat-v1.0.q4_k_m.gguf",
-                        model_type="llama",
-                        gpu_layers=0,
-                        context_length=2048,
-                        max_new_tokens=256
-                    )
-                    self.model_loaded = True
-                    logger.info("Fallback model loaded successfully")
-                except Exception as e2:
-                    logger.error(f"Fallback model also failed: {e2}")
-                    raise
-    def generate(self, prompt: str, max_tokens: int = 256) -> str:
-        """Generate response from local model"""
-        if not self.model_loaded:
-            self.load_model()
-        if not self.model:
-            return "Error: Model not available"
-        try:
-            # Format prompt for Phi-3
-            formatted_prompt = f"<|user|>\n{prompt}<|end|>\n<|assistant|>\n"
-            response = self.model(
-                formatted_prompt,
-                max_new_tokens=min(max_tokens, 256),  # Limit tokens for speed
-                temperature=0.1,
-                stop=["<|end|>", "<|user|>"]
-            )
-            # Clean response
-            response = response.replace(formatted_prompt, "").strip()
-            if "<|end|>" in response:
-                response = response.split("<|end|>")[0].strip()
-            return response
         except Exception as e:
-            logger.error(f"Generation error: {e}")
-            return f"Error generating response: {e}"
 class GAIAAgent:
-    """Advanced GAIA agent with reasoning, tools, and multi-step problem solving"""
     def __init__(self):
-        # Initialize tools
-        self.serper_api_key = os.getenv("SERPER_API_KEY")
-        if not self.serper_api_key:
-            logger.warning("SERPER_API_KEY not found. Web search will be disabled.")
-            self.web_search = None
-        else:
-            self.web_search = WebSearchTool(self.serper_api_key)
-        self.calculator = CalculatorTool()
-        self.llm = LocalLLMManager()
-        # Agent configuration
-        self.max_iterations = 5
-        self.max_reasoning_length = 1000
-        logger.info("GAIA Agent initialized")
-    def _identify_question_type(self, question: str) -> str:
-        """Identify the type of question to determine approach"""
-        question_lower = question.lower()
-        if any(word in question_lower for word in ['calculate', 'compute', 'math', '+', '-', '*', '/', '=', 'sum', 'multiply', 'divide']):
-            return 'mathematical'
-        elif any(word in question_lower for word in ['current', 'latest', 'recent', 'today', 'now', '2024', '2025']):
-            return 'current_info'
-        elif any(word in question_lower for word in ['who', 'what', 'where', 'when', 'why', 'how']):
-            return 'factual'
-        elif any(word in question_lower for word in ['analyze', 'compare', 'explain', 'reason']):
-            return 'analytical'
-        else:
-            return 'general'
-    def _use_web_search(self, query: str) -> str:
-        """Use web search tool and format results"""
-        if not self.web_search:
-            return "Web search not available (API key missing)"
-        results = self.web_search.search(query, num_results=3)
-        if not results['success']:
-            return f"Search failed: {results.get('error', 'Unknown error')}"
-        if not results['results']:
-            return "No search results found"
-        formatted_results = f"Search results for '{query}':\n"
-        for i, result in enumerate(results['results'], 1):
-            formatted_results += f"{i}. {result['title']}\n   {result['snippet']}\n\n"
-        return formatted_results
-    def _use_calculator(self, expression: str) -> str:
-        """Use calculator tool and format result"""
-        result = self.calculator.calculate(expression)
-        if result['success']:
-            return f"Calculation: {result['expression']} = {result['result']}"
-        else:
-            return f"Calculation error: {result['error']}"
-    def _generate_reasoning(self, question: str, context: str = "") -> str:
-        """Generate reasoning step using local LLM"""
-        reasoning_prompt = f"""Question: {question}
-Context: {context}
-Think step by step about this question. Consider:
-1. What information do I need?
-2. What tools might help?
-3. How should I approach this problem?
-Provide a clear reasoning step:"""
-        try:
-            reasoning = self.llm.generate(reasoning_prompt, max_tokens=200)
-            return reasoning
-        except Exception as e:
-            logger.error(f"Reasoning generation error: {e}")
-            return "Unable to generate reasoning step"
-    def _generate_final_answer(self, question: str, context: str, reasoning_steps: List[str]) -> str:
-        """Generate final answer using all available information"""
-        all_reasoning = "\n".join([f"Step {i+1}: {step}" for i, step in enumerate(reasoning_steps)])
-        answer_prompt = f"""Question: {question}
-Context and Information:
-{context}
-Reasoning Steps:
-{all_reasoning}
-Based on all the information and reasoning above, provide a clear, concise, and accurate final answer to the question:"""
         try:
-            answer = self.llm.generate(answer_prompt, max_tokens=200)
-            return answer.strip()
         except Exception as e:
-            logger.error(f"Answer generation error: {e}")
-            return "Unable to generate final answer"
     def __call__(self, question: str) -> str:
-        """Main agent execution method"""
-        logger.info(f"Processing question: {question[:100]}...")
         try:
-            # Initialize
-            context = ""
-            reasoning_steps = []
-            question_type = self._identify_question_type(question)
-            logger.info(f"Question type identified: {question_type}")
-            # Step 1: Initial reasoning
-            initial_reasoning = self._generate_reasoning(question)
-            reasoning_steps.append(initial_reasoning)
-            context += f"Initial reasoning: {initial_reasoning}\n\n"
-            # Step 2: Apply tools based on question type
-            if question_type == 'mathematical':
-                # Try to extract mathematical expressions
-                math_matches = re.findall(r'[\d\+\-\*/\(\)\.\s\^]+', question)
-                for match in math_matches:
-                    if len(match.strip()) > 3:  # Avoid single digits
-                        calc_result = self._use_calculator(match.strip())
-                        context += f"Calculation: {calc_result}\n"
-            elif question_type in ['current_info', 'factual']:
-                # Use web search for factual or current information
-                search_result = self._use_web_search(question)
-                context += f"Web search results: {search_result}\n"
-            # Step 3: Additional reasoning with context
-            if context:
-                additional_reasoning = self._generate_reasoning(question, context)
-                reasoning_steps.append(additional_reasoning)
-                context += f"Additional reasoning: {additional_reasoning}\n\n"
-            # Step 4: Generate final answer
-            final_answer = self._generate_final_answer(question, context, reasoning_steps)
-            logger.info(f"Generated answer: {final_answer[:100]}...")
-            return final_answer
         except Exception as e:
-            logger.error(f"Agent execution error: {e}")
-            return f"Error processing question: {str(e)}"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
@@ -365,7 +245,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
     if profile:
         username = f"{profile.username}"
@@ -380,15 +260,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     # 1. Instantiate Agent
     try:
-        print("Initializing GAIA Agent...")
         agent = GAIAAgent()
-        print("GAIA Agent initialized successfully")
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # Agent code link
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(f"Agent code: {agent_code}")
     # 2. Fetch Questions
@@ -406,7 +286,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
         print(f"Error decoding JSON response from questions endpoint: {e}")
-        print(f"Response text: {response.text[:500]}")
         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
@@ -424,36 +303,30 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
-        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
         try:
-            start_time = time.time()
             submitted_answer = agent(question_text)
-            processing_time = time.time() - start_time
-            print(f"Question {task_id} processed in {processing_time:.2f}s")
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
                 "Task ID": task_id,
                 "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer,
-                "Processing Time (s)": f"{processing_time:.2f}"
             })
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
             results_log.append({
                 "Task ID": task_id,
                 "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": f"AGENT ERROR: {e}",
-                "Processing Time (s)": "Error"
             })
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
@@ -461,7 +334,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
@@ -485,61 +358,49 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     except Exception as e:
         status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
-# --- Build Gradio Interface using Blocks ---
 with gr.Blocks(title="GAIA Agent Evaluation") as demo:
-    gr.Markdown("# GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
-        **Advanced GAIA Agent Features:**
-        - 🧠 Local quantized LLM for reasoning (Phi-3-mini optimized for CPU)
-        - 🔍 Web search capabilities via Serper API
-        - 🧮 Mathematical calculation tools
-        - 🎯 Multi-step problem solving approach
-        - 🚀 Optimized for 16GB RAM / 2 vCPU constraints
         **Instructions:**
-        1. Ensure your SERPER_API_KEY environment variable is set for web search
-        2. Log in to your Hugging Face account using the button below
-        3. Click 'Run GAIA Evaluation' to start the comprehensive evaluation
-        **Note:** Initial model loading may take 1-2 minutes. Subsequent questions will be processed faster.
         """
     )
     gr.LoginButton()
-    run_button = gr.Button("🚀 Run GAIA Evaluation & Submit All Answers", variant="primary")
-    status_output = gr.Textbox(label="📊 Evaluation Status & Results", lines=8, interactive=False)
-    results_table = gr.DataFrame(label="📋 Detailed Question Results", wrap=True)
-    # Add system info
-    with gr.Accordion("🔧 System Information", open=False):
-        gr.Markdown(f"""
-        - **Environment**: Hugging Face Space
-        - **Resources**: 16GB RAM, 2 vCPU
-        - **Model**: Phi-3-mini-4k-instruct (quantized)
-        - **Web Search**: {'✅ Enabled' if os.getenv('SERPER_API_KEY') else '❌ Disabled (no API key)'}
-        - **Calculator**: ✅ Enabled
-        - **Timestamp**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}
-        """)
     run_button.click(
         fn=run_and_submit_all,
@@ -547,39 +408,26 @@ with gr.Blocks(title="GAIA Agent Evaluation") as demo:
     )
 if __name__ == "__main__":
-    print("\n" + "="*70)
-    print("🚀 GAIA AGENT EVALUATION SYSTEM STARTING")
-    print("="*70)
-    # Environment check
-    space_host = os.getenv("SPACE_HOST")
-    space_id = os.getenv("SPACE_ID")
     serper_key = os.getenv("SERPER_API_KEY")
-    if space_host:
-        print(f"✅ SPACE_HOST: {space_host}")
-        print(f"   🌐 Runtime URL: https://{space_host}.hf.space")
-    else:
-        print("ℹ️  Running locally (SPACE_HOST not found)")
     if space_id:
-        print(f"✅ SPACE_ID: {space_id}")
-        print(f"   📁 Repo URL: https://huggingface.co/spaces/{space_id}")
-    else:
-        print("ℹ️  SPACE_ID not found")
-    if serper_key:
-        print("✅ SERPER_API_KEY: Configured")
-    else:
-        print("⚠️  SERPER_API_KEY: Not found - Web search will be disabled")
-    print("="*70)
-    print("📚 GAIA Agent Features:")
-    print("  🧠 Local LLM reasoning")
-    print("  🔍 Web search integration")
-    print("  🧮 Mathematical calculations")
-    print("  🎯 Multi-step problem solving")
-    print("="*70 + "\n")
-    print("🎯 Launching GAIA Agent Evaluation Interface...")
     demo.launch(debug=True, share=False)

 import requests
 import inspect
 import pandas as pd
+from smolagents import CodeAgent, HfApiModel
+from smolagents.tools import DuckDuckGoSearchTool, PythonInterpreterTool
 import json
+import tempfile
+import urllib.parse
+from pathlib import Path
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Custom Tools ---
+class SerperSearchTool:
+    """Enhanced search tool using Serper API for more reliable results"""
+    name = "serper_search"
+    description = "Search the web using Serper API. Use this for finding current information, facts, and data."
+    def __init__(self):
+        self.api_key = os.getenv("SERPER_API_KEY")
+        if not self.api_key:
+            print("Warning: SERPER_API_KEY not found, falling back to DuckDuckGo")
+    def __call__(self, query: str) -> str:
+        """Search the web and return formatted results"""
+        if not self.api_key:
+            # Fallback to basic search if no Serper API key
+            return f"Search query: {query} - API key not available"
         try:
+            url = "https://google.serper.dev/search"
+            payload = json.dumps({
+                "q": query,
+                "num": 5
+            })
             headers = {
                 'X-API-KEY': self.api_key,
                 'Content-Type': 'application/json'
             }
+            response = requests.post(url, headers=headers, data=payload, timeout=10)
             response.raise_for_status()
             data = response.json()
             results = []
+            # Process organic results
             if 'organic' in data:
+                for item in data['organic'][:3]:  # Top 3 results
+                    results.append(f"Title: {item.get('title', 'N/A')}")
+                    results.append(f"Content: {item.get('snippet', 'N/A')}")
+                    results.append(f"URL: {item.get('link', 'N/A')}")
+                    results.append("---")
+            # Add answer box if available
+            if 'answerBox' in data:
+                answer = data['answerBox']
+                results.insert(0, f"Answer: {answer.get('answer', answer.get('snippet', 'N/A'))}")
+                results.insert(1, "---")
+            return "\n".join(results) if results else f"No results found for: {query}"
         except Exception as e:
+            print(f"Serper search error: {e}")
+            return f"Search error for '{query}': {str(e)}"
+class MathCalculatorTool:
+    """Tool for mathematical calculations and computations"""
+    name = "math_calculator"
+    description = "Perform mathematical calculations, solve equations, and handle numerical computations."
+    def __call__(self, expression: str) -> str:
         """Safely evaluate mathematical expressions"""
         try:
+            # Import math functions for calculations
+            import math
+            import operator
+            # Safe evaluation context
+            safe_dict = {
+                "abs": abs, "round": round, "min": min, "max": max,
+                "sum": sum, "pow": pow, "sqrt": math.sqrt,
+                "sin": math.sin, "cos": math.cos, "tan": math.tan,
+                "log": math.log, "log10": math.log10, "exp": math.exp,
+                "pi": math.pi, "e": math.e
+            }
+            # Clean the expression
+            expression = expression.replace("^", "**")  # Handle exponents
+            result = eval(expression, {"__builtins__": {}}, safe_dict)
+            return f"Result: {result}"
         except Exception as e:
+            return f"Math calculation error: {str(e)}"
+class FileProcessorTool:
+    """Tool for processing various file formats"""
+    name = "file_processor"
+    description = "Process and extract information from files (text, CSV, JSON, etc.)"
+    def __call__(self, file_path: str, action: str = "read") -> str:
+        """Process files based on action type"""
+        try:
+            if not os.path.exists(file_path):
+                return f"File not found: {file_path}"
+            file_ext = Path(file_path).suffix.lower()
+            if file_ext in ['.txt', '.md']:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+                return f"File content ({len(content)} chars):\n{content[:1000]}..."
+            elif file_ext == '.csv':
+                import pandas as pd
+                df = pd.read_csv(file_path)
+                return f"CSV file with {len(df)} rows and {len(df.columns)} columns:\n{df.head().to_string()}"
+            elif file_ext == '.json':
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    data = json.load(f)
+                return f"JSON data:\n{json.dumps(data, indent=2)[:1000]}..."
+            else:
+                return f"Unsupported file type: {file_ext}"
         except Exception as e:
+            return f"File processing error: {str(e)}"
+# --- Enhanced Agent Definition ---
 class GAIAAgent:
     def __init__(self):
+        """Initialize the GAIA agent with tools and model"""
+        print("Initializing GAIA Agent...")
+        # Initialize model
         try:
+            hf_token = os.getenv("HUGGINGFACE_INFERENCE_TOKEN")
+            if not hf_token:
+                print("Warning: HUGGINGFACE_INFERENCE_TOKEN not found")
+            # Use a good model for reasoning
+            model = HfApiModel(
+                model_id="meta-llama/Llama-3.1-70B-Instruct",
+                token=hf_token
+            )
+            # Initialize tools
+            self.tools = [
+                SerperSearchTool(),
+                PythonInterpreterTool(),
+                MathCalculatorTool(),
+                FileProcessorTool(),
+                DuckDuckGoSearchTool()  # Backup search
+            ]
+            # Initialize the agent
+            self.agent = CodeAgent(
+                tools=self.tools,
+                model=model,
+                max_steps=10,
+                verbosity_level=1
+            )
+            print("GAIA Agent initialized successfully with tools:", [tool.name for tool in self.tools])
         except Exception as e:
+            print(f"Error initializing GAIA Agent: {e}")
+            # Fallback to basic setup
+            try:
+                model = HfApiModel(model_id="microsoft/DialoGPT-medium")
+                self.agent = CodeAgent(tools=[PythonInterpreterTool()], model=model)
+                print("Fallback agent initialized")
+            except Exception as fallback_error:
+                print(f"Fallback initialization failed: {fallback_error}")
+                self.agent = None
     def __call__(self, question: str) -> str:
+        """Process a question using the GAIA agent"""
+        print(f"Processing question: {question[:100]}...")
+        if not self.agent:
+            return "Agent initialization failed. Please check your configuration."
         try:
+            # Enhanced prompt for better reasoning
+            enhanced_prompt = f"""
+You are an AI assistant designed to answer questions accurately and thoroughly.
+You have access to web search, Python interpreter, math calculator, and file processing tools.
+Question: {question}
+Please think step by step:
+1. Analyze what type of question this is
+2. Determine what tools or information you need
+3. Use appropriate tools to gather information
+4. Reason through the problem
+5. Provide a clear, accurate answer
+If the question requires:
+- Current information or facts: Use search tools
+- Calculations: Use the math calculator or Python interpreter
+- File analysis: Use the file processor tool
+- Multi-step reasoning: Break it down systematically
+Answer:"""
+            # Run the agent
+            result = self.agent.run(enhanced_prompt)
+            # Extract the final answer if it's structured
+            if isinstance(result, dict) and 'output' in result:
+                answer = result['output']
+            else:
+                answer = str(result)
+            # Clean up the answer
+            if "Answer:" in answer:
+                answer = answer.split("Answer:")[-1].strip()
+            print(f"Agent response: {answer[:100]}...")
+            return answer
         except Exception as e:
+            error_msg = f"Error processing question: {str(e)}"
+            print(error_msg)
+            # Fallback to basic response
+            try:
+                basic_response = f"I encountered an error while processing this question: {question}. Error: {str(e)}"
+                return basic_response
+            except:
+                return "Unable to process this question due to technical difficulties."
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
     # 1. Instantiate Agent
     try:
         agent = GAIAAgent()
+        if not agent.agent:
+            return "Failed to initialize GAIA Agent. Please check your tokens and try again.", None
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    # Agent code URL
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local"
     print(f"Agent code: {agent_code}")
     # 2. Fetch Questions
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
         print(f"Error decoding JSON response from questions endpoint: {e}")
         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
                 "Task ID": task_id,
                 "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
             })
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
+            error_answer = f"AGENT ERROR: {e}"
+            answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
             results_log.append({
                 "Task ID": task_id,
                 "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": error_answer
             })
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
     # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=120)  # Increased timeout
         response.raise_for_status()
         result_data = response.json()
         final_status = (
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
         status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
+# --- Build Gradio Interface ---
 with gr.Blocks(title="GAIA Agent Evaluation") as demo:
+    gr.Markdown("# GAIA Benchmark Agent Evaluation")
     gr.Markdown(
         """
+        **Enhanced GAIA Agent with Multiple Tools:**
+        - 🔍 Web Search (Serper API + DuckDuckGo fallback)
+        - 🐍 Python Interpreter for calculations
+        - 🧮 Mathematical calculator
+        - 📁 File processor for various formats
+        - 🧠 Advanced reasoning with Llama-3.1-70B
         **Instructions:**
+        1. Make sure you have SERPER_API_KEY and HUGGINGFACE_INFERENCE_TOKEN set
+        2. Log in to your Hugging Face account
+        3. Click 'Run GAIA Evaluation' to start the benchmark
+        **Target:** >40% accuracy on GAIA benchmark questions
         """
     )
     gr.LoginButton()
+    run_button = gr.Button("🚀 Run GAIA Evaluation & Submit", variant="primary")
+    status_output = gr.Textbox(
+        label="Evaluation Status & Results",
+        lines=6,
+        interactive=False,
+        placeholder="Click the button above to start evaluation..."
+    )
+    results_table = gr.DataFrame(
+        label="Questions and Agent Responses",
+        wrap=True,
+        interactive=False
+    )
     run_button.click(
         fn=run_and_submit_all,
     )
 if __name__ == "__main__":
+    print("\n" + "="*50)
+    print("🤖 GAIA Agent Evaluation System Starting")
+    print("="*50)
+    # Check environment variables
     serper_key = os.getenv("SERPER_API_KEY")
+    hf_token = os.getenv("HUGGINGFACE_INFERENCE_TOKEN")
+    space_id = os.getenv("SPACE_ID")
+    print(f"✅ SERPER_API_KEY: {'Found' if serper_key else 'Missing (will use fallback search)'}")
+    print(f"✅ HF_TOKEN: {'Found' if hf_token else 'Missing (required for model access)'}")
+    print(f"✅ SPACE_ID: {space_id if space_id else 'Not found (running locally)'}")
     if space_id:
+        print(f"🔗 Space URL: https://huggingface.co/spaces/{space_id}")
+    print("="*50)
+    print("🎯 Target: >40% accuracy on GAIA benchmark")
+    print("🛠️  Tools: Search, Python, Math, File Processing")
+    print("🧠 Model: Llama-3.1-70B-Instruct")
+    print("="*50 + "\n")
     demo.launch(debug=True, share=False)

data/knowledge.txt DELETED Viewed

File without changes

requirements.txt CHANGED Viewed

@@ -1,14 +1,32 @@
-gradio>=4.0.0
-transformers>=4.35.0
-torch>=2.0.0
-pandas>=1.5.0
-requests>=2.28.0
-beautifulsoup4>=4.11.0
-wikipedia>=1.4.0
-smolagents>=0.1.0
-accelerate>=0.20.0
-sentencepiece>=0.1.99
-openpyxl
-PyPDF2
-pillow
-ctransformers

+# Core dependencies
+gradio==4.44.0
+requests==2.31.0
+pandas==2.1.4
+# SmolagentS and AI dependencies
+smolagents==0.2.0
+transformers==4.45.2
+torch==2.1.2
+tokenizers==0.19.1
+# Tool dependencies
+duckduckgo-search==3.9.6
+python-dotenv==1.0.0
+# Utility libraries
+numpy==1.24.4
+urllib3==2.0.7
+certifi==2023.11.17
+charset-normalizer==3.3.2
+idna==3.6
+# Optional: for better JSON handling
+orjson==3.9.10
+# For file processing
+openpyxl==3.1.2
+python-docx==1.1.0
+# Security and compatibility
+cryptography==41.0.8
+PyYAML==6.0.1

test.py ADDED Viewed

	@@ -0,0 +1,146 @@

+#!/usr/bin/env python3
+"""
+Test script for GAIA Agent
+Run this to verify your agent works before deploying
+"""
+import os
+import sys
+from pathlib import Path
+# Add current directory to path
+sys.path.append(str(Path(__file__).parent))
+def test_environment():
+    """Test environment variables and dependencies"""
+    print("🧪 Testing Environment Setup")
+    print("-" * 40)
+    # Check environment variables
+    serper_key = os.getenv("SERPER_API_KEY")
+    hf_token = os.getenv("HUGGINGFACE_INFERENCE_TOKEN")
+    print(f"SERPER_API_KEY: {'✅ Found' if serper_key else '❌ Missing'}")
+    print(f"HF_TOKEN: {'✅ Found' if hf_token else '❌ Missing'}")
+    # Test imports
+    try:
+        import gradio as gr
+        print("Gradio: ✅ Imported")
+    except ImportError as e:
+        print(f"Gradio: ❌ Import failed - {e}")
+    try:
+        import smolagents
+        print("SmolagentS: ✅ Imported")
+    except ImportError as e:
+        print(f"SmolagentS: ❌ Import failed - {e}")
+    try:
+        import pandas as pd
+        print("Pandas: ✅ Imported")
+    except ImportError as e:
+        print(f"Pandas: ❌ Import failed - {e}")
+    try:
+        import requests
+        print("Requests: ✅ Imported")
+    except ImportError as e:
+        print(f"Requests: ❌ Import failed - {e}")
+def test_agent_basic():
+    """Test basic agent functionality"""
+    print("\n🤖 Testing Agent Initialization")
+    print("-" * 40)
+    try:
+        # Import the agent
+        from app import GAIAAgent
+        # Initialize agent
+        agent = GAIAAgent()
+        if agent.agent is None:
+            print("❌ Agent initialization failed")
+            return False
+        print("✅ Agent initialized successfully")
+        # Test with simple questions
+        test_questions = [
+            "What is 2 + 2?",
+            "What is the capital of France?",
+            "Calculate the square root of 16"
+        ]
+        for i, question in enumerate(test_questions, 1):
+            print(f"\n📝 Test Question {i}: {question}")
+            try:
+                answer = agent(question)
+                print(f"✅ Answer: {answer[:100]}...")
+            except Exception as e:
+                print(f"❌ Error: {e}")
+        return True
+    except Exception as e:
+        print(f"❌ Agent test failed: {e}")
+        return False
+def test_tools():
+    """Test individual tools"""
+    print("\n🛠️ Testing Individual Tools")
+    print("-" * 40)
+    try:
+        from app import SerperSearchTool, MathCalculatorTool
+        # Test search tool
+        search_tool = SerperSearchTool()
+        try:
+            result = search_tool("Python programming")
+            print(f"✅ Search Tool: {result[:100]}...")
+        except Exception as e:
+            print(f"❌ Search Tool Error: {e}")
+        # Test math tool
+        math_tool = MathCalculatorTool()
+        try:
+            result = math_tool("2 + 2")
+            print(f"✅ Math Tool: {result}")
+        except Exception as e:
+            print(f"❌ Math Tool Error: {e}")
+        # Test math tool with complex expression
+        try:
+            result = math_tool("sqrt(16) + 3 * 2")
+            print(f"✅ Math Complex: {result}")
+        except Exception as e:
+            print(f"❌ Math Complex Error: {e}")
+    except Exception as e:
+        print(f"❌ Tools test failed: {e}")
+def main():
+    """Run all tests"""
+    print("🚀 GAIA Agent Test Suite")
+    print("=" * 50)
+    # Test environment
+    test_environment()
+    # Test tools
+    test_tools()
+    # Test agent
+    success = test_agent_basic()
+    print("\n" + "=" * 50)
+    if success:
+        print("✅ All tests passed! Your agent is ready for deployment.")
+    else:
+        print("❌ Some tests failed. Please check the errors above.")
+    print("=" * 50)
+if __name__ == "__main__":
+    main()