Spaces:

davidgturner
/

GaiaAgentEvaluator

Sleeping

File size: 11,660 Bytes

311c0d0
 
4bb25ec
6f446d0
07ad0d5
c1db1fc
f5bafc2
 
07ad0d5
f5bafc2
 
07ad0d5
c1db1fc
f5bafc2
 
08e2c16
07ad0d5
f5bafc2
 
 
 
c1db1fc
07ad0d5
 
 
 
 
 
 
 
 
 
 
 
 
f5bafc2
 
07ad0d5
f5bafc2
c1db1fc
f5bafc2
 
c1db1fc
f5bafc2
c1db1fc
f5bafc2
07ad0d5
f5bafc2
 
 
c1db1fc
f5bafc2
c1db1fc
f5bafc2
 
c1db1fc
07ad0d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08e2c16
f5bafc2
 
 
 
c1db1fc
f5bafc2
 
 
 
 
 
 
 
 
 
 
08e2c16
c1db1fc
07ad0d5
 
 
 
f5bafc2
08e2c16
f5bafc2
 
c1db1fc
07ad0d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e305927
f5bafc2
 
07ad0d5
f5bafc2
 
 
 
07ad0d5
f5bafc2
 
 
 
 
 
 
 
 
e305927
07ad0d5
f5bafc2
e305927
f5bafc2
 
 
 
 
 
 
 
08e2c16
f5bafc2
08e2aa5
f5bafc2
08e2aa5
 
8ea0ccb
7a29ecc
6f446d0
cedc6dd
f5bafc2
6f446d0
cedc6dd
6f446d0
8ea0ccb
6f446d0
cedc6dd
08e2aa5
f5bafc2
c1db1fc
f5bafc2
 
 
 
 
 
 
08e2aa5
f5bafc2
 
6f446d0
8ea0ccb
08e2aa5
4bb25ec
08e2aa5
8ea0ccb
08e2aa5
 
f5bafc2
 
08e2aa5
d68dd9c
08e2aa5
 
8ea0ccb
08e2aa5
 
d68dd9c
f5bafc2
8ea0ccb
 
6f446d0
08e2aa5
 
 
 
 
 
 
f5bafc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08e2aa5
f5bafc2
 
 
 
 
 
08e2aa5
 
6f446d0
08e2aa5
 
f5bafc2
 
 
 
 
 
 
 
d68dd9c
8ea0ccb
d68dd9c
 
f5bafc2
 
d68dd9c
 
6f446d0
 
 
d68dd9c
 
f5bafc2
8ea0ccb
f5bafc2
 
 
d68dd9c
 
 
f5bafc2
1c3bf8f
148ab21
 
f5bafc2
 
 
 
148ab21
d68dd9c
 
cedc6dd
08e2aa5
9ef5250
8ea0ccb
d68dd9c
08e2aa5
 
 
d68dd9c
 
 
f5bafc2
6f446d0

import os
import gradio as gr
import requests
import pandas as pd
from smolagents import Tool, CodeAgent, Model

# Import internal modules
from config import (
    DEFAULT_API_URL
)
from tools.tool_manager import ToolManager
from utils.local_model import LocalTransformersModel

class GaiaToolCallingAgent:
    """Tool-calling agent specifically designed for the GAIA system."""
    
    def __init__(self, local_model=None):
        print("GaiaToolCallingAgent initialized.")
        self.tool_manager = ToolManager()
        self.name = "tool_agent"  # Add required name attribute for smolagents integration
        self.description = "A specialized agent that uses various tools to answer questions"  # Required by smolagents
        
        # Use local model if provided, or create a simpler one
        self.local_model = local_model
        if not self.local_model:
            try:
                from utils.local_model import LocalTransformersModel
                self.local_model = LocalTransformersModel(
                    model_name="TinyLlama/TinyLlama-1.1B-Chat-v0.6",
                    max_tokens=512
                )
            except Exception as e:
                print(f"Couldn't initialize local model in tool agent: {e}")
                self.local_model = None
        
    def run(self, query: str) -> str:
        """Process a query and return a response using available tools."""
        print(f"Processing query: {query}")
        tools = self.tool_manager.get_tools()
        
        # For each tool, try to get relevant information
        context_info = []
        
        for tool in tools:
            try:
                if self._should_use_tool(tool, query):
                    print(f"Using tool: {tool.name}")
                    result = tool.forward(query)
                    if result:
                        context_info.append(f"{tool.name} Results:\n{result}")
            except Exception as e:
                print(f"Error using {tool.name}: {e}")
        
        # Combine all context information
        full_context = "\n\n".join(context_info) if context_info else ""
        
        # If we have context and a local model, generate a proper response
        if full_context and self.local_model:
            try:
                prompt = f"""
                Based on the following information, please provide a comprehensive answer to the question: "{query}"
                
                CONTEXT INFORMATION:
                {full_context}
                
                Answer:
                """
                
                response = self.local_model.generate(prompt)
                return response
            except Exception as e:
                print(f"Error generating response with local model: {e}")
                # Fall back to returning just the context
                return full_context
        else:
            # No context or no model, return whatever we have
            if not full_context:
                return "I couldn't find any relevant information to answer your question."
            return full_context
        
    def __call__(self, query: str) -> str:
        """Make the agent callable so it can be used directly by CodeAgent."""
        print(f"Tool agent received query: {query}")
        return self.run(query)
    
    def _should_use_tool(self, tool: Tool, query: str) -> bool:
        """Determine if a specific tool should be used for the query."""
        query_lower = query.lower()
        
        # Tool-specific patterns
        patterns = {
            "web_search": ["current", "latest", "recent", "who", "what", "when", "where", "how"],
            "web_content": ["content", "webpage", "website", "page"],
            "youtube_video": ["youtube.com", "youtu.be"],
            "wikipedia_search": ["wikipedia", "wiki", "article"],
            "gaia_retriever": ["gaia", "agent", "ai", "artificial intelligence"]
        }
        
        # Use all tools if patterns dict doesn't have the tool name
        if tool.name not in patterns:
            return True
            
        return any(pattern in query_lower for pattern in patterns.get(tool.name, []))

def create_manager_agent() -> CodeAgent:
    """Build the manager ``CodeAgent`` together with its managed tool agent.

    The model is created from ``config.LOCAL_MODEL_CONFIG``; if importing that
    config or constructing the model fails, a CPU TinyLlama model is used
    instead. The same model instance is shared by the manager and the
    ``GaiaToolCallingAgent`` it manages.

    Returns:
        The configured manager agent.
    """
    try:
        # Imported lazily so a missing/broken config entry lands in the fallback below.
        from config import LOCAL_MODEL_CONFIG as model_cfg

        # A local model avoids hosted-inference credit limits.
        model_kwargs = {
            key: model_cfg[key]
            for key in ("model_name", "device", "max_tokens", "temperature")
        }
        model = LocalTransformersModel(**model_kwargs)
        print(f"Using local model: {model_cfg['model_name']}")
    except Exception as setup_error:
        print(f"Error setting up local model: {setup_error}")
        # Simplified configuration as a last resort.
        model = LocalTransformersModel(model_name="TinyLlama/TinyLlama-1.1B-Chat-v0.6", device="cpu")
        print("Using fallback model configuration")

    # The managed tool-calling agent reuses the manager's model.
    managed_tool_agent = GaiaToolCallingAgent(local_model=model)

    # Modules the manager's generated code is allowed to import.
    allowed_imports = ["json", "pandas", "numpy", "re", "requests", "bs4"]

    manager_agent = CodeAgent(
        model=model,
        tools=[],  # the manager has no direct tools; it delegates to the managed agent
        managed_agents=[managed_tool_agent],
        additional_authorized_imports=allowed_imports,
        planning_interval=3,
        verbosity_level=2,
        max_steps=10,
    )

    print("Manager agent created with local model")
    return manager_agent

def create_agent():
    """Return a ready manager agent, or ``None`` when construction fails.

    Any exception raised while building the agent is printed rather than
    propagated, so callers only need to check for ``None``.
    """
    agent = None
    try:
        print("Initializing GAIA agent system...")
        agent = create_manager_agent()
    except Exception as build_error:
        print(f"Error creating GAIA agent: {build_error}")
    return agent

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Run the whole evaluation round trip for the logged-in user.

    Steps: build the agent, download the question list, answer every question,
    post the answers to the scoring API, and report the outcome.

    Args:
        profile: The user's OAuth profile, or ``None`` when nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        one row per processed question, or ``None`` when the run stopped before
        any question was attempted.
    """
    # Only needed later, to build the link to this Space's code.
    space_id = os.getenv("SPACE_ID")

    # Nothing runs without a Hugging Face login.
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # 1. Initialize Agent
    try:
        print("Initializing GAIA agent system...")
        agent = create_agent()
        if not agent:
            return "Error: Could not initialize agent.", None
        print("GAIA agent initialization complete.")
    except Exception as init_error:
        failure = f"Error initializing agent: {init_error}"
        print(failure)
        return failure, None

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        questions_reply = requests.get(questions_url, timeout=15)
        questions_reply.raise_for_status()
        questions_data = questions_reply.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as request_error:
        failure = f"Error fetching questions: {request_error}"
        print(failure)
        return failure, None
    except Exception as unexpected_error:
        failure = f"An unexpected error occurred fetching questions: {unexpected_error}"
        print(failure)
        return failure, None

    # 3. Run Agent on Questions
    results_log = []       # one row per question, shown in the UI table
    answers_payload = []   # only successfully answered questions are submitted
    print(f"Running agent on {len(questions_data)} questions...")
    for entry in questions_data:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {entry}")
            continue

        try:
            agent_output = agent.run(f"Answer this question concisely: {question_text}")
            # A dict result is unwrapped via its "answer" key; anything else is stringified.
            if isinstance(agent_output, dict):
                submitted_answer = agent_output.get("answer", str(agent_output))
            else:
                submitted_answer = str(agent_output)
        except Exception as agent_error:
            # Failed tasks are logged for display but never submitted.
            print(f"Error running agent on task {task_id}: {agent_error}")
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {agent_error}",
            })
            continue

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": submitted_answer,
        })

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": answers_payload,
    }

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to API...")
    try:
        submit_reply = requests.post(submit_url, json=submission_data, timeout=60)
        submit_reply.raise_for_status()
        result_data = submit_reply.json()

        scored_user = result_data.get('username')
        score = result_data.get('score', 'N/A')
        correct = result_data.get('correct_count', '?')
        attempted = result_data.get('total_attempted', '?')
        api_message = result_data.get('message', 'No message received.')
        status_message = (
            f"Submission Successful!\n"
            f"User: {scored_user}\n"
            f"Overall Score: {score}% "
            f"({correct}/{attempted} correct)\n"
            f"Message: {api_message}"
        )
        print("Submission successful.")
        return status_message, pd.DataFrame(results_log)
    except Exception as submit_error:
        status_message = f"Submission Failed: {str(submit_error)}"
        print(f"Error during submission: {submit_error}")
        return status_message, pd.DataFrame(results_log)

# --- Build Gradio Interface using Blocks ---
# Components are created in display order inside the Blocks context; the
# resulting app object is bound to `demo` and launched by the __main__ guard.
with gr.Blocks() as demo:
    # Page title followed by the usage instructions.
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and see the score.
        
        The agent uses a managed tool-calling architecture and the smolagents framework for reliable answers.
        """
    )

    # Login control, the trigger button, and the two output widgets that
    # receive the (status message, results table) pair from the callback.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs` are wired here even though the callback takes `profile`.
    # NOTE(review): presumably Gradio injects the OAuth profile based on the
    # callback's `gr.OAuthProfile | None` annotation — confirm for the pinned
    # Gradio version.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    # Startup banner, then serve the UI locally (debug on, no public share link).
    rule = "-" * 30
    print(f"\n{rule} GAIA Agent Starting {rule}")
    demo.launch(debug=True, share=False)