mjschock committed
Commit 401799d · unverified · 1 Parent(s): 4ff8224

Refactor agent.py and graph.py to enhance agent functionality and logging. Introduce Configuration class for managing parameters, improve state handling in AgentRunner, and update agent graph to support step logging and user interaction. Add new tests for agent capabilities and update requirements for code formatting tools.

Files changed (7)
  1. agent.py +37 -18
  2. app.py +51 -27
  3. configuration.py +33 -0
  4. graph.py +171 -111
  5. requirements.txt +2 -0
  6. test_agent.py +170 -67
  7. tools.py +11 -6
agent.py CHANGED
@@ -1,5 +1,7 @@
-import os
 import logging
+import os
+import uuid
+
 from graph import agent_graph
 
 # Configure logging
@@ -8,34 +10,51 @@ logger = logging.getLogger(__name__)
 
 # Enable LiteLLM debug logging only if environment variable is set
 import litellm
-if os.getenv('LITELLM_DEBUG', 'false').lower() == 'true':
+
+if os.getenv("LITELLM_DEBUG", "false").lower() == "true":
     litellm.set_verbose = True
     logger.setLevel(logging.DEBUG)
 else:
     litellm.set_verbose = False
     logger.setLevel(logging.INFO)
 
+
 class AgentRunner:
+    """Runner class for the code agent."""
+
     def __init__(self):
-        logger.debug("Initializing AgentRunner")
-        logger.info("AgentRunner initialized.")
+        """Initialize the agent runner with graph and tools."""
+        logger.info("Initializing AgentRunner")
+        self.graph = agent_graph
+        self.last_state = None  # Store the last state for testing/debugging
 
     def __call__(self, question: str) -> str:
-        logger.debug(f"Processing question: {question[:50]}...")
-        logger.info(f"Agent received question (first 50 chars): {question[:50]}...")
+        """Process a question through the agent graph and return the answer.
+
+        Args:
+            question: The question to process
+
+        Returns:
+            str: The agent's response
+        """
         try:
-            # Run the graph with the question
-            result = agent_graph.invoke({
-                "messages": [],
+            logger.info(f"Processing question: {question}")
+            initial_state = {
                 "question": question,
-                "answer": None
-            })
-
-            # Extract and return the answer
-            answer = result["answer"]
-            logger.debug(f"Successfully generated answer: {answer}")
-            logger.info(f"Agent returning answer: {answer}")
-            return answer
+                "messages": [],
+                "answer": None,
+                "step_logs": [],
+                "is_complete": False,  # Initialize is_complete
+                "step_count": 0,  # Initialize step_count
+            }
+
+            # Generate a unique thread_id for this interaction
+            thread_id = str(uuid.uuid4())
+            config = {"configurable": {"thread_id": thread_id}}
+
+            final_state = self.graph.invoke(initial_state, config)
+            self.last_state = final_state  # Store the final state
+            return final_state.get("answer", "No answer generated")
         except Exception as e:
-            logger.error(f"Error in agent execution: {str(e)}", exc_info=True)
+            logger.error(f"Error processing question: {str(e)}")
             raise
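A minimal usage sketch of the updated AgentRunner interface, relying only on what this diff adds (a per-call thread_id, last_state, and step_logs):

from agent import AgentRunner

runner = AgentRunner()
answer = runner("What is the result of the following operation: 5 + 3 + 1294.678?")
print(answer)

# last_state keeps the final AgentState for inspection; step_logs is filled by the graph's callback node.
for entry in runner.last_state.get("step_logs", []):
    print(entry)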
app.py CHANGED
@@ -1,23 +1,26 @@
 import os
+
 import gradio as gr
-import requests
 import pandas as pd
+import requests
+
 from agent import AgentRunner
 
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
-def run_and_submit_all( profile: gr.OAuthProfile | None):
+
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the AgentRunner on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
 
     if profile:
-        username= f"{profile.username}"
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -44,16 +47,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-        print(f"Error decoding JSON response from questions endpoint: {e}")
-        print(f"Response text: {response.text[:500]}")
-        return f"Error decoding server response for questions: {e}", None
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
@@ -70,18 +73,36 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             continue
         try:
             submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+            answers_payload.append(
+                {"task_id": task_id, "submitted_answer": submitted_answer}
+            )
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": submitted_answer,
+                }
+            )
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": f"AGENT ERROR: {e}",
+                }
+            )
 
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    # 4. Prepare Submission
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload,
+    }
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
 
@@ -151,20 +172,19 @@ with gr.Blocks() as demo:
 
     run_button = gr.Button("Run Evaluation & Submit All Answers")
 
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    status_output = gr.Textbox(
+        label="Run Status / Submission Result", lines=5, interactive=False
+    )
     # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
 
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -172,14 +192,18 @@ if __name__ == "__main__":
     else:
         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+        print(
+            f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
+        )
     else:
-        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+        print(
+            "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
+        )
 
-    print("-"*(60 + len(" App Starting ")) + "\n")
+    print("-" * (60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    demo.launch(debug=True, share=False)
configuration.py ADDED
@@ -0,0 +1,33 @@
+"""Define the configurable parameters for the agent."""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, fields
+from typing import Optional
+
+from langchain_core.runnables import RunnableConfig
+
+
+@dataclass(kw_only=True)
+class Configuration:
+    """The configuration for the agent."""
+
+    # API configuration
+    api_base: Optional[str] = "http://localhost:11434"
+    api_key: Optional[str] = os.getenv("MODEL_API_KEY")
+    model_id: Optional[str] = (
+        f"ollama/{os.getenv('OLLAMA_MODEL', 'qwen2.5-coder:0.5b')}"
+    )
+
+    # Agent configuration
+    my_configurable_param: str = "changeme"
+
+    @classmethod
+    def from_runnable_config(
+        cls, config: Optional[RunnableConfig] = None
+    ) -> Configuration:
+        """Create a Configuration instance from a RunnableConfig object."""
+        configurable = (config.get("configurable") or {}) if config else {}
+        _fields = {f.name for f in fields(cls) if f.init}
+        return cls(**{k: v for k, v in configurable.items() if k in _fields})
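A small sketch of how Configuration.from_runnable_config is meant to be used: values under the "configurable" key of a RunnableConfig override the dataclass defaults, and keys that are not dataclass fields are ignored.

from configuration import Configuration

config = {
    "configurable": {
        "model_id": "ollama/qwen2.5-coder:0.5b",
        "unknown_key": "silently dropped",
    }
}
cfg = Configuration.from_runnable_config(config)
print(cfg.model_id)               # overridden value
print(cfg.my_configurable_param)  # default "changeme"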
graph.py CHANGED
@@ -1,130 +1,190 @@
+"""Define the agent graph and its components."""
+
 import logging
-from typing import Callable, List, Optional, TypedDict
-from langgraph.graph import StateGraph, END
-from smolagents import CodeAgent, ToolCallingAgent, LiteLLMModel
-from tools import tools
-import yaml
 import os
-import litellm
+import uuid
+from typing import Dict, List, Optional, TypedDict, Union, cast
+
+import yaml
+from langchain_core.language_models import BaseChatModel
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnableConfig
+from langgraph.graph import END, StateGraph
+from langgraph.prebuilt import ToolExecutor, ToolNode
+from langgraph.types import interrupt
+from smolagents import CodeAgent, LiteLLMModel, ToolCallingAgent
+
+from configuration import Configuration
+from tools import tools
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+# Enable LiteLLM debug logging only if environment variable is set
+import litellm
+
+if os.getenv("LITELLM_DEBUG", "false").lower() == "true":
+    litellm.set_verbose = True
+    logger.setLevel(logging.DEBUG)
+else:
+    litellm.set_verbose = False
+    logger.setLevel(logging.INFO)
+
 # Configure LiteLLM to drop unsupported parameters
 litellm.drop_params = True
 
-# Define the state for our agent graph
+# Load default prompt templates from local file
+current_dir = os.path.dirname(os.path.abspath(__file__))
+prompts_dir = os.path.join(current_dir, "prompts")
+yaml_path = os.path.join(prompts_dir, "code_agent.yaml")
+
+with open(yaml_path, "r") as f:
+    prompt_templates = yaml.safe_load(f)
+
+# Initialize the model and agent using configuration
+config = Configuration()
+model = LiteLLMModel(
+    api_base=config.api_base,
+    api_key=config.api_key,
+    model_id=config.model_id,
+)
+
+agent = CodeAgent(
+    add_base_tools=True,
+    max_steps=1,  # Execute one step at a time
+    model=model,
+    prompt_templates=prompt_templates,
+    tools=tools,
+    verbosity_level=logging.DEBUG,
+)
+
+
 class AgentState(TypedDict):
-    messages: list
+    """State for the agent graph."""
+
+    messages: List[Union[HumanMessage, AIMessage, SystemMessage]]
     question: str
-    answer: str | None
+    answer: Optional[str]
+    step_logs: List[Dict]
+    is_complete: bool
+    step_count: int
+
 
 class AgentNode:
-    def __init__(self):
-        # Load default prompt templates from local file
-        current_dir = os.path.dirname(os.path.abspath(__file__))
-        prompts_dir = os.path.join(current_dir, "prompts")
-        # yaml_path = os.path.join(prompts_dir, "toolcalling_agent.yaml")
-        yaml_path = os.path.join(prompts_dir, "code_agent.yaml")
-
-        with open(yaml_path, 'r') as f:
-            prompt_templates = yaml.safe_load(f)
-
-        # Log the default system prompt
-        logger.info("Default system prompt:")
-        logger.info("-" * 80)
-        logger.info(prompt_templates["system_prompt"])
-        logger.info("-" * 80)
-
-        # # Define our custom system prompt
-        # custom_system_prompt = "..."
-
-        # # Update the system prompt in the loaded templates
-        # prompt_templates["system_prompt"] = custom_system_prompt
-
-        # Log our custom system prompt
-        # logger.info("Custom system prompt:")
-        # logger.info("-" * 80)
-        # logger.info(custom_system_prompt)
-        # logger.info("-" * 80)
-
-        # Initialize the model and agent
-        self.model = LiteLLMModel(
-            api_base="http://localhost:11434",
-            api_key=None,
-            model_id="ollama/codellama",
-        )
-
-        # self.agent = ToolCallingAgent(
-        #     max_steps=1,
-        #     model=self.model,
-        #     prompt_templates=prompt_templates,
-        #     tools=tools
-        # )
-
-        step_callbacks: Optional[List[Callable]] = [
-            lambda step: logger.info(f"Step {step.step_number} completed: {step.action}")
-        ]
-
-        self.agent = CodeAgent(
-            add_base_tools=True,
-            max_steps=1,
-            model=self.model,
-            prompt_templates=prompt_templates,
-            step_callbacks=step_callbacks,
-            tools=tools,
-            verbosity_level=logging.DEBUG
-        )
-
-    def __call__(self, state: AgentState) -> AgentState:
+    """Node that runs the agent."""
+
+    def __init__(self, agent: CodeAgent):
+        """Initialize the agent node with an agent."""
+        self.agent = agent
+
+    def __call__(
+        self, state: AgentState, config: Optional[RunnableConfig] = None
+    ) -> AgentState:
+        """Run the agent on the current state."""
+        # Log current state
+        logger.info("Current state before processing:")
+        logger.info(f"Messages: {state['messages']}")
+        logger.info(f"Question: {state['question']}")
+        logger.info(f"Answer: {state['answer']}")
+
+        # Get configuration
+        cfg = Configuration.from_runnable_config(config)
+        logger.info(f"Using configuration: {cfg}")
+
+        # Log execution start
+        logger.info("Starting agent execution")
+
+        # Run the agent
+        result = self.agent.run(state["question"])
+
+        # Log result
+        logger.info(f"Agent execution result type: {type(result)}")
+        logger.info(f"Agent execution result value: {result}")
+
+        # Update state
+        new_state = state.copy()
+        new_state["messages"].append(AIMessage(content=result))
+        new_state["answer"] = result
+        new_state["step_count"] += 1
+
+        # Log updated state
+        logger.info("Updated state after processing:")
+        logger.info(f"Messages: {new_state['messages']}")
+        logger.info(f"Question: {new_state['question']}")
+        logger.info(f"Answer: {new_state['answer']}")
+
+        return new_state
+
+
+class StepCallbackNode:
+    """Node that handles step callbacks and user interaction."""
+
+    def __call__(
+        self, state: AgentState, config: Optional[RunnableConfig] = None
+    ) -> AgentState:
+        """Handle step callback and user interaction."""
+        # Get configuration
+        cfg = Configuration.from_runnable_config(config)
+
+        # Log the step
+        step_log = {
+            "step": state["step_count"],
+            "messages": [msg.content for msg in state["messages"]],
+            "question": state["question"],
+            "answer": state["answer"],
+        }
+        state["step_logs"].append(step_log)
+
         try:
-            # Log the current state before processing
-            logger.info("Current state before processing:")
-            logger.info(f"Messages: {state['messages']}")
-            logger.info(f"Question: {state['question']}")
-            logger.info(f"Answer: {state['answer']}")
-
-            # Process the question through the agent
-            logger.info("Calling agent.run()...")
-            result = self.agent.run(state["question"])
-
-            # Log the result details
-            logger.info("Agent run completed:")
-            logger.info(f"Result type: {type(result)}")
-            logger.info(f"Result value: {result}")
-
-            # Update the state with the answer
-            state["answer"] = result
-
-            # Log the updated state
-            logger.info("Updated state after processing:")
-            logger.info(f"Messages: {state['messages']}")
-            logger.info(f"Question: {state['question']}")
-            logger.info(f"Answer: {state['answer']}")
-
-            return state
-
+            # Use interrupt for user input
+            user_input = interrupt(
+                "Press 'c' to continue, 'q' to quit, or 'i' for more info: "
+            )
+
+            if user_input.lower() == "q":
+                state["is_complete"] = True
+                return state
+            elif user_input.lower() == "i":
+                logger.info(f"Current step: {state['step_count']}")
+                logger.info(f"Question: {state['question']}")
+                logger.info(f"Current answer: {state['answer']}")
+                return self(state, config)  # Recursively call for new input
+            elif user_input.lower() == "c":
+                return state
+            else:
+                logger.warning("Invalid input. Please use 'c', 'q', or 'i'.")
+                return self(state, config)  # Recursively call for new input
+
         except Exception as e:
-            logger.error(f"Error in agent node: {str(e)}", exc_info=True)
-            state["answer"] = f"Error: {str(e)}"
+            logger.warning(f"Error during interrupt: {str(e)}")
             return state
 
-def build_agent_graph():
-    # Create the graph
-    graph = StateGraph(AgentState)
-
-    # Add the agent node
-    graph.add_node("agent", AgentNode())
-
+
+def build_agent_graph(agent: AgentNode) -> StateGraph:
+    """Build the agent graph."""
+    # Initialize the graph
+    workflow = StateGraph(AgentState)
+
+    # Add nodes
+    workflow.add_node("agent", agent)
+    workflow.add_node("callback", StepCallbackNode())
+
     # Add edges
-    graph.add_edge("agent", END)
-
-    # Set the entry point
-    graph.set_entry_point("agent")
-
-    # Compile the graph
-    return graph.compile()
-
-# Create an instance of the compiled graph
-agent_graph = build_agent_graph()
+    workflow.add_edge("agent", "callback")
+    workflow.add_conditional_edges(
+        "callback",
+        lambda x: END if x["is_complete"] else "agent",
+        {True: END, False: "agent"},
+    )
+
+    # Set entry point
+    workflow.set_entry_point("agent")
+
+    return workflow.compile()
+
+
+# Initialize the agent graph
+agent_graph = build_agent_graph(AgentNode(agent))
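Direct invocation mirrors what AgentRunner.__call__ does: the initial AgentState plus a RunnableConfig whose "configurable" dict carries the thread_id and any Configuration overrides that AgentNode and StepCallbackNode read back via Configuration.from_runnable_config. A sketch (the question value is only an example):

import uuid

from graph import agent_graph

config = {
    "configurable": {
        "thread_id": str(uuid.uuid4()),
        "my_configurable_param": "demo-run",  # picked up by Configuration.from_runnable_config
    }
}
initial_state = {
    "messages": [],
    "question": "What is 2 + 2?",
    "answer": None,
    "step_logs": [],
    "is_complete": False,
    "step_count": 0,
}

final_state = agent_graph.invoke(initial_state, config)
print(final_state["answer"])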
requirements.txt CHANGED
@@ -1,5 +1,7 @@
+black>=25.1.0
 duckduckgo-search>=8.0.1
 gradio[oauth]>=5.26.0
+isort>=6.0.1
 langgraph>=0.3.34
 pytest>=8.3.5
 pytest-cov>=6.1.1
test_agent.py CHANGED
@@ -1,84 +1,84 @@
 import logging
+
 import pytest
 import requests
+from langgraph.types import Command
+
 from agent import AgentRunner
 
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
+# Configure test logger
+test_logger = logging.getLogger("test_agent")
+test_logger.setLevel(logging.INFO)
 
 # Suppress specific warnings
-pytestmark = pytest.mark.filterwarnings(
-    "ignore::DeprecationWarning:httpx._models"
-)
+pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning:httpx._models")
 
 # Constants
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"
 
+
 @pytest.fixture(scope="session")
 def agent():
     """Fixture to create and return an AgentRunner instance."""
-    logger.info("Creating AgentRunner instance")
+    test_logger.info("Creating AgentRunner instance")
     return AgentRunner()
 
+
 # @pytest.fixture(scope="session")
 # def questions_data():
 #     """Fixture to fetch questions from the API."""
-#     logger.info(f"Fetching questions from: {QUESTIONS_URL}")
+#     test_logger.info(f"Fetching questions from: {QUESTIONS_URL}")
 #     try:
 #         response = requests.get(QUESTIONS_URL, timeout=15)
 #         response.raise_for_status()
 #         data = response.json()
 #         if not data:
-#             logger.error("Fetched questions list is empty.")
+#             test_logger.error("Fetched questions list is empty.")
 #             return []
-#         logger.info(f"Fetched {len(data)} questions.")
+#         test_logger.info(f"Fetched {len(data)} questions.")
 #         return data
 #     except requests.exceptions.RequestException as e:
-#         logger.error(f"Error fetching questions: {e}")
+#         test_logger.error(f"Error fetching questions: {e}")
 #         return []
 #     except requests.exceptions.JSONDecodeError as e:
-#         logger.error(f"Error decoding JSON response from questions endpoint: {e}")
+#         test_logger.error(f"Error decoding JSON response from questions endpoint: {e}")
 #         return []
 #     except Exception as e:
-#         logger.error(f"An unexpected error occurred fetching questions: {e}")
+#         test_logger.error(f"An unexpected error occurred fetching questions: {e}")
 #         return []
 #
 # class TestAppQuestions:
 #     """Test cases for questions from the app."""
 #
 #     def test_first_app_question(self, agent, questions_data):
 #         """Test the agent's response to the first app question."""
 #         if not questions_data:
 #             pytest.skip("No questions available from API")
 #
 #         first_question = questions_data[0]
 #         question_text = first_question.get("question")
 #         task_id = first_question.get("task_id")
 #
 #         if not question_text or not task_id:
 #             pytest.skip("First question is missing required fields")
 #
-#         logger.info(f"Testing with app question: {question_text}")
+#         test_logger.info(f"Testing with app question: {question_text}")
 #
 #         response = agent(question_text)
-#         logger.info(f"Agent response: {response}")
+#         test_logger.info(f"Agent response: {response}")
 #
 #         # Check that the response contains the expected information
 #         assert "Mercedes Sosa" in response, "Response should mention Mercedes Sosa"
 #         assert "studio albums" in response.lower(), "Response should mention studio albums"
 #         assert "2000" in response and "2009" in response, "Response should mention the year range"
 #
 #         # Verify that a number is mentioned (either as word or digit)
 #         import re
 #         number_pattern = r'\b(one|two|three|four|five|six|seven|eight|nine|ten|\d+)\b'
 #         has_number = bool(re.search(number_pattern, response.lower()))
 #         assert has_number, "Response should include the number of albums"
 #
 #         # Check for album names in the response
 #         known_albums = [
 #             "Corazón Libre",
@@ -89,54 +89,157 @@ def agent():
 #         ]
 #         found_albums = [album for album in known_albums if album in response]
 #         assert len(found_albums) > 0, "Response should mention at least some of the known albums"
 #
 #         # Check for a structured response
 #         assert re.search(r'\d+\.\s+[^(]+\(\d{4}\)', response), \
 #             "Response should list albums with years"
 
+
 class TestBasicCodeAgentCapabilities:
-    """Test cases for basic CodeAgent capabilities using examples from the YAML file."""
-
-    def test_simple_math_calculation(self, agent):
-        """Test the agent's ability to perform basic mathematical operations."""
-        # Test the second example from code_agent.yaml
+    """Test basic capabilities of the code agent."""
+
+    def setup_method(self):
+        """Setup method to initialize the agent before each test."""
+        test_logger.info("Creating AgentRunner instance")
+        self.agent = AgentRunner()
+
+    def test_simple_math_calculation_with_steps(self):
+        """Test that the agent can perform basic math calculations and log steps."""
        question = "What is the result of the following operation: 5 + 3 + 1294.678?"
-
-        logger.info("Testing simple math calculation capabilities")
-        logger.info(f"Question: {question}")
-
-        response = agent(question)
-        logger.info(f"Agent response: {response}")
-
+        test_logger.info(f"Testing math calculation with question: {question}")
+
+        # Run the agent and get the response
+        response = self.agent(question)
+
         # Verify the response contains the correct result
         expected_result = str(5 + 3 + 1294.678)
-        assert expected_result in response, f"Response should contain the result {expected_result}"
-
-        # Check that the response is a clear answer
-        assert "answer" in response.lower(), "Response should indicate it's providing an answer"
-
-    def test_document_qa_and_image_generation(self, agent):
-        """Test the agent's ability to process a document QA task and generate an image."""
-        # Test the first example from code_agent.yaml
-        question = "Generate an image of the oldest person in this document."
-
-        logger.info("Testing document QA and image generation capabilities")
-        logger.info(f"Question: {question}")
-
-        response = agent(question)
-        logger.info(f"Agent response: {response}")
-
-        # Verify the response contains key elements
-        assert "Bob Wilson" in response, "Response should identify Bob Wilson as the oldest person"
-        assert "60" in response, "Response should mention the age 60"
-        assert "engineer" in response, "Response should mention the profession"
-        assert "Vancouver" in response, "Response should mention the location"
-
-        # Check for image generation related content
-        assert "image" in response.lower() or "portrait" in response.lower(), \
-            "Response should indicate image generation"
-        assert "description" in response.lower(), \
-            "Response should include a description of the image"
+        assert (
+            expected_result in response
+        ), f"Response should contain the result {expected_result}"
+
+        # Verify step logs exist and have required fields
+        assert self.agent.last_state is not None, "Agent should store last state"
+        assert "step_logs" in self.agent.last_state, "State should contain step_logs"
+        assert (
+            len(self.agent.last_state["step_logs"]) > 0
+        ), "Should have at least one step logged"
+
+        # Verify each step has required fields
+        for step in self.agent.last_state["step_logs"]:
+            assert "step_number" in step, "Each step should have a step_number"
+            assert any(
+                key in step for key in ["thought", "code", "observation"]
+            ), "Each step should have at least one of: thought, code, or observation"
+
+        # Verify the final answer is indicated
+        assert (
+            "final_answer" in response.lower()
+        ), "Response should indicate it's providing an answer"
+
+    def test_document_qa_and_image_generation_with_steps(self):
+        """Test that the agent can search for information and generate images, with step logging."""
+        question = (
+            "Search for information about the Mona Lisa and generate an image of it."
+        )
+        test_logger.info(
+            f"Testing document QA and image generation with question: {question}"
+        )
+
+        # Run the agent and get the response
+        response = self.agent(question)
+
+        # Verify the response contains both search and image generation
+        assert "mona lisa" in response.lower(), "Response should mention Mona Lisa"
+        assert "image" in response.lower(), "Response should mention image generation"
+
+        # Verify step logs exist and show logical progression
+        assert self.agent.last_state is not None, "Agent should store last state"
+        assert "step_logs" in self.agent.last_state, "State should contain step_logs"
+        assert (
+            len(self.agent.last_state["step_logs"]) > 1
+        ), "Should have multiple steps logged"
+
+        # Verify steps show logical progression
+        steps = self.agent.last_state["step_logs"]
+        search_steps = [step for step in steps if "search" in str(step).lower()]
+        image_steps = [step for step in steps if "image" in str(step).lower()]
+
+        assert len(search_steps) > 0, "Should have search steps"
+        assert len(image_steps) > 0, "Should have image generation steps"
+
+        # Verify each step has required fields
+        for step in steps:
+            assert "step_number" in step, "Each step should have a step_number"
+            assert any(
+                key in step for key in ["thought", "code", "observation"]
+            ), "Each step should have at least one of: thought, code, or observation"
+
+
+def test_simple_math_calculation_with_steps():
+    """Test that the agent can perform a simple math calculation and verify intermediate steps."""
+    agent = AgentRunner()
+    question = "What is the result of the following operation: 5 + 3 + 1294.678?"
+
+    # Process the question
+    response = agent(question)
+
+    # Verify step logs exist and have required fields
+    assert agent.last_state is not None, "Last state should be stored"
+    step_logs = agent.last_state.get("step_logs", [])
+    assert len(step_logs) > 0, "Should have recorded step logs"
+
+    for step in step_logs:
+        assert "step_number" in step, "Each step should have a step number"
+        assert any(
+            key in step for key in ["thought", "code", "observation"]
+        ), "Each step should have at least one of thought/code/observation"
+
+    # Verify final answer
+    expected_result = 1302.678
+    assert (
+        str(expected_result) in response
+    ), f"Response should contain the result {expected_result}"
+    assert (
+        "final_answer" in response.lower()
+    ), "Response should indicate it's using final_answer"
+
+
+def test_document_qa_and_image_generation_with_steps():
+    """Test document QA and image generation with step verification."""
+    agent = AgentRunner()
+    question = "Can you search for information about the Mona Lisa and generate an image inspired by it?"
+
+    # Process the question
+    response = agent(question)
+
+    # Verify step logs exist and demonstrate logical progression
+    assert agent.last_state is not None, "Last state should be stored"
+    step_logs = agent.last_state.get("step_logs", [])
+    assert len(step_logs) > 0, "Should have recorded step logs"
+
+    # Check for search and image generation steps
+    has_search_step = False
+    has_image_step = False
+
+    for step in step_logs:
+        assert "step_number" in step, "Each step should have a step number"
+        assert any(
+            key in step for key in ["thought", "code", "observation"]
+        ), "Each step should have at least one of thought/code/observation"
+
+        # Look for search and image steps in thoughts or code
+        step_content = str(step.get("thought", "")) + str(step.get("code", ""))
+        if "search" in step_content.lower():
+            has_search_step = True
+        if "image" in step_content.lower() or "dalle" in step_content.lower():
+            has_image_step = True
+
+    assert has_search_step, "Should include a search step"
+    assert has_image_step, "Should include an image generation step"
+    assert (
+        "final_answer" in response.lower()
+    ), "Response should indicate it's using final_answer"
+
 
 if __name__ == "__main__":
-    pytest.main([__file__, "-v", "-x"])
+    pytest.main([__file__, "-s", "-v", "-x"])
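The __main__ block runs the whole module; a single test can be selected the same way, keeping the -s/-v flags used above:

import pytest

pytest.main(["test_agent.py::test_simple_math_calculation_with_steps", "-s", "-v"])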
tools.py CHANGED
@@ -1,12 +1,16 @@
 import logging
-from smolagents import DuckDuckGoSearchTool, WikipediaSearchTool, Tool
+
+from smolagents import DuckDuckGoSearchTool, Tool, WikipediaSearchTool
 
 logger = logging.getLogger(__name__)
 
+
 class GeneralSearchTool(Tool):
     name = "search"
     description = """Performs a general web search using both DuckDuckGo and Wikipedia, then returns the combined search results."""
-    inputs = {"query": {"type": "string", "description": "The search query to perform."}}
+    inputs = {
+        "query": {"type": "string", "description": "The search query to perform."}
+    }
     output_type = "string"
 
     def __init__(self, max_results=10, **kwargs):
@@ -22,26 +26,27 @@ class GeneralSearchTool(Tool):
         except Exception as e:
             ddg_results = "No DuckDuckGo results found."
             logger.warning(f"DuckDuckGo search failed: {str(e)}")
-
+
         # Get Wikipedia results
         try:
             wiki_results = self.wiki_tool.forward(query)
         except Exception as e:
             wiki_results = "No Wikipedia results found."
             logger.warning(f"Wikipedia search failed: {str(e)}")
-
+
         # Combine and format results
         output = []
         if ddg_results and ddg_results != "No DuckDuckGo results found.":
             output.append("## DuckDuckGo Search Results\n\n" + ddg_results)
         if wiki_results and wiki_results != "No Wikipedia results found.":
             output.append("## Wikipedia Results\n\n" + wiki_results)
-
+
         if not output:
             raise Exception("No results found! Try a less restrictive/shorter query.")
-
+
         return "\n\n---\n\n".join(output)
 
+
 # Export all tools
 tools = [
     # DuckDuckGoSearchTool(),
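A minimal sketch of calling the combined search tool directly, assuming the forward(query) method whose body appears in the second hunk:

from tools import GeneralSearchTool

search = GeneralSearchTool(max_results=5)

# Returns DuckDuckGo and Wikipedia results joined by a separator,
# or raises if neither backend returned anything.
print(search.forward("Mercedes Sosa studio albums"))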