Spaces:
Sleeping
Sleeping
Updates to mitigate simulation error
Browse files- agent.py +3 -13
- app.py +100 -60
- requirements.txt +5 -1
agent.py
CHANGED
@@ -134,18 +134,8 @@ def get_tools() -> List[BaseTool]:
|
|
134 |
web_tool
|
135 |
]
|
136 |
|
137 |
-
#
|
138 |
-
#
|
139 |
-
from app import BasicAgent
|
140 |
-
|
141 |
-
def build_agent():
|
142 |
-
"""Build and return a BasicAgent instance."""
|
143 |
-
return BasicAgent()
|
144 |
|
145 |
if __name__ == "__main__":
|
146 |
-
|
147 |
-
agent = build_agent()
|
148 |
-
test_question = "What is the capital of France?"
|
149 |
-
answer = agent(test_question)
|
150 |
-
print(f"Question: {test_question}")
|
151 |
-
print(f"Answer: {answer}")
|
|
|
134 |
web_tool
|
135 |
]
|
136 |
|
137 |
+
# REMOVED circular import from app.py
|
138 |
+
# This file now just defines tools and doesn't attempt to build the agent
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
if __name__ == "__main__":
|
141 |
+
print("This module defines tools for the agent. Run app.py or standalone_debug.py to test the agent.")
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
|
|
5 |
from typing import List, Dict, Any
|
6 |
from dotenv import load_dotenv
|
7 |
import json
|
|
|
8 |
|
9 |
# LlamaIndex Imports
|
10 |
from llama_index.core.llms import LLM
|
@@ -20,27 +21,24 @@ load_dotenv()
|
|
20 |
# (Keep Constants as is)
|
21 |
# --- Constants ---
|
22 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
23 |
|
24 |
# --- Basic Agent Definition ---
|
25 |
class BasicAgent:
|
26 |
"""A LlamaIndex-based agent."""
|
27 |
def __init__(self):
|
28 |
print("BasicAgent initialized.")
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
print("Agent setup complete.")
|
41 |
-
except Exception as e:
|
42 |
-
print(f"Warning: Error during agent initialization: {e}")
|
43 |
-
# Continue despite error - we'll handle this in the __call__ method
|
44 |
|
45 |
def _initialize_llm(self) -> LLM:
|
46 |
"""Initialize the LLM based on configuration."""
|
@@ -75,15 +73,13 @@ class BasicAgent:
|
|
75 |
|
76 |
def _build_agent(self) -> ReActAgent:
|
77 |
"""Build and return the agent."""
|
78 |
-
# Load system prompt from file
|
79 |
try:
|
80 |
with open("system_prompt.txt", "r", encoding="utf-8") as f:
|
81 |
system_prompt = f.read()
|
82 |
-
# Append output format to system prompt
|
83 |
-
system_prompt = f"{system_prompt}\n\nIMPORTANT OUTPUT FORMAT:\n{OUTPUT_FORMAT}"
|
84 |
except Exception as e:
|
85 |
print(f"Error loading system prompt: {e}")
|
86 |
-
system_prompt =
|
87 |
|
88 |
return ReActAgent.from_tools(
|
89 |
tools=self.tools,
|
@@ -95,13 +91,7 @@ class BasicAgent:
|
|
95 |
|
96 |
def __call__(self, question: str) -> str:
|
97 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
98 |
-
|
99 |
try:
|
100 |
-
# Check if agent was properly initialized
|
101 |
-
if not hasattr(self, 'agent') or self.agent is None:
|
102 |
-
# Fallback to a simple response if agent initialization failed
|
103 |
-
return "I'm unable to process your request due to initialization errors."
|
104 |
-
|
105 |
# Process the question
|
106 |
response = self.agent.query(question)
|
107 |
answer_text = str(response)
|
@@ -111,16 +101,24 @@ class BasicAgent:
|
|
111 |
reasoning_trace = answer_text.split("FINAL ANSWER:")[0].strip()
|
112 |
model_answer = answer_text.split("FINAL ANSWER:")[1].strip()
|
113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
print(f"Agent generated answer: {model_answer[:50]}..." if len(model_answer) > 50 else f"Agent generated answer: {model_answer}")
|
115 |
-
return
|
116 |
else:
|
117 |
# If no FINAL ANSWER pattern, return the whole response
|
118 |
print(f"No 'FINAL ANSWER' found in response. Returning full response.")
|
119 |
-
return answer_text
|
120 |
|
121 |
except Exception as e:
|
122 |
print(f"Error generating answer: {e}")
|
123 |
-
|
|
|
124 |
|
125 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
126 |
"""
|
@@ -186,8 +184,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
186 |
print(f"Skipping item with missing task_id or question: {item}")
|
187 |
continue
|
188 |
try:
|
189 |
-
# Get agent response
|
190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
|
192 |
# Add to answers payload
|
193 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
@@ -196,14 +201,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
196 |
results_log.append({
|
197 |
"Task ID": task_id,
|
198 |
"Question": question_text,
|
199 |
-
"Submitted Answer": submitted_answer
|
|
|
200 |
})
|
201 |
|
202 |
-
# Add to JSONL output
|
203 |
jsonl_output.append({
|
204 |
"task_id": task_id,
|
205 |
-
"model_answer":
|
206 |
-
"reasoning_trace":
|
207 |
})
|
208 |
|
209 |
except Exception as e:
|
@@ -278,37 +284,72 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
278 |
results_df = pd.DataFrame(results_log)
|
279 |
return status_message, results_df
|
280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
gr.Markdown(
|
286 |
-
"""
|
287 |
-
**Instructions:**
|
288 |
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
Once you click the "Submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
|
296 |
-
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the long delay after pressing the submit button, a solution could be to cache the answers and submit them in a separate action, or even to answer the questions asynchronously.
|
297 |
-
"""
|
298 |
-
)
|
299 |
|
300 |
-
|
301 |
|
302 |
-
|
303 |
|
304 |
-
|
305 |
-
|
306 |
-
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
307 |
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
312 |
|
313 |
if __name__ == "__main__":
|
314 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
@@ -332,4 +373,3 @@ if __name__ == "__main__":
|
|
332 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
333 |
|
334 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
335 |
-
demo.launch(debug=True, share=False)
|
|
|
5 |
from typing import List, Dict, Any
|
6 |
from dotenv import load_dotenv
|
7 |
import json
|
8 |
+
import traceback
|
9 |
|
10 |
# LlamaIndex Imports
|
11 |
from llama_index.core.llms import LLM
|
|
|
21 |
# (Keep Constants as is)
|
22 |
# --- Constants ---
|
23 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
24 |
+
OUTPUT_FORMAT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
|
25 |
|
26 |
# --- Basic Agent Definition ---
|
27 |
class BasicAgent:
|
28 |
"""A LlamaIndex-based agent."""
|
29 |
def __init__(self):
|
30 |
print("BasicAgent initialized.")
|
31 |
+
# Initialize the core components
|
32 |
+
self.llm = self._initialize_llm()
|
33 |
+
|
34 |
+
# Import get_tools from agent.py here to avoid circular imports
|
35 |
+
from agent import get_tools
|
36 |
+
self.tools = get_tools()
|
37 |
+
|
38 |
+
self.memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
|
39 |
+
# Build the agent
|
40 |
+
self.agent = self._build_agent()
|
41 |
+
print("Agent setup complete.")
|
|
|
|
|
|
|
|
|
42 |
|
43 |
def _initialize_llm(self) -> LLM:
|
44 |
"""Initialize the LLM based on configuration."""
|
|
|
73 |
|
74 |
def _build_agent(self) -> ReActAgent:
|
75 |
"""Build and return the agent."""
|
76 |
+
# Load system prompt from file
|
77 |
try:
|
78 |
with open("system_prompt.txt", "r", encoding="utf-8") as f:
|
79 |
system_prompt = f.read()
|
|
|
|
|
80 |
except Exception as e:
|
81 |
print(f"Error loading system prompt: {e}")
|
82 |
+
system_prompt = "You are an intelligent agent designed to answer a wide variety of questions."
|
83 |
|
84 |
return ReActAgent.from_tools(
|
85 |
tools=self.tools,
|
|
|
91 |
|
92 |
def __call__(self, question: str) -> str:
|
93 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
|
|
94 |
try:
|
|
|
|
|
|
|
|
|
|
|
95 |
# Process the question
|
96 |
response = self.agent.query(question)
|
97 |
answer_text = str(response)
|
|
|
101 |
reasoning_trace = answer_text.split("FINAL ANSWER:")[0].strip()
|
102 |
model_answer = answer_text.split("FINAL ANSWER:")[1].strip()
|
103 |
|
104 |
+
# Include the reasoning trace in the response but formatted for JSON
|
105 |
+
result = {
|
106 |
+
"model_answer": model_answer,
|
107 |
+
"reasoning_trace": reasoning_trace
|
108 |
+
}
|
109 |
+
|
110 |
+
# Return just the answer part for direct evaluation
|
111 |
print(f"Agent generated answer: {model_answer[:50]}..." if len(model_answer) > 50 else f"Agent generated answer: {model_answer}")
|
112 |
+
return json.dumps(result)
|
113 |
else:
|
114 |
# If no FINAL ANSWER pattern, return the whole response
|
115 |
print(f"No 'FINAL ANSWER' found in response. Returning full response.")
|
116 |
+
return json.dumps({"model_answer": answer_text, "reasoning_trace": ""})
|
117 |
|
118 |
except Exception as e:
|
119 |
print(f"Error generating answer: {e}")
|
120 |
+
error_msg = f"I encountered an error while answering your question: {str(e)}"
|
121 |
+
return json.dumps({"model_answer": error_msg, "reasoning_trace": ""})
|
122 |
|
123 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
124 |
"""
|
|
|
184 |
print(f"Skipping item with missing task_id or question: {item}")
|
185 |
continue
|
186 |
try:
|
187 |
+
# Get agent response which is now a JSON string
|
188 |
+
agent_response_json = agent(question_text)
|
189 |
+
agent_response = json.loads(agent_response_json)
|
190 |
+
|
191 |
+
model_answer = agent_response.get("model_answer", "")
|
192 |
+
reasoning_trace = agent_response.get("reasoning_trace", "")
|
193 |
+
|
194 |
+
# Format for submission payload
|
195 |
+
submitted_answer = model_answer
|
196 |
|
197 |
# Add to answers payload
|
198 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
|
|
201 |
results_log.append({
|
202 |
"Task ID": task_id,
|
203 |
"Question": question_text,
|
204 |
+
"Submitted Answer": submitted_answer,
|
205 |
+
"Reasoning": reasoning_trace[:100] + "..." if len(reasoning_trace) > 100 else reasoning_trace
|
206 |
})
|
207 |
|
208 |
+
# Add to JSONL output
|
209 |
jsonl_output.append({
|
210 |
"task_id": task_id,
|
211 |
+
"model_answer": model_answer,
|
212 |
+
"reasoning_trace": reasoning_trace
|
213 |
})
|
214 |
|
215 |
except Exception as e:
|
|
|
284 |
results_df = pd.DataFrame(results_log)
|
285 |
return status_message, results_df
|
286 |
|
287 |
+
# Try to load Gradio components, handling potential OAuth errors
|
288 |
+
try:
|
289 |
+
# --- Build Gradio Interface using Blocks ---
|
290 |
+
with gr.Blocks() as demo:
|
291 |
+
gr.Markdown("# Basic Agent Evaluation Runner")
|
292 |
+
gr.Markdown(
|
293 |
+
"""
|
294 |
+
**Instructions:**
|
295 |
|
296 |
+
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
|
297 |
+
2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
|
298 |
+
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
|
|
|
|
|
|
|
299 |
|
300 |
+
---
|
301 |
+
**Disclaimers:**
|
302 |
+
Once you click the "Submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
|
303 |
+
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the long delay after pressing the submit button, a solution could be to cache the answers and submit them in a separate action, or even to answer the questions asynchronously.
|
304 |
+
"""
|
305 |
+
)
|
|
|
|
|
|
|
|
|
306 |
|
307 |
+
gr.LoginButton()
|
308 |
|
309 |
+
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
310 |
|
311 |
+
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
312 |
+
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
|
|
313 |
|
314 |
+
run_button.click(
|
315 |
+
fn=run_and_submit_all,
|
316 |
+
outputs=[status_output, results_table]
|
317 |
+
)
|
318 |
+
except ImportError as e:
|
319 |
+
print(f"Error initializing Gradio OAuth components: {e}")
|
320 |
+
print("This error is expected when running locally without OAuth dependencies.")
|
321 |
+
print("You can test the agent using standalone_debug.py or mini_test.py instead.")
|
322 |
+
# Create a minimal demo without OAuth if running locally
|
323 |
+
try:
|
324 |
+
import gradio as gr
|
325 |
+
with gr.Blocks() as demo:
|
326 |
+
gr.Markdown("# Agent Test Environment (Local Mode)")
|
327 |
+
gr.Markdown("OAuth dependencies not found. Running in local test mode.")
|
328 |
+
|
329 |
+
with gr.Row():
|
330 |
+
with gr.Column():
|
331 |
+
question_input = gr.Textbox(label="Enter your question", lines=2)
|
332 |
+
test_button = gr.Button("Test Agent")
|
333 |
+
|
334 |
+
with gr.Column():
|
335 |
+
answer_output = gr.Textbox(label="Agent Answer", lines=10)
|
336 |
+
|
337 |
+
def test_agent_locally(question):
|
338 |
+
try:
|
339 |
+
agent = BasicAgent()
|
340 |
+
result = agent(question)
|
341 |
+
return result
|
342 |
+
except Exception as e:
|
343 |
+
return f"Error: {str(e)}\n\n{traceback.format_exc()}"
|
344 |
+
|
345 |
+
test_button.click(
|
346 |
+
fn=test_agent_locally,
|
347 |
+
inputs=[question_input],
|
348 |
+
outputs=[answer_output]
|
349 |
+
)
|
350 |
+
except Exception as e:
|
351 |
+
print(f"Failed to create even minimal Gradio interface: {e}")
|
352 |
+
demo = None
|
353 |
|
354 |
if __name__ == "__main__":
|
355 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
|
|
373 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
374 |
|
375 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
|
requirements.txt
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
-
gradio>=4.0.0
|
2 |
requests>=2.31.0
|
3 |
pandas>=2.0.0
|
4 |
python-dotenv>=1.0.0
|
|
|
5 |
|
6 |
# LlamaIndex packages
|
7 |
llama-index>=0.10.0
|
@@ -10,3 +11,6 @@ llama-index-llms-huggingface>=0.1.0
|
|
10 |
llama-index-llms-huggingface-api>=0.1.0
|
11 |
llama-index-readers-web>=0.1.0
|
12 |
llama-index-readers-wikipedia>=0.1.0
|
|
|
|
|
|
|
|
1 |
+
gradio[oauth]>=4.0.0
|
2 |
requests>=2.31.0
|
3 |
pandas>=2.0.0
|
4 |
python-dotenv>=1.0.0
|
5 |
+
itsdangerous>=2.0.0
|
6 |
|
7 |
# LlamaIndex packages
|
8 |
llama-index>=0.10.0
|
|
|
11 |
llama-index-llms-huggingface-api>=0.1.0
|
12 |
llama-index-readers-web>=0.1.0
|
13 |
llama-index-readers-wikipedia>=0.1.0
|
14 |
+
|
15 |
+
# For Google's newer Gemini API (recommended over the deprecated version)
|
16 |
+
google-generativeai>=0.3.0
|