Final_Assignment_Template

Sleeping

App Files Files Community

EtienneB committed on Jun 30

Commit

c777509

1 Parent(s): 4e8db54

rewritten

Browse files

Files changed (3) hide show

app.py +366 -166
requirements.txt +21 -13
tools.py +422 -122

app.py CHANGED Viewed

@@ -1,6 +1,9 @@
 import asyncio
 import inspect
 import os
 import gradio as gr
 import pandas as pd
@@ -8,35 +11,44 @@ import requests
 from dotenv import load_dotenv
 from langchain_community.chat_models import ChatHuggingFace
 from langchain_community.llms import HuggingFaceEndpoint
-from langchain_core.messages import HumanMessage
 from tools import (absolute, add, divide, exponential, floor_divide,
                    get_current_time_in_timezone, logarithm, modulus, multiply,
                    power, roman_calculator_converter, square_root, subtract,
                    web_search)
-# (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 MAX_AGENT_ITERATIONS = 15
 load_dotenv()
 HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
     def __init__(self):
         if not HUGGINGFACEHUB_API_TOKEN:
             raise ValueError("Missing Hugging Face API token. Please set HUGGINGFACEHUB_API_TOKEN.")
-        print("BasicAgent initialized.")
         self.llm = HuggingFaceEndpoint(
             repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
             huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
         )
-        self.chat = ChatHuggingFace(llm=self.llm, verbose=True)
         self.tools = [
             multiply, add, subtract, power, divide, modulus,
             square_root, floor_divide, absolute, logarithm,
@@ -46,153 +58,349 @@ class BasicAgent:
         self.chat_with_tools = self.chat.bind_tools(self.tools)
         print(f"Total tools available: {len(self.tools)}")
     async def answer(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        messages = [HumanMessage(content=question)]
-        response = await asyncio.to_thread(self.chat_with_tools.invoke, {"messages": messages})
-        return response['messages'][-1].content[14:]
     def answer_sync(self, question: str) -> str:
         """Synchronous version of answer method"""
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        messages = [HumanMessage(content=question)]
-        response = self.chat_with_tools.invoke({"messages": messages})
-        return response.content
-async def run_agent_async(agent, questions_data):
-    """Run agent asynchronously on all questions"""
-    results_log, answers_payload = [], []
-    async def process_question(task_id, question):
         try:
-            answer = await agent.answer(question)
-            return task_id, question, answer, None
         except Exception as e:
-            return task_id, question, None, str(e)
-    # Create tasks for all questions
     tasks = []
-    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
-        if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
-            continue
-        tasks.append(process_question(task_id, question_text))
-    print(f"Processing {len(tasks)} questions asynchronously...")
-    # Process all questions concurrently
-    results = await asyncio.gather(*tasks, return_exceptions=True)
-    for result in results:
-        if isinstance(result, Exception):
-            print(f"Unexpected error: {result}")
-            continue
-        task_id, question, answer, error = result
-        if error:
-            print(f"Error running agent on task {task_id}: {error}")
-            results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"AGENT ERROR: {error}"})
-        else:
-            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
-            results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
     return results_log, answers_payload
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
-    and displays the results.
-    """
-    # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent (modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
-        print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others so please keep it public)
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(agent_code)
-    # 2. Fetch Questions
-    print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
-    except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
-    except requests.exceptions.JSONDecodeError as e:
-        print(f"Error decoding JSON response from questions endpoint: {e}")
-        print(f"Response text: {response.text[:500]}")
-        return f"Error decoding server response for questions: {e}", None
-    except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
-        return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run your Agent
-    results_log = []
-    answers_payload = []
-    # Try async approach first, fall back to sync if needed
     try:
-        print(f"Running agent asynchronously on {len(questions_data)} questions...")
-        results_log, answers_payload = asyncio.run(run_agent_async(agent, questions_data))
     except Exception as e:
-        print(f"Async processing failed: {e}, falling back to synchronous processing...")
-        # Fallback to synchronous processing
-        for item in questions_data:
-            task_id = item.get("task_id")
-            question_text = item.get("question")
-            if not task_id or question_text is None:
-                print(f"Skipping item with missing task_id or question: {item}")
-                continue
-            try:
-                submitted_answer = agent.answer_sync(question_text)
-                answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-                results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-            except Exception as e:
-                print(f"Error running agent on task {task_id}: {e}")
-                results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
-        print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
-    # 5. Submit
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
@@ -200,89 +408,81 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
-        print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
-        print(status_message)
         results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-# --- Build Gradio Interface using Blocks ---
-with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
-        **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
-    # Check for SPACE_HOST and SPACE_ID at startup for information
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-    print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 import asyncio
 import inspect
+import json
 import os
+import time
+from typing import Any, Dict, List, Optional
 import gradio as gr
 import pandas as pd
 from dotenv import load_dotenv
 from langchain_community.chat_models import ChatHuggingFace
 from langchain_community.llms import HuggingFaceEndpoint
+from langchain_core.messages import AIMessage, HumanMessage
+from langchain_core.tools import StructuredTool
 from tools import (absolute, add, divide, exponential, floor_divide,
                    get_current_time_in_timezone, logarithm, modulus, multiply,
                    power, roman_calculator_converter, square_root, subtract,
                    web_search)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 MAX_AGENT_ITERATIONS = 15
+MAX_CONCURRENT_REQUESTS = 5  # Limit concurrent requests to avoid overwhelming the API
 load_dotenv()
 HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
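+# Global in-memory cache shared by all handlers. It holds two kinds of entries:
+# per-question answers keyed "<task_id>_<hash(question)>" and per-user answer
+# payload lists keyed "user_<username>".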
+answer_cache = {}
+class ImprovedAgent:
     def __init__(self):
         if not HUGGINGFACEHUB_API_TOKEN:
             raise ValueError("Missing Hugging Face API token. Please set HUGGINGFACEHUB_API_TOKEN.")
+        print("ImprovedAgent initialized.")
+        # Initialize LLM with better parameters
         self.llm = HuggingFaceEndpoint(
             repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
             huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
+            temperature=0.1,  # Lower temperature for more consistent responses
+            max_new_tokens=1024,
+            timeout=30,
         )
+        self.chat = ChatHuggingFace(llm=self.llm, verbose=False)
+        # Initialize tools
         self.tools = [
             multiply, add, subtract, power, divide, modulus,
             square_root, floor_divide, absolute, logarithm,
         self.chat_with_tools = self.chat.bind_tools(self.tools)
         print(f"Total tools available: {len(self.tools)}")
+        # Create tool mapping for easier access
+        self.tool_map = {tool.name: tool for tool in self.tools}
+    def _extract_tool_calls(self, response) -> List[Dict]:
+        """Extract tool calls from the response"""
+        tool_calls = []
+        if hasattr(response, 'tool_calls') and response.tool_calls:
+            for tool_call in response.tool_calls:
+                tool_calls.append({
+                    'name': tool_call['name'],
+                    'args': tool_call['args']
+                })
+        return tool_calls
+    def _execute_tool_calls(self, tool_calls: List[Dict]) -> List[str]:
+        """Execute tool calls and return results"""
+        results = []
+        for tool_call in tool_calls:
+            tool_name = tool_call['name']
+            tool_args = tool_call['args']
+            if tool_name in self.tool_map:
+                try:
+                    tool = self.tool_map[tool_name]
+                    result = tool.invoke(tool_args)
+                    results.append(f"Tool {tool_name} result: {result}")
+                except Exception as e:
+                    results.append(f"Tool {tool_name} error: {str(e)}")
+            else:
+                results.append(f"Unknown tool: {tool_name}")
+        return results
     async def answer(self, question: str) -> str:
+        """Improved answer method with better error handling and tool usage"""
+        print(f"Processing question: {question[:100]}...")
+        try:
+            # Create system prompt for better instruction following
+            system_prompt = """You are a helpful AI assistant with access to various tools.
+            When answering questions, use the appropriate tools when needed and provide clear, concise answers.
+            If you need to perform calculations, use the math tools available.
+            If you need current information, use the web search tool.
+            Always provide a final answer after using tools."""
+            messages = [
+                HumanMessage(content=f"{system_prompt}\n\nQuestion: {question}")
+            ]
+            # Initial response
+            response = await asyncio.to_thread(self.chat_with_tools.invoke, messages)
+            # Handle tool calls if present
+            max_iterations = 3
+            iteration = 0
+            while iteration < max_iterations:
+                tool_calls = self._extract_tool_calls(response)
+                if not tool_calls:
+                    break
+                # Execute tool calls
+                tool_results = self._execute_tool_calls(tool_calls)
+                # Add tool results to conversation
+                messages.append(AIMessage(content=response.content))
+                messages.append(HumanMessage(content=f"Tool results: {'; '.join(tool_results)}. Please provide a final answer based on these results."))
+                # Get next response
+                response = await asyncio.to_thread(self.chat_with_tools.invoke, messages)
+                iteration += 1
+            # Extract final answer
+            final_answer = response.content.strip()
+            # Clean up the response - remove any tool call artifacts
+            if "Tool " in final_answer and "result:" in final_answer:
+                # Try to extract just the final answer part
+                lines = final_answer.split('\n')
+                for line in reversed(lines):
+                    if line.strip() and not line.startswith('Tool ') and not 'result:' in line:
+                        final_answer = line.strip()
+                        break
+            return final_answer
+        except Exception as e:
+            print(f"Error in answer method: {e}")
+            return f"Error processing question: {str(e)}"
     def answer_sync(self, question: str) -> str:
         """Synchronous version of answer method"""
         try:
+            return asyncio.run(self.answer(question))
         except Exception as e:
+            print(f"Error in sync answer: {e}")
+            return f"Error: {str(e)}"
+async def process_questions_batch(agent, questions_batch, semaphore):
+    """Process a batch of questions with rate limiting"""
+    results = []
+    async def process_single_question(task_id, question):
+        async with semaphore:
+            try:
+                # Check cache first
+                cache_key = f"{task_id}_{hash(question)}"
+                if cache_key in answer_cache:
+                    print(f"Using cached answer for task {task_id}")
+                    return task_id, question, answer_cache[cache_key], None
+                answer = await agent.answer(question)
+                # Cache the result
+                answer_cache[cache_key] = answer
+                return task_id, question, answer, None
+            except Exception as e:
+                print(f"Error processing task {task_id}: {e}")
+                return task_id, question, None, str(e)
+    # Build one coroutine per well-formed question; the semaphore passed in by the caller limits concurrency
     tasks = []
+    for item in questions_batch:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        if task_id and question_text is not None:
+            tasks.append(process_single_question(task_id, question_text))
+    if tasks:
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+    return results
+async def run_agent_async_improved(agent, questions_data):
+    """Improved async processing with batching and caching"""
+    results_log, answers_payload = [], []
+    # Create semaphore for rate limiting
+    semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
+    # Process questions in batches
+    batch_size = 10
+    batches = [questions_data[i:i + batch_size] for i in range(0, len(questions_data), batch_size)]
+    print(f"Processing {len(questions_data)} questions in {len(batches)} batches...")
+    for i, batch in enumerate(batches):
+        print(f"Processing batch {i+1}/{len(batches)} ({len(batch)} questions)...")
+        try:
+            batch_results = await process_questions_batch(agent, batch, semaphore)
+            for result in batch_results:
+                if isinstance(result, Exception):
+                    print(f"Batch processing error: {result}")
+                    continue
+                task_id, question, answer, error = result
+                if error:
+                    print(f"Error in task {task_id}: {error}")
+                    results_log.append({
+                        "Task ID": task_id,
+                        "Question": question[:100] + "..." if len(question) > 100 else question,
+                        "Submitted Answer": f"ERROR: {error}"
+                    })
+                else:
+                    answers_payload.append({"task_id": task_id, "submitted_answer": answer})
+                    results_log.append({
+                        "Task ID": task_id,
+                        "Question": question[:100] + "..." if len(question) > 100 else question,
+                        "Submitted Answer": answer[:200] + "..." if len(answer) > 200 else answer
+                    })
+            # Small delay between batches to be respectful
+            if i < len(batches) - 1:
+                await asyncio.sleep(1)
+        except Exception as e:
+            print(f"Error processing batch {i+1}: {e}")
+            # Continue with next batch
+            continue
     return results_log, answers_payload
+def cache_answers(profile: gr.OAuthProfile | None):
+    """Cache answers without submitting"""
+    if not profile:
+        return "Please log in to Hugging Face first.", None
+    username = profile.username
+    print(f"Caching answers for user: {username}")
+    # Fetch questions
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+            return "No questions found.", None
+        print(f"Fetched {len(questions_data)} questions for caching.")
+        # Initialize agent
+        agent = ImprovedAgent()
+        # Process questions
+        results_log, answers_payload = asyncio.run(run_agent_async_improved(agent, questions_data))
+        # Store in global cache with username
+        answer_cache[f"user_{username}"] = answers_payload
+        status = f"Cached {len(answers_payload)} answers for user {username}. Ready to submit!"
+        results_df = pd.DataFrame(results_log)
+        return status, results_df
+    except Exception as e:
+        print(f"Error caching answers: {e}")
+        return f"Error caching answers: {e}", None
+def submit_cached_answers(profile: gr.OAuthProfile | None):
+    """Submit previously cached answers"""
+    if not profile:
+        return "Please log in to Hugging Face first.", None
+    username = profile.username
+    cache_key = f"user_{username}"
+    if cache_key not in answer_cache:
+        return "No cached answers found. Please run 'Cache Answers' first.", None
+    answers_payload = answer_cache[cache_key]
+    if not answers_payload:
+        return "No answers to submit.", None
+    # Get space info
+    space_id = os.getenv("SPACE_ID")
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
+    # Submit
+    api_url = DEFAULT_API_URL
+    submit_url = f"{api_url}/submit"
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload
+    }
+    try:
+        print(f"Submitting {len(answers_payload)} cached answers...")
+        response = requests.post(submit_url, json=submission_data, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
+        )
+        # Clear cache after successful submission
+        if cache_key in answer_cache:
+            del answer_cache[cache_key]
+        return final_status, None
+    except Exception as e:
+        print(f"Submission error: {e}")
+        return f"Submission failed: {e}", None
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """Original function - now improved with better error handling"""
+    if not profile:
+        return "Please log in to Hugging Face first.", None
+    username = profile.username
+    print(f"User logged in: {username}")
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # Initialize agent
     try:
+        agent = ImprovedAgent()
     except Exception as e:
+        print(f"Error initializing agent: {e}")
         return f"Error initializing agent: {e}", None
+    # Get space info
+    space_id = os.getenv("SPACE_ID")
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
+    # Fetch questions
     try:
+        print(f"Fetching questions from: {questions_url}")
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            return "No questions found.", None
         print(f"Fetched {len(questions_data)} questions.")
+    except Exception as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
+    # Process questions
     try:
+        results_log, answers_payload = asyncio.run(run_agent_async_improved(agent, questions_data))
     except Exception as e:
+        print(f"Error processing questions: {e}")
+        return f"Error processing questions: {e}", None
     if not answers_payload:
+        return "No answers generated.", pd.DataFrame(results_log) if results_log else None
+    # Submit answers
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload
+    }
     try:
+        print(f"Submitting {len(answers_payload)} answers...")
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except Exception as e:
+        print(f"Submission error: {e}")
         results_df = pd.DataFrame(results_log)
+        return f"Submission failed: {e}", results_df
+# --- Build Gradio Interface ---
+with gr.Blocks(title="Improved Agent Evaluation") as demo:
+    gr.Markdown("# Improved Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
+        1. Log in to your Hugging Face account using the button below.
+        2. **Recommended**: Use "Cache Answers" to process all questions first, then "Submit Cached Answers" to submit them.
+        3. **Alternative**: Use "Run & Submit All" for the original one-step process.
+        **Improvements:**
+        - ✅ Async processing with rate limiting
+        - ✅ Answer caching for faster resubmissions
+        - ✅ Better error handling and recovery
+        - ✅ Batch processing to avoid timeouts
+        - ✅ Improved tool usage and response parsing
         ---
         """
     )
     gr.LoginButton()
+    with gr.Row():
+        cache_button = gr.Button("🔄 Cache Answers", variant="secondary")
+        submit_button = gr.Button("📤 Submit Cached Answers", variant="primary")
+        run_all_button = gr.Button("🚀 Run & Submit All", variant="secondary")
+    status_output = gr.Textbox(label="Status", lines=6, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    # Wire up the buttons
+    cache_button.click(
+        fn=cache_answers,
+        outputs=[status_output, results_table]
+    )
+    submit_button.click(
+        fn=submit_cached_answers,
+        outputs=[status_output, results_table]
+    )
+    run_all_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
+    print("\n" + "-"*30 + " Improved App Starting " + "-"*30)
+    space_host = os.getenv("SPACE_HOST")
+    space_id = os.getenv("SPACE_ID")
+    if space_host:
+        print(f"✅ SPACE_HOST: {space_host}")
+        print(f"   Runtime URL: https://{space_host}.hf.space")
     else:
+        print("ℹ️  Running locally - SPACE_HOST not found.")
+    if space_id:
+        print(f"✅ SPACE_ID: {space_id}")
+        print(f"   Repo URL: https://huggingface.co/spaces/{space_id}")
+    else:
+        print("ℹ️  SPACE_ID not found.")
+    print("-" * 76 + "\n")
+    print("Launching Improved Gradio Interface...")
+    demo.launch(debug=True, share=False)

requirements.txt CHANGED Viewed

@@ -1,22 +1,30 @@
 # UI and OAuth
-gradio[oauth]
-requests
-pandas
 # LangChain and ecosystem
-langchain
-langchain-core
-langchain-community
-langgraph
 # Hugging Face integration
-huggingface_hub
-transformers
-accelerate  # Needed for many transformer-based models
 # Environment config
-python-dotenv
 # Tools dependencies
-duckduckgo-search  # Required for web_search tool
-pytz              # Required for get_current_time_in_timezone tool

 # UI and OAuth
+gradio[oauth]>=4.0.0
+requests>=2.31.0
+pandas>=2.0.0
 # LangChain and ecosystem
+langchain>=0.1.0
+langchain-core>=0.1.0
+langchain-community>=0.0.20
+langgraph>=0.0.30
 # Hugging Face integration
+huggingface_hub>=0.19.0
+transformers>=4.35.0
+accelerate>=0.24.0  # Needed for many transformer-based models
 # Environment config
+python-dotenv>=1.0.0
 # Tools dependencies
+duckduckgo-search>=3.9.0  # Required for web_search tool
+pytz>=2023.3             # Required for get_current_time_in_timezone tool
+# Additional utilities for better error handling and performance
+typing-extensions>=4.8.0
+asyncio-throttle>=1.0.2  # For rate limiting (optional)
+tenacity>=8.2.0          # For retry logic (optional)
+# Optional: For better logging and monitoring
+loguru>=0.7.0            # Better logging (optional)

tools.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import datetime
 import math
 import pytz
 from langchain_community.tools import DuckDuckGoSearchRun
@@ -7,180 +9,212 @@ from langchain_core.tools import tool
 @tool
-def multiply(a: int, b:int) -> int:
-    """Multiplies two integers and returns the product.
     Args:
-        a (int): The first integer.
-        b (int): The second integer.
     Returns:
-        int: The product of the two input integers.
     """
-    return a * b
 @tool
-def add(a: int, b:int) -> int:
-    """Adds two integers and returns the sum.
     Args:
-        a (int): The first integer.
-        b (int): The second integer.
     Returns:
-        int: The sum of the two input integers.
     """
-    return a + b
 @tool
-def power(a: float, b: float) -> float:
     """Raises a number to the power of another.
     Args:
-        a (float): The base number.
-        b (float): The exponent.
     Returns:
-        float: The result of raising `a` to the power of `b`.
     """
-    return a ** b
 @tool
-def subtract(a: float, b: float) -> float:
     """Subtracts the second number from the first.
     Args:
-        a (float): The number from which to subtract.
-        b (float): The number to subtract.
     Returns:
-        float: The result of `a` minus `b`.
     """
-    return a - b
 @tool
-def divide(a: float, b: float) -> float:
     """Divides one number by another.
     Args:
-        a (float): The numerator.
-        b (float): The denominator.
     Returns:
-        float: The result of `a` divided by `b`.
-    Raises:
-        ValueError: If `b` is zero.
     """
-    if b == 0:
-        raise ValueError("Divide by zero is not allowed")
-    return a / b
 @tool
-def modulus(a: int, b: int) -> int:
     """Returns the remainder of the division of two integers.
     Args:
-        a (int): The dividend.
-        b (int): The divisor.
     Returns:
-        int: The remainder when `a` is divided by `b`.
-    Raises:
-        ValueError: If `b` is zero.
     """
-    if b == 0:
-        raise ValueError("Modulus by zero is not allowed")
-    return a % b
 @tool
-def square_root(x: float) -> float:
     """Returns the square root of a number.
     Args:
-        x (float): The input number. Must be non-negative.
     Returns:
-        float: The square root of `x`.
-    Raises:
-        ValueError: If `x` is negative.
     """
-    if x < 0:
-        raise ValueError("Square root of negative number is not allowed")
-    return math.sqrt(x)
 @tool
-def floor_divide(a: int, b: int) -> int:
     """Performs integer division (floor division) of two numbers.
     Args:
-        a (int): The dividend.
-        b (int): The divisor.
     Returns:
-        int: The floor of the quotient.
-        Returns the quotient rounded down to the nearest integer.
-    Raises:
-        ValueError: If `b` is zero.
     """
-    if b == 0:
-        raise ValueError("Division by zero is not allowed")
-    return a // b
 @tool
-def absolute(x: float) -> float:
     """Returns the absolute value of a number.
     Args:
-        x (float): The input number.
     Returns:
-        float: The absolute value of `x`.
     """
-    return abs(x)
 @tool
-def logarithm(x: float, base: float = math.e) -> float:
     """Returns the logarithm of a number with a given base.
     Args:
-        x (float): The number to take the logarithm of. Must be positive.
-        base (float): The logarithmic base. Must be positive and not equal to 1.
     Returns:
-        float: The logarithm of `x` to the given base.
-    Raises:
-        ValueError: If `x <= 0` or `base <= 0` or `base == 1`.
     """
-    if x <= 0 or base <= 0 or base == 1:
-        raise ValueError("Invalid input for logarithm")
-    return math.log(x, base)
 @tool
-def exponential(x: float) -> float:
     """Returns e raised to the power of `x`.
     Args:
-        x (float): The exponent.
     Returns:
-        float: The value of e^x.
     """
-    return math.exp(x)
 @tool
@@ -188,67 +222,333 @@ def web_search(query: str) -> str:
     """Performs a DuckDuckGo search for the given query and returns the results.
     Args:
-        query (str): The search query.
     Returns:
-        str: The top search results as a string.
     """
-    search_tool = DuckDuckGoSearchRun()
-    return search_tool.invoke(query)
 @tool
 def roman_calculator_converter(value1: int, value2: int, oper: str) -> str:
-    """A tool that performs an operator on 2 numbers to calculate the result
-    Args:
-        value1: the first value
-        value2: the second value
-        oper: operator for the calculation, like "add", "subtract", "multiply", "divide"
-    """
-    roman_numerals = {
-        1000: "M", 900: "CM", 500: "D", 400: "CD",
-        100: "C", 90: "XC", 50: "L", 40: "XL",
-        10: "X", 9: "IX", 5: "V", 4: "IV", 1: "I"
-    }
-    roman_string = ""
-    if oper == "add":
-        result = value1 + value2
-    elif oper == "subtract":
-        result = value1 - value2  # Fixed: was value2 - value1
-    elif oper == "divide":
-        if value2 == 0:
-            return "Error: Division by zero is not allowed"
-        result = int(value1 / value2)  # Convert to int for Roman numerals
-    elif oper == "multiply":
-        result = value1 * value2
-    else:
-        return "Unsupported operation. Please use 'add', 'subtract', 'multiply', or 'divide'."
-    # Handle negative results
-    if result <= 0:
-        return f"Error: Roman numerals cannot represent zero or negative numbers. Result was: {result}"
-    for value, numeral in roman_numerals.items():
-        while result >= value:
-            roman_string += numeral
-            result -= value
-    return f"The result of {oper} on the values {value1} and {value2} is the Roman numeral: {roman_string}"
 @tool
 def get_current_time_in_timezone(timezone: str) -> str:
-    """A tool that fetches the current local time in a specified timezone.
     Args:
-        timezone: A string representing a valid timezone (e.g., 'America/New_York').
     """
     try:
         # Create timezone object
         tz = pytz.timezone(timezone)
         # Get current time in that timezone
-        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
-        return f"The current local time in {timezone} is: {local_time}"
     except Exception as e:
         return f"Error fetching time for timezone '{timezone}': {str(e)}"

 import datetime
 import math
+import re
+from typing import Union
 import pytz
 from langchain_community.tools import DuckDuckGoSearchRun
 @tool
def multiply(a: Union[int, float], b: Union[int, float]) -> Union[int, float, str]:
    """Multiply two numbers and return the product.

    Args:
        a: The first number.
        b: The second number.

    Returns:
        The product ``a * b`` (an int when both inputs are ints), or an
        ``"Error in multiplication: ..."`` string if the operands cannot
        be multiplied.
    """
    try:
        # int * int already yields an int, so no extra cast is required.
        return a * b
    except Exception as e:
        # Tools report failures as text so the calling agent can read them.
        return f"Error in multiplication: {str(e)}"
 @tool
def add(a: Union[int, float], b: Union[int, float]) -> Union[int, float, str]:
    """Add two numbers and return the sum.

    Args:
        a: The first number.
        b: The second number.

    Returns:
        The sum ``a + b`` (an int when both inputs are ints), or an
        ``"Error in addition: ..."`` string if the operands cannot be added.
    """
    try:
        # int + int already yields an int, so no extra cast is required.
        return a + b
    except Exception as e:
        # Tools report failures as text so the calling agent can read them.
        return f"Error in addition: {str(e)}"
 @tool
def power(a: Union[int, float], b: Union[int, float]) -> Union[int, float, str]:
    """Raise a number to the power of another.

    Args:
        a: The base number.
        b: The exponent.

    Returns:
        The real-valued result of ``a ** b``, or an ``"Error ..."`` string
        when the result is undefined (zero to a negative power), not a real
        number (negative base with a fractional exponent), or too large.
    """
    try:
        if a == 0 and b < 0:
            return "Error: Cannot raise 0 to a negative power"
        result = a ** b
    except OverflowError:
        return "Error: Result too large to compute"
    except Exception as e:
        return f"Error in power calculation: {str(e)}"
    # Python returns a complex number for e.g. (-8) ** 0.5; that is not a
    # usable answer for a real-number calculator, so report it explicitly.
    if isinstance(result, complex):
        return "Error: Negative base with a fractional exponent has no real result"
    return result
 @tool
def subtract(a: Union[int, float], b: Union[int, float]) -> Union[int, float, str]:
    """Subtract the second number from the first.

    Args:
        a: The number from which to subtract.
        b: The number to subtract.

    Returns:
        The difference ``a - b`` (an int when both inputs are ints), or an
        ``"Error in subtraction: ..."`` string if the operands cannot be
        subtracted.
    """
    try:
        # int - int already yields an int, so no extra cast is required.
        return a - b
    except Exception as e:
        # Tools report failures as text so the calling agent can read them.
        return f"Error in subtraction: {str(e)}"
 @tool
def divide(a: Union[int, float], b: Union[int, float]) -> Union[float, str]:
    """Divide one number by another using true division.

    Args:
        a: The numerator.
        b: The denominator.

    Returns:
        The quotient ``a / b`` as a float, or an ``"Error ..."`` string when
        ``b`` is zero or the operands cannot be divided.
    """
    try:
        if b == 0:
            return "Error: Division by zero is not allowed"
        return a / b
    except Exception as e:
        # Covers non-numeric operands and quotients too large for a float.
        return f"Error in division: {str(e)}"
 @tool
def modulus(a: int, b: int) -> Union[int, str]:
    """Return the remainder of dividing one integer by another.

    Args:
        a: The dividend.
        b: The divisor.

    Returns:
        ``a % b``, or an ``"Error ..."`` string when ``b`` is zero or the
        operation fails.
    """
    try:
        if b == 0:
            outcome = "Error: Modulus by zero is not allowed"
        else:
            outcome = a % b
        return outcome
    except Exception as exc:
        return f"Error in modulus operation: {exc!s}"
 @tool
def square_root(x: Union[int, float]) -> Union[float, str]:
    """Return the square root of a non-negative number.

    Args:
        x: The input number. Must be non-negative.

    Returns:
        ``math.sqrt(x)``, or an ``"Error ..."`` string when ``x`` is negative
        or the computation fails.
    """
    try:
        return (
            "Error: Square root of negative number is not allowed"
            if x < 0
            else math.sqrt(x)
        )
    except Exception as exc:
        return f"Error in square root calculation: {exc!s}"
 @tool
def floor_divide(a: int, b: int) -> Union[int, str]:
    """Perform floor (integer) division of two numbers.

    Args:
        a: The dividend.
        b: The divisor.

    Returns:
        ``a // b`` (rounded toward negative infinity), or an ``"Error ..."``
        string when ``b`` is zero or the operation fails.
    """
    try:
        if b == 0:
            outcome = "Error: Division by zero is not allowed"
        else:
            outcome = a // b
        return outcome
    except Exception as exc:
        return f"Error in floor division: {exc!s}"
 @tool
def absolute(x: Union[int, float]) -> Union[int, float, str]:
    """Return the absolute value of a number.

    Args:
        x: The input number.

    Returns:
        ``abs(x)`` (an int for int input, a float for float input), or an
        ``"Error in absolute value calculation: ..."`` string if ``x`` has
        no absolute value.
    """
    try:
        # abs() of an int is already an int, so no extra cast is required.
        return abs(x)
    except Exception as e:
        # Tools report failures as text so the calling agent can read them.
        return f"Error in absolute value calculation: {str(e)}"
 @tool
def logarithm(x: Union[int, float], base: Union[int, float] = math.e) -> Union[float, str]:
    """Return the logarithm of a number with a given base.

    Args:
        x: The number to take the logarithm of. Must be positive.
        base: The logarithmic base. Must be positive and not equal to 1.
            Defaults to e (natural logarithm).

    Returns:
        The logarithm of ``x`` to the given base, or an ``"Error ..."``
        string when an argument is out of range or the computation fails.
    """
    try:
        if x <= 0:
            return "Error: Logarithm input must be positive"
        if base <= 0 or base == 1:
            return "Error: Logarithm base must be positive and not equal to 1"
        # math.log(x, base) divides two rounded logs, so exact powers can
        # come out slightly off (log(1000, 10) -> 2.9999999999999996). The
        # dedicated functions are more accurate for the two common bases.
        if base == 10:
            return math.log10(x)
        if base == 2:
            return math.log2(x)
        return math.log(x, base)
    except Exception as e:
        return f"Error in logarithm calculation: {str(e)}"
 @tool
def exponential(x: Union[int, float]) -> Union[float, str]:
    """Return e raised to the power of ``x``.

    Args:
        x: The exponent.

    Returns:
        The value of e^x, or an ``"Error ..."`` string when the result does
        not fit in a float or the computation fails.
    """
    try:
        return math.exp(x)
    except OverflowError:
        # Let math.exp decide what overflows instead of a hard-coded cutoff,
        # so every exponent that still fits in a float is answered.
        return "Error: Exponent too large, would cause overflow"
    except Exception as e:
        return f"Error in exponential calculation: {str(e)}"
 @tool
     """Performs a DuckDuckGo search for the given query and returns the results.
     Args:
+        query: The search query.
     Returns:
+        The top search results as a string.
     """
+    try:
+        if not query or not query.strip():
+            return "Error: Search query cannot be empty"
+        search_tool = DuckDuckGoSearchRun()
+        results = search_tool.invoke(query.strip())
+        # Clean up the results a bit
+        if len(results) > 2000:  # Truncate very long results
+            results = results[:2000] + "... (truncated)"
+        return results
+    except Exception as e:
+        return f"Error performing web search: {str(e)}"
 @tool
 def roman_calculator_converter(value1: int, value2: int, oper: str) -> str:
+    """Performs an operation on 2 numbers and returns the result as a Roman numeral.
+    Args:
+        value1: The first value
+        value2: The second value
+        oper: Operator for the calculation ("add", "subtract", "multiply", "divide")
+    Returns:
+        The result as a Roman numeral string.
+    """
+    try:
+        # Input validation
+        if not isinstance(value1, int) or not isinstance(value2, int):
+            return "Error: Both values must be integers"
+        if oper not in ["add", "subtract", "multiply", "divide"]:
+            return "Error: Operator must be 'add', 'subtract', 'multiply', or 'divide'"
+        # Roman numeral mapping
+        roman_numerals = [
+            (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"),
+            (100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
+            (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I")
+        ]
+        # Perform calculation
+        if oper == "add":
+            result = value1 + value2
+        elif oper == "subtract":
+            result = value1 - value2
+        elif oper == "multiply":
+            result = value1 * value2
+        elif oper == "divide":
+            if value2 == 0:
+                return "Error: Division by zero is not allowed"
+            result = int(value1 / value2)  # Integer division for Roman numerals
+        # Handle invalid results for Roman numerals
+        if result <= 0:
+            return f"Error: Roman numerals cannot represent zero or negative numbers. Result was: {result}"
+        if result > 3999:  # Roman numerals traditionally don't go beyond this
+            return f"Error: Result ({result}) is too large for standard Roman numeral representation"
+        # Convert to Roman numeral
+        roman_string = ""
+        for value, numeral in roman_numerals:
+            count = result // value
+            if count:
+                roman_string += numeral * count
+                result -= value * count
+        return f"The result of {oper}ing {value1} and {value2} is: {roman_string}"
+    except Exception as e:
+        return f"Error in Roman calculator: {str(e)}"
 @tool
 def get_current_time_in_timezone(timezone: str) -> str:
+    """Fetches the current local time in a specified timezone.
     Args:
+        timezone: A string representing a valid timezone (e.g., 'America/New_York', 'Europe/London').
+    Returns:
+        The current time in the specified timezone.
     """
     try:
+        if not timezone or not timezone.strip():
+            return "Error: Timezone cannot be empty"
+        # Clean the timezone string
+        timezone = timezone.strip()
+        # Handle common timezone aliases
+        timezone_aliases = {
+            'EST': 'America/New_York',
+            'PST': 'America/Los_Angeles',
+            'MST': 'America/Denver',
+            'CST': 'America/Chicago',
+            'GMT': 'GMT',
+            'UTC': 'UTC',
+            'CET': 'Europe/Berlin',
+            'JST': 'Asia/Tokyo',
+        }
+        if timezone.upper() in timezone_aliases:
+            timezone = timezone_aliases[timezone.upper()]
         # Create timezone object
         tz = pytz.timezone(timezone)
         # Get current time in that timezone
+        local_time = datetime.datetime.now(tz)
+        formatted_time = local_time.strftime("%Y-%m-%d %H:%M:%S %Z")
+        return f"The current local time in {timezone} is: {formatted_time}"
+    except pytz.exceptions.UnknownTimeZoneError:
+        return f"Error: Unknown timezone '{timezone}'. Please use a valid timezone like 'America/New_York' or 'Europe/London'"
     except Exception as e:
         return f"Error fetching time for timezone '{timezone}': {str(e)}"
+# Additional utility tools that might be helpful
+@tool
def factorial(n: int) -> Union[int, str]:
    """Calculate the factorial of a non-negative integer.

    Args:
        n: A non-negative integer, at most 170.

    Returns:
        ``n!`` as an int, or an ``"Error ..."`` string when ``n`` is not an
        integer, is negative, or exceeds the 170 cap.
    """
    try:
        if isinstance(n, int):
            if 0 <= n <= 170:
                return math.factorial(n)
            # Out of range: say which side of the allowed window was missed.
            return (
                "Error: Factorial is not defined for negative numbers"
                if n < 0
                else "Error: Number too large for factorial calculation"
            )
        return "Error: Input must be an integer"
    except Exception as exc:
        return f"Error calculating factorial: {exc!s}"
+@tool
def greatest_common_divisor(a: int, b: int) -> Union[int, str]:
    """Find the greatest common divisor of two integers.

    Args:
        a: First integer.
        b: Second integer.

    Returns:
        The non-negative greatest common divisor of ``a`` and ``b``, or an
        ``"Error ..."`` string when either input is not an integer.
    """
    try:
        if all(isinstance(operand, int) for operand in (a, b)):
            magnitude_a, magnitude_b = abs(a), abs(b)
            return math.gcd(magnitude_a, magnitude_b)
        return "Error: Both inputs must be integers"
    except Exception as exc:
        return f"Error calculating GCD: {exc!s}"
+@tool
def least_common_multiple(a: int, b: int) -> Union[int, str]:
    """Find the least common multiple of two integers.

    Args:
        a: First integer.
        b: Second integer.

    Returns:
        The non-negative least common multiple of ``a`` and ``b`` (0 when
        either is 0), or an ``"Error ..."`` string when either input is not
        an integer.
    """
    try:
        if not all(isinstance(operand, int) for operand in (a, b)):
            return "Error: Both inputs must be integers"
        if 0 in (a, b):
            return 0
        divisor = math.gcd(abs(a), abs(b))
        product = abs(a * b)
        return product // divisor
    except Exception as exc:
        return f"Error calculating LCM: {exc!s}"
+@tool
def is_prime(n: int) -> Union[bool, str]:
    """Check whether an integer is prime using trial division.

    Args:
        n: The number to check.

    Returns:
        True if ``n`` is prime, False otherwise, or an ``"Error ..."``
        string when ``n`` is not an integer or the check fails.
    """
    try:
        if not isinstance(n, int):
            return "Error: Input must be an integer"
        if n < 2:
            return False
        if n % 2 == 0:
            # 2 is the only even prime.
            return n == 2
        # math.isqrt is exact for arbitrarily large ints; int(math.sqrt(n))
        # goes through a float, which can round below the true root (missing
        # a factor) and overflows for very large n.
        limit = math.isqrt(n)
        # Check odd divisors up to sqrt(n); all() stops at the first hit.
        return all(n % divisor for divisor in range(3, limit + 1, 2))
    except Exception as e:
        return f"Error checking if prime: {str(e)}"
+@tool
def percentage_calculator(part: Union[int, float], whole: Union[int, float]) -> Union[float, str]:
    """Calculate what percentage ``part`` is of ``whole``.

    Args:
        part: The part value.
        whole: The whole value. Must be non-zero.

    Returns:
        The percentage rounded to two decimal places, or an ``"Error ..."``
        string when ``whole`` is zero or the computation fails.
    """
    try:
        if whole == 0:
            return "Error: Cannot calculate percentage when whole is zero"
        ratio = part / whole
        return round(ratio * 100, 2)
    except Exception as exc:
        return f"Error calculating percentage: {exc!s}"
+@tool
def compound_interest(principal: Union[int, float], rate: Union[int, float],
                     time: Union[int, float], compound_frequency: int = 1) -> Union[float, str]:
    """Calculate the final amount after compound interest.

    Args:
        principal: The initial amount of money. Must be positive.
        rate: The annual interest rate as a percentage (e.g., 5 for 5%).
        time: The time period in years.
        compound_frequency: How many times per year the interest is
            compounded (default: 1).

    Returns:
        The final amount rounded to two decimal places, or an
        ``"Error ..."`` string when an argument is out of range or the
        computation fails.
    """
    try:
        # Validate in a fixed order so the first offending argument is the
        # one that gets reported.
        if principal <= 0:
            problem = "Error: Principal must be positive"
        elif rate < 0:
            problem = "Error: Interest rate cannot be negative"
        elif time < 0:
            problem = "Error: Time cannot be negative"
        elif compound_frequency <= 0:
            problem = "Error: Compound frequency must be positive"
        else:
            # A = P(1 + r/n)^(nt), with r converted from percent to decimal.
            growth_per_period = 1 + (rate / 100) / compound_frequency
            periods = compound_frequency * time
            return round(principal * growth_per_period ** periods, 2)
        return problem
    except Exception as exc:
        return f"Error calculating compound interest: {exc!s}"
+@tool
def convert_temperature(value: Union[int, float], from_unit: str, to_unit: str) -> Union[float, str]:
    """Convert a temperature between Celsius, Fahrenheit, and Kelvin.

    Args:
        value: The temperature value to convert.
        from_unit: The source unit ('C', 'F', 'K' or the full unit name;
            case-insensitive).
        to_unit: The target unit, in the same forms as ``from_unit``.

    Returns:
        The converted temperature rounded to two decimal places (or
        ``float(value)`` unchanged when both units are the same), or an
        ``"Error ..."`` string for unknown units or a failed conversion.
    """
    try:
        # Normalize unit names: full names collapse to their one-letter code.
        unit_map = {'CELSIUS': 'C', 'FAHRENHEIT': 'F', 'KELVIN': 'K'}
        valid_units = ('C', 'F', 'K')
        from_unit = from_unit.upper().strip()
        to_unit = to_unit.upper().strip()
        from_unit = unit_map.get(from_unit, from_unit)
        to_unit = unit_map.get(to_unit, to_unit)
        if from_unit not in valid_units or to_unit not in valid_units:
            return "Error: Units must be 'C' (Celsius), 'F' (Fahrenheit), or 'K' (Kelvin)"
        if from_unit == to_unit:
            return float(value)

        # Convert to Celsius first, then from Celsius to the target unit, so
        # only two formulas per unit are needed instead of one per pair.
        if from_unit == 'F':
            celsius = (value - 32) * 5/9
        elif from_unit == 'K':
            celsius = value - 273.15
        else:  # from_unit == 'C'
            celsius = value

        if to_unit == 'F':
            result = celsius * 9/5 + 32
        elif to_unit == 'K':
            result = celsius + 273.15
        else:  # to_unit == 'C'
            result = celsius
        return round(result, 2)
    except Exception as e:
        return f"Error converting temperature: {str(e)}"