Spaces:

arbnori45
/

assignment_agent

Sleeping

App Files Files Community

Arbnor Tefiki committed on Jun 29

Commit

8ecb1cd

1 Parent(s): 94b3868

one percent accuracy

Browse files

Files changed (5) hide show

.gitignore +1 -0
app.py +81 -26
custom_tools.py +198 -25
functions.py +346 -99
requirements.txt +3 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ __pycache__/

app.py CHANGED Viewed

@@ -5,6 +5,8 @@ import pandas as pd
 from dotenv import load_dotenv
 from functions import *
 from langchain_core.messages import HumanMessage
 load_dotenv()
@@ -49,60 +51,106 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
             input_messages = [HumanMessage(content=question_text)]
             result = agent({"messages": input_messages})
             if "messages" in result and result["messages"]:
-                last_valid = next(
-                    (m for m in reversed(result["messages"]) if hasattr(m, "content") and isinstance(m.content, str)),
-                    None
-                )
-                if last_valid:
-                    answer = last_valid.content.strip()
-                else:
-                    answer = "UNKNOWN"
-            else:
-                answer = "UNKNOWN"
-            print("Answered with:", answer)
             answers_payload.append({"task_id": task_id, "submitted_answer": answer})
             results_log.append({
                 "Task ID": task_id,
-                "Question": question_text,
-                "Submitted Answer": answer
             })
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    print(f"Submitting {len(answers_payload)} answers for user '{username}'...")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
-        print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except Exception as e:
@@ -113,10 +161,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
-        Modify the code here to define your agent's logic, the tools, the necessary packages, etc...
         """
     )
@@ -154,5 +209,5 @@ if __name__ == "__main__":
     print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 from dotenv import load_dotenv
 from functions import *
 from langchain_core.messages import HumanMessage
+import traceback
+import time
 load_dotenv()
     results_log = []
     answers_payload = []
+    print(f"\n{'='*60}")
     print(f"Running agent on {len(questions_data)} questions...")
+    print(f"{'='*60}\n")
+    for idx, item in enumerate(questions_data, 1):
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
+        print(f"\n--- Question {idx}/{len(questions_data)} ---")
+        print(f"Task ID: {task_id}")
+        print(f"Question: {question_text}")
         try:
+            # Start a timer for this question (elapsed time is reported; no timeout is enforced)
+            start_time = time.time()
             input_messages = [HumanMessage(content=question_text)]
+            # Invoke the agent with the question
             result = agent({"messages": input_messages})
+            # Extract the answer from the result
+            answer = "UNKNOWN"
             if "messages" in result and result["messages"]:
+                # Look for the last AI message with content
+                for msg in reversed(result["messages"]):
+                    if hasattr(msg, "content") and isinstance(msg.content, str) and msg.content.strip():
+                        # Skip planner outputs
+                        if not any(msg.content.upper().startswith(prefix) for prefix in ["SEARCH:", "CALCULATE:", "DEFINE:", "WIKIPEDIA:", "REVERSE:", "DIRECT:"]):
+                            answer = msg.content.strip()
+                            break
+            elapsed_time = time.time() - start_time
+            print(f"Answer: {answer}")
+            print(f"Time taken: {elapsed_time:.2f}s")
             answers_payload.append({"task_id": task_id, "submitted_answer": answer})
             results_log.append({
                 "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": answer,
+                "Time (s)": f"{elapsed_time:.2f}"
             })
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
+            print(f"Traceback: {traceback.format_exc()}")
+            # Still submit UNKNOWN for errors
+            answers_payload.append({"task_id": task_id, "submitted_answer": "UNKNOWN"})
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": f"ERROR: {str(e)[:50]}",
+                "Time (s)": "N/A"
+            })
+    print(f"\n{'='*60}")
+    print(f"Completed processing all questions")
+    print(f"{'='*60}\n")
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # Summary before submission
+    unknown_count = sum(1 for ans in answers_payload if ans["submitted_answer"] == "UNKNOWN")
+    print(f"\nSummary before submission:")
+    print(f"Total questions: {len(answers_payload)}")
+    print(f"UNKNOWN answers: {unknown_count}")
+    print(f"Attempted answers: {len(answers_payload) - unknown_count}")
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    print(f"\nSubmitting {len(answers_payload)} answers for user '{username}'...")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
+        score = result_data.get('score', 0)
+        correct_count = result_data.get('correct_count', 0)
+        total_attempted = result_data.get('total_attempted', 0)
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
+            f"Overall Score: {score}% "
+            f"({correct_count}/{total_attempted} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
+        print("\n" + "="*60)
+        print("SUBMISSION RESULTS:")
+        print(f"Score: {score}%")
+        print(f"Correct: {correct_count}/{total_attempted}")
+        print("="*60)
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except Exception as e:
 # Gradio UI
 with gr.Blocks() as demo:
+    gr.Markdown("# Enhanced GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
+        This enhanced agent is optimized for GAIA benchmark questions with improved:
+        - Planning logic for better tool selection
+        - Search capabilities with more comprehensive results
+        - Mathematical expression parsing
+        - Answer extraction from search results
+        - Error handling and logging
+        Target: >50% accuracy on GAIA questions
         """
     )
     print("-"*(60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for Enhanced GAIA Agent Evaluation...")
+    demo.launch(debug=True, share=False)

custom_tools.py CHANGED Viewed

@@ -1,19 +1,21 @@
 import requests
 from duckduckgo_search import DDGS
 from langchain_core.tools import tool
 @tool
 def reverse_text(input: str) -> str:
     """Reverse the characters in a text or string.
     Args:
-        query: The text or string to reverse.
     """
     return input[::-1]
 @tool
 def web_search(query: str) -> str:
-    """Perform a web search using DuckDuckGo and return the top 3 summarized results.
     Args:
         query: The search query to look up.
@@ -21,76 +23,247 @@ def web_search(query: str) -> str:
     try:
         results = []
         with DDGS() as ddgs:
-            for r in ddgs.text(query, max_results=3):
                 title = r.get("title", "")
                 snippet = r.get("body", "")
                 url = r.get("href", "")
                 if title and snippet:
-                    results.append(f"{title}: {snippet} (URL: {url})")
         if not results:
-            return "No results found."
-        return "\n\n---\n\n".join(results)
     except Exception as e:
         return f"Web search error: {e}"
 @tool
 def calculate(expression: str) -> str:
-    """Evaluate a simple math expression and return the result.
     Args:
         expression: A string containing the math expression to evaluate.
     """
     try:
         allowed_names = {
             "abs": abs,
             "round": round,
             "min": min,
             "max": max,
             "pow": pow,
         }
-        result = eval(expression, {"__builtins__": None}, allowed_names)
-        return str(result)
     except Exception as e:
         return f"Calculation error: {e}"
 @tool
 def wikipedia_summary(query: str) -> str:
-    """Retrieve a summary of a topic from Wikipedia.
     Args:
         query: The subject or topic to summarize.
     """
     try:
         response = requests.get(
-            f"https://en.wikipedia.org/api/rest_v1/page/summary/{query}", timeout=10
         )
-        response.raise_for_status()
-        data = response.json()
-        return data.get("extract", "No summary found.")
     except Exception as e:
         return f"Wikipedia error: {e}"
 @tool
 def define_term(term: str) -> str:
-    """Provide a dictionary-style definition of a given term using an online API.
     Args:
         term: The word or term to define.
     """
     try:
         response = requests.get(
-            f"https://api.dictionaryapi.dev/api/v2/entries/en/{term}", timeout=10
         )
-        response.raise_for_status()
-        data = response.json()
-        meanings = data[0].get("meanings", [])
-        if meanings:
-            defs = meanings[0].get("definitions", [])
-            if defs:
-                return defs[0].get("definition", "Definition not found.")
-        return "Definition not found."
     except Exception as e:
         return f"Definition error: {e}"
 # List of tools to register with your agent
-TOOLS = [web_search, calculate, wikipedia_summary, define_term, reverse_text]

 import requests
 from duckduckgo_search import DDGS
 from langchain_core.tools import tool
+import time
+import re
 @tool
 def reverse_text(input: str) -> str:
     """Reverse the characters in a text or string.
     Args:
+        input: The text or string to reverse.
     """
     return input[::-1]
 @tool
 def web_search(query: str) -> str:
+    """Perform a web search using DuckDuckGo and return comprehensive results.
     Args:
         query: The search query to look up.
     try:
         results = []
         with DDGS() as ddgs:
+            # Get more results for better coverage
+            search_results = list(ddgs.text(query, max_results=8))
+            for r in search_results:
                 title = r.get("title", "")
                 snippet = r.get("body", "")
                 url = r.get("href", "")
                 if title and snippet:
+                    # Combine title and snippet for more context
+                    full_text = f"{title}. {snippet}"
+                    results.append(full_text)
         if not results:
+            # Try with modified query
+            time.sleep(0.5)
+            with DDGS() as ddgs:
+                # Add more context to the query
+                modified_query = f"{query} facts information details"
+                search_results = list(ddgs.text(modified_query, max_results=5))
+                for r in search_results:
+                    title = r.get("title", "")
+                    snippet = r.get("body", "")
+                    if title and snippet:
+                        results.append(f"{title}. {snippet}")
+        if not results:
+            return "No search results found."
+        # Join all results with clear separation
+        return "\n\n".join(results)
     except Exception as e:
         return f"Web search error: {e}"
 @tool
 def calculate(expression: str) -> str:
+    """Evaluate a mathematical expression and return the result.
     Args:
         expression: A string containing the math expression to evaluate.
     """
     try:
+        # Clean the expression more thoroughly
+        expression = expression.strip()
+        # Handle various multiplication notations
+        expression = expression.replace("×", "*")
+        expression = expression.replace("x", "*")
+        expression = expression.replace("X", "*")
+        # Handle exponents
+        expression = expression.replace("^", "**")
+        # Remove thousands separators
+        expression = expression.replace(",", "")
+        # Handle parentheses
+        expression = expression.replace("[", "(").replace("]", ")")
+        expression = expression.replace("{", "(").replace("}", ")")
+        # Handle percentage calculations
+        # Convert "X% of Y" to "(X/100) * Y"
+        percent_pattern = r'(\d+(?:\.\d+)?)\s*%\s*of\s*(\d+(?:\.\d+)?)'
+        expression = re.sub(percent_pattern, r'(\1/100) * \2', expression)
+        # Convert standalone percentages
+        expression = re.sub(r'(\d+(?:\.\d+)?)\s*%', r'(\1/100)', expression)
+        # Define safe functions and constants
         allowed_names = {
             "abs": abs,
             "round": round,
             "min": min,
             "max": max,
             "pow": pow,
+            "sum": sum,
+            "len": len,
+            "__builtins__": {},
+            # Math constants
+            "pi": 3.14159265359,
+            "e": 2.71828182846,
         }
+        # Evaluate the expression
+        result = eval(expression, allowed_names)
+        # Format the result nicely
+        if isinstance(result, float):
+            # Check if it's a whole number
+            if result.is_integer():
+                return str(int(result))
+            else:
+                # Round to reasonable precision
+                formatted = f"{result:.10f}".rstrip('0').rstrip('.')
+                return formatted
+        else:
+            return str(result)
+    except ZeroDivisionError:
+        return "Error: Division by zero"
+    except SyntaxError as e:
+        return f"Syntax error in expression: {e}"
     except Exception as e:
         return f"Calculation error: {e}"
 @tool
 def wikipedia_summary(query: str) -> str:
+    """Retrieve a comprehensive summary of a topic from Wikipedia.
     Args:
         query: The subject or topic to summarize.
     """
     try:
+        # Clean the query
+        query = query.strip()
+        # First, try direct API
+        clean_query = query.replace(" ", "_")
         response = requests.get(
+            f"https://en.wikipedia.org/api/rest_v1/page/summary/{clean_query}",
+            timeout=10,
+            headers={"User-Agent": "Mozilla/5.0"}
+        )
+        if response.status_code == 200:
+            data = response.json()
+            extract = data.get("extract", "")
+            if extract and extract != "No summary found.":
+                title = data.get("title", query)
+                description = data.get("description", "")
+                # Get additional details from the full article if needed
+                full_response = requests.get(
+                    f"https://en.wikipedia.org/w/api.php",
+                    params={
+                        "action": "query",
+                        "prop": "extracts",
+                        "exintro": True,
+                        "explaintext": True,
+                        "titles": title,
+                        "format": "json"
+                    },
+                    timeout=10
+                )
+                result = extract
+                if description and description not in extract:
+                    result = f"{description}. {extract}"
+                if full_response.status_code == 200:
+                    full_data = full_response.json()
+                    pages = full_data.get("query", {}).get("pages", {})
+                    for page_id, page_info in pages.items():
+                        full_extract = page_info.get("extract", "")
+                        if full_extract and len(full_extract) > len(result):
+                            result = full_extract[:1000]  # Limit length
+                return result
+        # Fallback: Try searching Wikipedia
+        search_response = requests.get(
+            "https://en.wikipedia.org/w/api.php",
+            params={
+                "action": "opensearch",
+                "search": query,
+                "limit": 3,
+                "format": "json"
+            },
+            timeout=10
         )
+        if search_response.status_code == 200:
+            search_data = search_response.json()
+            if len(search_data) > 1 and search_data[1]:
+                # Try the first result
+                first_result = search_data[1][0]
+                if first_result:
+                    return wikipedia_summary(first_result)
+        return f"No Wikipedia article found for '{query}'."
     except Exception as e:
         return f"Wikipedia error: {e}"
 @tool
 def define_term(term: str) -> str:
+    """Provide a comprehensive dictionary definition of a given term.
     Args:
         term: The word or term to define.
     """
     try:
+        # Clean the term
+        term = term.strip().lower()
+        term = re.sub(r'[^\w\s-]', '', term)  # Remove punctuation except hyphens
         response = requests.get(
+            f"https://api.dictionaryapi.dev/api/v2/entries/en/{term}",
+            timeout=10
         )
+        if response.status_code == 200:
+            data = response.json()
+            all_definitions = []
+            # Collect all definitions with their parts of speech
+            for entry in data:
+                word = entry.get("word", term)
+                meanings = entry.get("meanings", [])
+                for meaning in meanings:
+                    part_of_speech = meaning.get("partOfSpeech", "")
+                    definitions = meaning.get("definitions", [])
+                    for definition in definitions:
+                        def_text = definition.get("definition", "")
+                        if def_text:
+                            if part_of_speech:
+                                all_definitions.append(f"({part_of_speech}) {def_text}")
+                            else:
+                                all_definitions.append(def_text)
+            if all_definitions:
+                # Return the most comprehensive definition
+                # Prefer longer, more detailed definitions
+                all_definitions.sort(key=len, reverse=True)
+                return all_definitions[0]
+        # Try alternative approach - use the error message if it's informative
+        if response.status_code == 404:
+            error_data = response.json()
+            if "message" in error_data:
+                return f"No definition found for '{term}'"
+        # Last resort - return a clear message
+        return f"Unable to find definition for '{term}'"
     except Exception as e:
         return f"Definition error: {e}"
 # List of tools to register with your agent
+TOOLS = [web_search, calculate, wikipedia_summary, define_term, reverse_text]

functions.py CHANGED Viewed

@@ -1,140 +1,387 @@
 import os
 import re
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import ToolNode
-from langchain_core.messages import HumanMessage, SystemMessage
 from huggingface_hub import InferenceClient
 from custom_tools import TOOLS
-from langchain_core.messages import AIMessage
 HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
 client = InferenceClient(token=HF_TOKEN)
-planner_prompt = SystemMessage(content="""
-    You are a planning assistant. Your job is to decide how to answer a question.
-    - If the answer is easy and factual, answer it directly.
-    - If you are not 100% certain or the answer requires looking up real-world information, say:
-        I need to search this.
-    - If the question contains math or expressions like +, -, /, ^, say:
-        I need to calculate this.
-    - If a word should be explained, say:
-        I need to define this.
-    -If the question asks about a person, historical event, or specific topic, say:
-        I need to look up wikipedia.
-    -If the questions asks for backwards pronounciation or reversing text, say:
-        I need to reverse text.
-    Only respond with one line explaining what you will do.
-    Do not try to answer yet.
-    e.g:
-        Q: How many studio albums did Mercedes Sosa release between 2000 and 2009?
-        A: I need to search this.
-        Q: What does the word 'ephemeral' mean?
-        A: I need to define this.
-        Q: What is 23 * 6 + 3?
-        A: I need to calculate this.
-        Q: Reverse this: 'tfel drow eht'
-        A: I need to reverse text.
-        Q: What bird species are seen in this video?
-        A: UNKNOWN
-    """)
 def planner_node(state: MessagesState):
-    hf_messages = [planner_prompt] + state["messages"]
-    # Properly map LangChain message objects to dicts
-    messages_dict = []
-    for msg in hf_messages:
-        if isinstance(msg, SystemMessage):
-            role = "system"
-        elif isinstance(msg, HumanMessage):
-            role = "user"
-        else:
-            raise ValueError(f"Unsupported message type: {type(msg)}")
-        messages_dict.append({"role": role, "content": msg.content})
-    response = client.chat.completions.create(
-        model="mistralai/Mistral-7B-Instruct-v0.2",
-        messages=messages_dict,
-    )
-    text = response.choices[0].message.content.strip()
-    print("Planner output:\n", text)
-    return {"messages": [SystemMessage(content=text)]}
-answer_prompt = SystemMessage(content="""
-    You are now given the result of a tool (like a search, calculator, or text reversal).
-    Use the tool result and the original question to give the final answer.
-    If the tool result is unhelpful or unclear, respond with 'UNKNOWN'.
-    Respond with only the answer — no explanations.
-    """)
-def assistant_node(state: MessagesState):
-    hf_messages = [answer_prompt] + state["messages"]
-    messages_dict = []
-    for msg in hf_messages:
-        if isinstance(msg, SystemMessage):
-            role = "system"
-        elif isinstance(msg, HumanMessage):
-            role = "user"
-        else:
-            raise ValueError(f"Unsupported message type: {type(msg)}")
-        messages_dict.append({"role": role, "content": msg.content})
-    response = client.chat.completions.create(
-        model="mistralai/Mistral-7B-Instruct-v0.2",
-        messages=messages_dict,
-    )
-    text = response.choices[0].message.content.strip()
-    print("Final answer output:\n", text)
-    return {"messages": [AIMessage(content=text)]}
 def tools_condition(state: MessagesState) -> str:
-    last_msg = state["messages"][-1].content.lower()
-    if any(trigger in last_msg for trigger in [
-        "i need to search",
-        "i need to calculate",
-        "i need to define",
-        "i need to reverse text",
-        "i need to look up wikipedia"
-    ]):
         return "tools"
     return "end"
-class PatchedToolNode(ToolNode):
-    def invoke(self, state: MessagesState, config) -> dict:
-        result = super().invoke(state)
-        tool_output = result.get("messages", [])[0].content if result.get("messages") else "UNKNOWN"
-        # Append tool result as a HumanMessage so assistant sees it
-        new_messages = state["messages"] + [HumanMessage(content=f"Tool result:\n{tool_output}")]
-        return {"messages": new_messages}
 def build_graph():
     builder = StateGraph(MessagesState)
     builder.add_node("planner", planner_node)
     builder.add_node("assistant", assistant_node)
-    builder.add_node("tools", PatchedToolNode(TOOLS))
     builder.add_edge(START, "planner")
     builder.add_conditional_edges("planner", tools_condition)
     builder.add_edge("tools", "assistant")
     return builder.compile()

 import os
 import re
+import json
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import ToolNode
+from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage
 from huggingface_hub import InferenceClient
 from custom_tools import TOOLS
 HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
 client = InferenceClient(token=HF_TOKEN)
+# Enhanced planner prompt with better instructions
+planner_prompt = SystemMessage(content="""You are an expert planning assistant for answering factual questions. Your job is to analyze each question and determine the BEST tool to use.
+TOOL SELECTION RULES:
+1. SEARCH: Use for ANY factual questions about:
+   - People (births, deaths, ages, achievements, relationships)
+   - Events (dates, locations, participants, outcomes)
+   - Places (locations, populations, geography)
+   - Current information (weather, news, prices)
+   - Specific facts requiring recent or detailed information
+   - Questions with numbers, dates, or statistics about real things
+2. CALCULATE: Use ONLY for pure mathematical expressions that can be evaluated
+   - Basic arithmetic (23 * 6 + 3)
+   - Percentages (15% of 250)
+   - Unit conversions with clear numbers
+   - Mathematical formulas
+3. WIKIPEDIA: Use for general knowledge topics that need comprehensive overview
+   - Historical events or periods
+   - Scientific concepts
+   - Geographic locations
+   - Famous people (when general info is needed)
+4. DEFINE: Use ONLY when asked for the definition of a single word
+   - "What does X mean?"
+   - "Define X"
+   - Single vocabulary words
+5. REVERSE: Use ONLY when explicitly asked to reverse text
+6. DIRECT: Use ONLY for:
+   - Greetings ("Hello", "Hi")
+   - Meta questions about the assistant
+   - Questions that are clearly unanswerable
+IMPORTANT PATTERNS:
+- "How many..." → Usually SEARCH (unless pure math)
+- "Who is..." → WIKIPEDIA or SEARCH
+- "When did..." → SEARCH
+- "Where is..." → SEARCH
+- "What is the [statistic/number]..." → SEARCH
+- "Calculate..." → CALCULATE
+- Names of people/places/things → SEARCH or WIKIPEDIA
+RESPONSE FORMAT: Respond with EXACTLY one of:
+- "SEARCH: [exact search query]"
+- "CALCULATE: [mathematical expression]"
+- "WIKIPEDIA: [topic]"
+- "DEFINE: [word]"
+- "REVERSE: [text]"
+- "DIRECT: [answer]"
+Extract the most relevant query from the question. Be specific and include key terms.""")
 def planner_node(state: MessagesState):
+    messages = state["messages"]
+    # Get the last human message
+    question = None
+    for msg in reversed(messages):
+        if isinstance(msg, HumanMessage):
+            question = msg.content
+            break
+    if not question:
+        return {"messages": [AIMessage(content="DIRECT: UNKNOWN")]}
+    # Quick pattern matching for common cases
+    question_lower = question.lower()
+    # Mathematical calculations
+    if any(op in question for op in ['*', '+', '-', '/', '^']) or \
+       re.search(r'\d+\s*[x×]\s*\d+', question) or \
+       re.search(r'\d+%\s+of\s+\d+', question_lower) or \
+       'calculate' in question_lower and not 'how many' in question_lower:
+        # Extract the mathematical expression
+        expr = question
+        for remove in ['calculate', 'what is', 'what\'s', '?', 'equals']:
+            expr = expr.lower().replace(remove, '')
+        expr = expr.strip()
+        return {"messages": [AIMessage(content=f"CALCULATE: {expr}")]}
+    # Definitions
+    if question_lower.startswith(('define ', 'what does ')) and ' mean' in question_lower:
+        word = re.search(r'(?:define |what does )(\w+)', question_lower)
+        if word:
+            return {"messages": [AIMessage(content=f"DEFINE: {word.group(1)}")]}
+    # Text reversal
+    if 'reverse' in question_lower:
+        # Extract text to reverse
+        match = re.search(r'reverse[:\s]+["\']?(.+?)["\']?$', question, re.IGNORECASE)
+        if match:
+            return {"messages": [AIMessage(content=f"REVERSE: {match.group(1).strip()}")]}
+    # For most factual questions, use search
+    factual_indicators = [
+        'how many', 'how much', 'how old', 'when did', 'when was',
+        'where is', 'where was', 'who is', 'who was', 'what year',
+        'which', 'name of', 'number of', 'amount of', 'age of',
+        'population', 'capital', 'president', 'founded', 'created',
+        'discovered', 'invented', 'released', 'published', 'born',
+        'died', 'location', 'situated', 'temperature', 'weather',
+        'price', 'cost', 'worth', 'value', 'rate'
+    ]
+    if any(indicator in question_lower for indicator in factual_indicators):
+        return {"messages": [AIMessage(content=f"SEARCH: {question}")]}
+    # Use planner LLM for complex cases
+    messages_dict = [
+        {"role": "system", "content": planner_prompt.content},
+        {"role": "user", "content": question}
+    ]
+    try:
+        response = client.chat.completions.create(
+            model="meta-llama/Meta-Llama-3-70B-Instruct",
+            messages=messages_dict,
+            max_tokens=100,
+            temperature=0.1
+        )
+        plan = response.choices[0].message.content.strip()
+        print(f"Question: {question}")
+        print(f"Planner output: {plan}")
+        return {"messages": [AIMessage(content=plan)]}
+    except Exception as e:
+        print(f"Planner error: {e}")
+        # Default to search for errors
+        return {"messages": [AIMessage(content=f"SEARCH: {question}")]}
def extract_query_from_plan(plan: str, original_question: str) -> str:
    """Return the payload that follows the first ``:`` in a planner directive.

    Planner directives look like ``"SEARCH: capital of France"``. Everything
    after the first colon is taken as the payload, with surrounding
    whitespace and one layer of surrounding quote characters removed.

    Args:
        plan: Raw planner output.
        original_question: The user's question, used as the fallback.

    Returns:
        The extracted payload, or ``original_question`` when ``plan`` has no
        colon or nothing usable follows it.
    """
    # partition() splits on the first colon only, so colons inside the
    # payload itself ("WIKIPEDIA: Time: a history") are preserved.
    _tag, separator, remainder = plan.partition(":")
    if separator:
        query = remainder.strip().strip("'\"")
        # An empty payload ("SEARCH:" or "SEARCH: ''") would otherwise be
        # handed to a tool as an empty query; use the question instead.
        if query.strip():
            return query
    return original_question
def _invoke_tool(name: str, payload: dict):
    """Look up a tool in TOOLS by its ``name`` attribute and invoke it."""
    tool = next((t for t in TOOLS if t.name == name), None)
    if tool is None:
        # Explicit error instead of the bare StopIteration a failed next() raises.
        raise LookupError(f"Tool not registered: {name}")
    return tool.invoke(payload)


def _normalize_expression(expression: str) -> str:
    """Rewrite human-style arithmetic notation into Python operator syntax."""
    expression = expression.replace("×", "*")
    # Treat x/X as multiplication only when it sits between operands
    # ("3 x 4", "3x4", "(2)x(3)"), so identifiers that merely contain the
    # letter (e.g. "exp") are left alone.
    expression = re.sub(r'(?<=[\d)\s])[xX](?=[\d(\s.\-])', "*", expression)
    expression = expression.replace("^", "**")
    # Commas are assumed to be thousands separators ("1,000").
    expression = expression.replace(",", "")
    if "%" in expression:
        # Convert each "X% of Y" to "(Y * X / 100)" in place so the rest of
        # the expression ("10 + 15% of 200") is kept.
        expression = re.sub(
            r'(\d+(?:\.\d+)?)\s*%\s*of\s*(\d+(?:\.\d+)?)',
            r'(\2 * \1 / 100)',
            expression,
        )
        # Any remaining "%" is read as "per cent".
        expression = expression.replace("%", "/ 100")
    return expression


def tool_calling_node(state: MessagesState):
    """Call the appropriate tool based on the planner decision.

    Reads the most recent AIMessage as the plan and the first HumanMessage
    as the original question, dispatches on the plan's tag (SEARCH,
    CALCULATE, DEFINE, WIKIPEDIA, REVERSE, DIRECT, UNKNOWN) and wraps the
    outcome in a ToolMessage. Plans with no recognised tag fall back to a
    web search on the original question.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        ``{"messages": [ToolMessage]}``. The content is the stringified tool
        result, ``"Calculation error"`` when a CALCULATE plan fails, or
        ``"UNKNOWN"`` when the plan/question is missing or another tool fails.
    """
    messages = state["messages"]
    # Planner output: the latest AI message.
    plan = next(
        (m.content for m in reversed(messages) if isinstance(m, AIMessage)),
        None,
    )
    # Original question: the first human message.
    original_question = next(
        (m.content for m in messages if isinstance(m, HumanMessage)),
        None,
    )
    if not plan or not original_question:
        return {"messages": [ToolMessage(content="UNKNOWN", tool_call_id="error")]}

    plan_upper = plan.strip().upper()
    try:
        if plan_upper.startswith("SEARCH:"):
            query = extract_query_from_plan(plan, original_question)
            result = _invoke_tool("web_search", {"query": query})
        elif plan_upper.startswith("CALCULATE:"):
            expression = _normalize_expression(
                extract_query_from_plan(plan, original_question)
            )
            result = _invoke_tool("calculate", {"expression": expression})
        elif plan_upper.startswith("DEFINE:"):
            term = extract_query_from_plan(plan, original_question)
            term = term.strip("'\"?.,!").lower()
            result = _invoke_tool("define_term", {"term": term})
        elif plan_upper.startswith("WIKIPEDIA:"):
            topic = extract_query_from_plan(plan, original_question)
            result = _invoke_tool("wikipedia_summary", {"query": topic})
        elif plan_upper.startswith("REVERSE:"):
            text = extract_query_from_plan(plan, original_question)
            text = text.strip("'\"")
            result = _invoke_tool("reverse_text", {"input": text})
        elif plan_upper.startswith("DIRECT:"):
            # The planner already produced the answer; pass it through.
            result = extract_query_from_plan(plan, original_question)
        elif "UNKNOWN" in plan_upper:
            result = "UNKNOWN"
        else:
            # Fallback: search
            print(f"Unrecognized plan format: {plan}, falling back to search")
            result = _invoke_tool("web_search", {"query": original_question})
    except Exception as e:
        print(f"Tool error: {e}")
        # plan_upper is upper-cased, so the tag must be compared in upper
        # case; a lowercase comparison can never match.
        if plan_upper.startswith("CALCULATE:"):
            result = "Calculation error"
        else:
            result = "UNKNOWN"

    # Tools are not guaranteed to return str; stringify before slicing.
    result_text = str(result)
    print(f"Tool result: {result_text[:200]}...")
    return {"messages": [ToolMessage(content=result_text, tool_call_id="tool_call")]}
# System prompt for the answer-extraction LLM call in assistant_node. It asks
# the model to reduce a tool result to the shortest exact answer and to reply
# "UNKNOWN" when the tool result does not contain the information.
answer_prompt = SystemMessage(
    content="""You are an expert at extracting precise answers from search results and tool outputs.
CRITICAL RULES:
1. Extract the EXACT answer the question is asking for
2. For numerical questions, return ONLY the number (no units unless asked)
3. For yes/no questions, return ONLY "yes" or "no"
4. For counting questions ("how many"), return ONLY the number
5. For naming questions, return ONLY the name(s)
6. Be as concise as possible - typically 1-10 words
7. If the information is clearly not in the tool result, return "UNKNOWN"
PATTERN MATCHING:
- "How many..." → Return just the number
- "What is the name of..." → Return just the name
- "When did..." → Return just the date/year
- "Where is..." → Return just the location
- "Who is/was..." → Return just the name or brief role
- "Is/Are..." → Return "yes" or "no"
IMPORTANT: Look for specific numbers, dates, names, or facts in the tool result that directly answer the question."""
)
def _count_near_keywords(question_lower: str, tool_result: str, window: int = 50):
    """Return the first whole number in ``tool_result`` with a question keyword nearby.

    A keyword is any question word longer than three characters (after
    trimming punctuation) that is not a generic interrogative. Returns
    ``None`` when no number has a keyword within ``window`` characters.
    """
    generic_words = {'what', 'when', 'where', 'many', 'much'}
    keywords = []
    for word in question_lower.split():
        # Trim punctuation so "albums?" can match "albums" in the result.
        word = word.strip("?.,!;:\"'()")
        if len(word) > 3 and word not in generic_words:
            keywords.append(word)
    if not keywords:
        return None
    # finditer gives the position of each whole-number match; str.find on the
    # digits could land inside a longer number ("5" inside "2015").
    for match in re.finditer(r'\b\d+\b', tool_result):
        start = max(0, match.start() - window)
        context = tool_result[start:match.end() + window].lower()
        if any(keyword in context for keyword in keywords):
            return match.group()
    return None


def _clean_answer(answer: str) -> str:
    """Normalise the LLM's reply into a bare answer string."""
    # Remove a leading "Answer:" / "A:" label only; the same characters in
    # the middle of an answer ("NASA: ...") are kept.
    answer = re.sub(r'^\s*(?:Answer|A)\s*:\s*', '', answer.strip(), flags=re.IGNORECASE)
    # Drop trailing periods only, so a leading decimal point (".5") survives.
    answer = answer.rstrip(".").strip()
    # For yes/no questions, ensure lowercase
    if answer.lower() in ('yes', 'no'):
        answer = answer.lower()
    return answer or "UNKNOWN"


def assistant_node(state: MessagesState):
    """Generate the final answer from the latest tool result.

    Order of attempts:
      1. A purely numeric tool result is returned as-is.
      2. A one-word tool result for a question starting with "reverse" is
         returned as-is.
      3. For "how many" questions, the first number in the tool result that
         appears near a question keyword is returned.
      4. Otherwise the LLM is asked to extract the answer using
         ``answer_prompt``.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        ``{"messages": [AIMessage]}`` whose content is the answer, or
        ``"UNKNOWN"`` when the question or tool result is missing, the LLM
        call fails, or the LLM returns nothing usable.
    """
    messages = state["messages"]
    # Original question: the first human message.
    original_question = next(
        (m.content for m in messages if isinstance(m, HumanMessage)),
        None,
    )
    # Tool result: the most recent tool message.
    tool_result = next(
        (m.content for m in reversed(messages) if isinstance(m, ToolMessage)),
        None,
    )
    if not tool_result or not original_question:
        return {"messages": [AIMessage(content="UNKNOWN")]}

    # Calculation results are usually a bare number; return it directly.
    stripped_result = tool_result.strip()
    if re.fullmatch(r'-?\d+\.?\d*', stripped_result):
        return {"messages": [AIMessage(content=stripped_result)]}

    # For simple reversed text, return it directly.
    question_lower = original_question.lower()
    if len(tool_result.split()) == 1 and question_lower.startswith('reverse'):
        return {"messages": [AIMessage(content=tool_result)]}

    # Cheap heuristic before spending an LLM call on counting questions.
    if "how many" in question_lower and tool_result != "UNKNOWN":
        count = _count_near_keywords(question_lower, tool_result)
        if count is not None:
            return {"messages": [AIMessage(content=count)]}

    # Use LLM for complex extraction
    messages_dict = [
        {"role": "system", "content": answer_prompt.content},
        {"role": "user", "content": f"Question: {original_question}\n\nTool result: {tool_result}\n\nExtract the precise answer:"}
    ]
    try:
        response = client.chat.completions.create(
            model="meta-llama/Meta-Llama-3-70B-Instruct",
            messages=messages_dict,
            max_tokens=50,
            temperature=0.1
        )
        answer = _clean_answer(response.choices[0].message.content)
        print(f"Final answer: {answer}")
        return {"messages": [AIMessage(content=answer)]}
    except Exception as e:
        print(f"Assistant error: {e}")
        return {"messages": [AIMessage(content="UNKNOWN")]}
 def tools_condition(state: MessagesState) -> str:
+    """Decide whether to use tools or end"""
+    last_msg = state["messages"][-1]
+    if not isinstance(last_msg, AIMessage):
+        return "end"
+    content = last_msg.content.upper()
+    # Check if we need to use a tool
+    tool_keywords = ["SEARCH:", "CALCULATE:", "DEFINE:", "WIKIPEDIA:", "REVERSE:"]
+    if any(content.startswith(keyword) for keyword in tool_keywords):
         return "tools"
+    # For DIRECT answers or UNKNOWN, go straight to assistant to format properly
+    if content.startswith("DIRECT:") or "UNKNOWN" in content:
+        # Still go through assistant to extract the answer
+        return "tools"
     return "end"
 def build_graph():
+    """Build the LangGraph workflow"""
     builder = StateGraph(MessagesState)
+    # Add nodes
     builder.add_node("planner", planner_node)
+    builder.add_node("tools", tool_calling_node)
     builder.add_node("assistant", assistant_node)
+    # Add edges
     builder.add_edge(START, "planner")
     builder.add_conditional_edges("planner", tools_condition)
     builder.add_edge("tools", "assistant")
     return builder.compile()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+requests
+gradio[oauth]