naman1102 commited on
Commit
4f25f4e
·
1 Parent(s): 4ad5e89
Files changed (7) hide show
  1. app.py +145 -334
  2. old2app.py +587 -0
  3. old2state.py +22 -0
  4. old2tools.py +422 -0
  5. old_app_copy.py +2 -2
  6. state.py +22 -21
  7. tools.py +99 -199
app.py CHANGED
@@ -1,17 +1,9 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
6
- from langgraph.prebuilt import ToolNode
7
-
8
-
9
- # from typing import Any, Dict
10
- # from typing import TypedDict, Annotated
11
-
12
  from langchain_openai import ChatOpenAI
13
  from langgraph.graph import StateGraph, START, END
14
- from langgraph.graph.message import add_messages
15
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
16
  # Create a ToolNode that knows about your web_search function
17
  import json
@@ -20,369 +12,188 @@ from state import AgentState
20
  # --- Constants ---
21
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
- from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools, audio_transcriber_tool, wikipedia_search_tool
24
 
25
- llm = ChatOpenAI(model_name="gpt-4.1")
26
-
27
- # ─── 1) plan_node ───
28
- # ─── 1) plan_node ───
29
- tool_counter = 0
30
 
 
31
 
32
- # ─── 1) plan_node ───
33
- def plan_node(state: AgentState) -> AgentState:
34
- """
35
- Step 1: Ask GPT to draft a concise direct answer (INTERIM_ANSWER),
36
- then decide if it's confident enough to stop or if it needs one tool.
37
- If confident: return {"final_answer":"<answer>"}
38
- Otherwise: return exactly one of:
39
- {"wiki_query":"..."},
40
- {"ocr_path":"..."},
41
- {"excel_path":"...","excel_sheet_name":"..."},
42
- {"audio_path":"..."}
43
- """
44
- prior_msgs = state.get("messages", [])
45
- user_input = ""
46
- for msg in reversed(prior_msgs):
47
- if isinstance(msg, HumanMessage):
48
- user_input = msg.content
49
- break
50
-
51
- system_msg = SystemMessage(
52
- content=(
53
-
54
- "You are an agent that must do two things in one JSON output:\n\n"
55
- " 1) Provide a concise, direct answer to the user's question (no explanation).\n"
56
- " 2) Judge whether that answer is reliable:\n"
57
- " • If you are fully confident, return exactly:\n"
58
- " {\"final_answer\":\"<your concise answer>\"}\n"
59
- " and nothing else.\n"
60
- " • Otherwise, return exactly one of:\n"
61
- " {\"wiki_query\":\"<Wikipedia search>\"}\n"
62
- " {\"ocr_path\":\"<image path or task_id>\"}\n"
63
- " {\"excel_path\":\"<xlsx path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
64
- " {\"audio_path\":\"<audio path or task_id>\"}\n"
65
- " and nothing else.\n"
66
- "Do NOT wrap in markdown—output only a single JSON object.\n"
67
- f"User's question: \"{user_input}\"\n"
68
- )
69
- )
70
- human_msg = HumanMessage(content=user_input)
71
- llm_response = llm([system_msg, human_msg])
72
- llm_out = llm_response.content.strip()
73
 
74
- ai_msg = AIMessage(content=llm_out)
75
- new_msgs = prior_msgs.copy() + [ai_msg]
76
 
77
- try:
78
- parsed = json.loads(llm_out)
79
- if isinstance(parsed, dict):
80
- partial: AgentState = {"messages": new_msgs}
81
- allowed = {
82
- "final_answer",
83
- "wiki_query",
84
- "ocr_path",
85
- "excel_path",
86
- "excel_sheet_name",
87
- "audio_path",
88
- }
89
- for k, v in parsed.items():
90
- if k in allowed:
91
- partial[k] = v
92
- return partial
93
- except json.JSONDecodeError:
94
- pass
95
 
96
- return {
97
- "messages": new_msgs,
98
- "final_answer": "Sorry, I could not parse your intent.",
99
- }
100
 
 
101
 
102
- # ─── 2) store_prev_state ───
103
- def store_prev_state(state: AgentState) -> AgentState:
104
- return {**state, "prev_state": state.copy()}
 
 
 
105
 
106
 
107
- # ─── 3) tools_node ───
108
- def tool_node(state: AgentState) -> AgentState:
109
- """
110
- Dispatch exactly one tool based on which key was set:
111
- - wiki_query → wikipedia_search_tool
112
- - ocr_path → ocr_image_tool
113
- - excel_path → parse_excel_tool
114
- - audio_path → audio_transcriber_tool
115
- """
116
- global tool_counter
117
- if tool_counter >= 5:
118
- # If we've already run 5 tools, do nothing
119
- return {
120
- "messages": state["messages"],
121
- "final_answer": state.get("final_answer", "No interim answer available.")
122
- }
123
-
124
- tool_counter += 1
125
-
126
- if state.get("wiki_query"):
127
- return wikipedia_search_tool(state)
128
- if state.get("ocr_path"):
129
- return ocr_image_tool(state)
130
- if state.get("excel_path"):
131
- return parse_excel_tool(state)
132
- if state.get("audio_path"):
133
- return audio_transcriber_tool(state)
134
-
135
- return {} # no tool key present
136
-
137
-
138
- # ─── 4) merge_tool_output ───
139
- def merge_tool_output(state: AgentState) -> AgentState:
140
- """
141
- Combine previous state and tool output into one, but remove any stale request-keys.
142
- """
143
- prev = state.get("prev_state", {}).copy()
144
 
145
- # Drop stale request-keys in prev
146
- for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
147
- prev.pop(dead, None)
148
 
149
- merged = {**prev, **state}
150
- # Drop them again from merged so they don't persist into the next cycle
151
- for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
152
- merged.pop(dead, None)
153
 
154
- merged.pop("prev_state", None)
155
- return merged
156
 
 
157
 
158
- # ─── 5) inspect_node ───
159
- def inspect_node(state: AgentState) -> AgentState:
160
- """
161
- After running a tool, show GPT:
162
- - ORIGINAL user question
163
- - Any tool results (web_search_result, ocr_result, excel_result, transcript, wiki_result)
164
- - The INTERIM_ANSWER (always present if plan_node ran correctly)
165
-
166
- If tool_counter ≥ 5, use LLM once more (with full context) to craft a final answer.
167
- Otherwise, ask GPT to either:
168
- • Return {"final_answer":"<final>"} if done, OR
169
- • Return exactly one tool key to run next (wiki_query / ocr_path / excel_path & excel_sheet_name / audio_path).
170
- """
171
 
172
- global tool_counter
173
-
174
- # If we've already run 5 tools, ask GPT for a strictly‐formatted JSON final_answer
175
- if tool_counter >= 5:
176
- messages_for_llm = []
177
-
178
- # Re‐insert the user’s question
179
- question = ""
180
- for msg in reversed(state.get("messages", [])):
181
- if isinstance(msg, HumanMessage):
182
- question = msg.content
183
- break
184
- messages_for_llm.append(SystemMessage(content=f"USER_QUESTION: {question}"))
185
-
186
- # Add any tool results so far
187
- if sr := state.get("web_search_result"):
188
- messages_for_llm.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {sr}"))
189
- if orc := state.get("ocr_result"):
190
- messages_for_llm.append(SystemMessage(content=f"OCR_RESULT: {orc}"))
191
- if exr := state.get("excel_result"):
192
- messages_for_llm.append(SystemMessage(content=f"EXCEL_RESULT: {exr}"))
193
- if tr := state.get("transcript"):
194
- messages_for_llm.append(SystemMessage(content=f"AUDIO_TRANSCRIPT: {tr}"))
195
- if wr := state.get("wiki_result"):
196
- messages_for_llm.append(SystemMessage(content=f"WIKIPEDIA_RESULT: {wr}"))
197
-
198
- # Show the interim answer
199
- interim = state.get("interim_answer", "")
200
- messages_for_llm.append(SystemMessage(content=f"INTERIM_ANSWER: {interim}"))
201
-
202
- # Now ask for JSON ONLY (no reasoning, no extra text)
203
- final_prompt = (
204
- "Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
205
- "Using only the information above—including the USER_QUESTION, "
206
- "any TOOL_RESULT, and the INTERIM_ANSWER—produce a concise final answer. "
207
- "Return exactly one JSON object and nothing else, in this format:\n\n"
208
- "{\"final_answer\":\"<your final answer>\"}\n"
209
- "Do not include any other words or punctuation outside that JSON. if its numbers, dont show the units"
210
  )
211
- messages_for_llm.append(SystemMessage(content=final_prompt))
212
-
213
- llm_response = llm(messages_for_llm)
214
- raw = llm_response.content.strip()
215
- new_msgs = state["messages"] + [AIMessage(content=raw)]
216
-
217
- # Try to parse exactly one JSON with "final_answer"
218
- try:
219
- parsed = json.loads(raw)
220
- if isinstance(parsed, dict) and "final_answer" in parsed:
221
- return {"messages": new_msgs, "final_answer": parsed["final_answer"]}
222
- except json.JSONDecodeError:
223
- pass
224
-
225
- # Fallback to returning the interim in case JSON parse fails
226
- return {"messages": new_msgs, "final_answer": interim}
227
- # ——————————— If tool_counter < 5, proceed as before ———————————
228
- messages_for_llm = []
229
-
230
- # (1) Re‐insert original user question
231
- question = ""
232
- for msg in reversed(state.get("messages", [])):
233
- if isinstance(msg, HumanMessage):
234
- question = msg.content
235
- break
236
- messages_for_llm.append(SystemMessage(content=f"USER_QUESTION: {question}"))
237
-
238
- # (2) Add any tool results
239
- if sr := state.get("web_search_result"):
240
- messages_for_llm.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {sr}"))
241
- if orc := state.get("ocr_result"):
242
- messages_for_llm.append(SystemMessage(content=f"OCR_RESULT: {orc}"))
243
- if exr := state.get("excel_result"):
244
- messages_for_llm.append(SystemMessage(content=f"EXCEL_RESULT: {exr}"))
245
- if tr := state.get("transcript"):
246
- messages_for_llm.append(SystemMessage(content=f"AUDIO_TRANSCRIPT: {tr}"))
247
- if wr := state.get("wiki_result"):
248
- messages_for_llm.append(SystemMessage(content=f"WIKIPEDIA_RESULT: {wr}"))
249
-
250
- # (3) Always show the interim answer
251
- interim = state.get("interim_answer", "")
252
- messages_for_llm.append(SystemMessage(content=f"INTERIM_ANSWER: {interim}"))
253
-
254
- # (4) Prompt GPT to decide final or another tool
255
- prompt = (
256
- "You have a current draft answer (INTERIM_ANSWER) and possibly some tool results above.\n"
257
- "If you are confident it’s correct, return exactly:\n"
258
- " {\"final_answer\":\"<your final answer>\"}\n"
259
- "and nothing else.\n"
260
- "Otherwise, return exactly one of these JSON literals to fetch another tool:\n"
261
- " {\"wiki_query\":\"<query for Wikipedia>\"}\n"
262
- " {\"ocr_path\":\"<image path or task_id>\"}\n"
263
- " {\"excel_path\":\"<xls path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
264
- " {\"audio_path\":\"<audio path or task_id>\"}\n"
265
- "Do NOT wrap in markdown—return only the JSON object.\n"
266
  )
267
- messages_for_llm.append(SystemMessage(content=prompt))
268
- llm_response = llm(messages_for_llm)
269
- raw = llm_response.content.strip()
270
- new_msgs = state["messages"] + [AIMessage(content=raw)]
271
-
272
- # Try to parse the LLM’s JSON
273
- try:
274
- parsed = json.loads(raw)
275
- if isinstance(parsed, dict):
276
- # (a) If GPT gave a final_answer, return immediately
277
- if "final_answer" in parsed:
278
- return {"messages": new_msgs, "final_answer": parsed["final_answer"]}
279
-
280
- # (b) If GPT requested exactly one valid tool, return only that key
281
- valid_keys = {"wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"}
282
- requested_keys = set(parsed.keys()) & valid_keys
283
- if len(requested_keys) == 1:
284
- clean: AgentState = {"messages": new_msgs}
285
- for k in requested_keys:
286
- clean[k] = parsed[k]
287
- return clean
288
- except json.JSONDecodeError:
289
- pass
290
-
291
- # (c) Fallback: if GPT never returned a valid tool key or a final_answer,
292
- # just finalize with the existing interim_answer
293
- return {"messages": new_msgs, "final_answer": interim}
294
-
295
-
296
- # ─── 6) finalize_node ───
297
- def finalize_node(state: AgentState) -> AgentState:
298
- """
299
- If state already has "final_answer", return it. Otherwise, it's an error.
300
- """
301
- if fa := state.get("final_answer"):
302
- return {"final_answer": fa}
303
- return {"final_answer": "ERROR: finalize called without a final_answer."}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
 
305
 
306
- # ─── 7) Build the graph and wire edges ───
307
  graph = StateGraph(AgentState)
308
 
309
  # Register nodes
310
- graph.add_node("plan", plan_node)
311
- graph.add_node("store_prev_state", store_prev_state)
312
- graph.add_node("tools", tool_node)
313
- graph.add_node("merge_tool_output", merge_tool_output)
314
- graph.add_node("inspect", inspect_node)
315
- graph.add_node("finalize", finalize_node)
316
-
317
- # START → plan
318
- graph.add_edge(START, "plan")
319
-
320
- # plan → either finalize (if plan set final_answer) or store_prev_state (if plan wants a tool)
321
- def route_plan(plan_out: AgentState) -> str:
322
- if plan_out.get("final_answer") is not None:
323
- return "finalize"
324
- return "store_prev_state"
 
 
 
 
325
 
326
  graph.add_conditional_edges(
327
- "plan",
328
- route_plan,
329
- {"store_prev_state": "store_prev_state", "finalize": "finalize"},
 
 
 
 
 
 
330
  )
331
 
332
- # store_prev_state tools
333
- graph.add_edge("store_prev_state", "tools")
 
334
 
335
- # toolsmerge_tool_output
336
- graph.add_edge("tools", "merge_tool_output")
 
 
 
 
 
 
 
 
 
 
 
337
 
338
- # merge_tool_output → inspect
339
- graph.add_edge("merge_tool_output", "inspect")
340
 
341
- # inspect → either finalize (if inspect set final_answer) or store_prev_state (if inspect wants another tool)
342
- def route_inspect(inspect_out: AgentState) -> str:
343
- if inspect_out.get("final_answer") is not None:
344
- return "finalize"
345
- return "store_prev_state"
346
 
347
- graph.add_conditional_edges(
348
- "inspect",
349
- route_inspect,
350
- {"store_prev_state": "store_prev_state", "finalize": "finalize"},
351
- )
352
 
353
- # finalize → END
354
- graph.add_edge("finalize", END)
355
 
356
- compiled_graph = graph.compile()
357
 
358
 
359
- # ─── 8) respond_to_input ───
360
- def respond_to_input(user_input: str, task_id) -> str:
361
- """
362
- Reset the global tool_counter, seed state['messages'], invoke the graph,
363
- and return the final_answer.
364
- """
365
- global tool_counter
366
- tool_counter = 0 # Reset on every new user query
367
 
368
- system_msg = SystemMessage(
369
- content=(
370
- "You are an agent orchestrator. Decide whether to use a tool or answer directly.\n"
371
- "Try not to use tools so many times. If you think you can answer the question without using a tool, do it Please.\n"
372
- "Tools available:\n"
373
- " • Wikipedia: set {\"wiki_query\":\"<search terms>\"}\n"
374
- " • OCR: set {\"ocr_path\":\"<image path or task_id>\"}\n"
375
- " • Excel: set {\"excel_path\":\"<xlsx path>\", \"excel_sheet_name\":\"<sheet>\"}\n"
376
- " • Audio transcription: set {\"audio_path\":\"<audio path or task_id>\"}\n"
377
- "If you can answer immediately, set {\"final_answer\":\"<answer>\"}. "
378
- "Respond with only one JSON object and no extra formatting."
379
- )
380
- )
381
- human_msg = HumanMessage(content=user_input)
382
 
383
- initial_state: AgentState = {"messages": [system_msg, human_msg], "task_id": task_id}
384
- final_state = compiled_graph.invoke(initial_state)
385
- return final_state.get("final_answer", "Error: No final answer generated.")
386
 
387
  class BasicAgent:
388
  def __init__(self):
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
 
 
 
 
 
 
5
  from langchain_openai import ChatOpenAI
6
  from langgraph.graph import StateGraph, START, END
 
7
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
8
  # Create a ToolNode that knows about your web_search function
9
  import json
 
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
 
15
 
16
+ from __future__ import annotations
 
 
 
 
17
 
18
+ import json
19
 
20
+ from typing import Any, Dict, List, Optional
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
 
 
22
 
23
+ # ─────────────────────────── External tools ──────────────────────────────
24
+ from tools import (
25
+ wikipedia_search_tool,
26
+ ocr_image_tool,
27
+ audio_transcriber_tool,
28
+ parse_excel_tool
29
+ )
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ # ─────────────────────────── Configuration ───────────────────────────────
32
+ LLM = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.0)
33
+ MAX_TOOL_CALLS = 5
 
34
 
35
+ # ─────────────────────────── Helper utilities ────────────────────────────
36
 
37
+ def safe_json(text: str) -> Optional[Dict[str, Any]]:
38
+ try:
39
+ obj = json.loads(text.strip())
40
+ return obj if isinstance(obj, dict) else None
41
+ except json.JSONDecodeError:
42
+ return None
43
 
44
 
45
+ def brief(d: Dict[str, Any]) -> str:
46
+ for k in ("wiki_result", "ocr_result", "transcript"):
47
+ if k in d:
48
+ return f"{k}: {str(d[k])[:160].replace('\n', ' ')}…"
49
+ return "(no output)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ # ─────────────────────────── Agent state ───────────────────────────────
 
 
52
 
 
 
 
 
53
 
 
 
54
 
55
+ # ───────────────────────────── Nodes ⬇ ───────────────────────────────────
56
 
57
+ def tool_selector(state: AgentState) -> AgentState:
58
+ """Ask the LLM what to do next (wiki / ocr / audio / excel / final)."""
59
+ if state.tool_calls >= MAX_TOOL_CALLS:
60
+ state.add(SystemMessage(content="You have reached the maximum number of tool calls. Use the already gathered information to answer the question."))
61
+ state.next_action = "final"
62
+ return state
 
 
 
 
 
 
 
63
 
64
+ prompt = SystemMessage(
65
+ content=(
66
+ "Reply with ONE JSON only (no markdown). Choices:\n"
67
+ " {'action':'wiki','query':'…'}\n"
68
+ " {'action':'ocr'}\n"
69
+ " {'action':'audio'}\n"
70
+ " {'action':'excel'}\n"
71
+ " {'action':'final'}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  )
74
+ raw = LLM(state.messages + [prompt]).content.strip()
75
+ state.add(AIMessage(content=raw))
76
+ parsed = safe_json(raw)
77
+ if not parsed or "action" not in parsed:
78
+ state.next_action = "final"
79
+ return state
80
+
81
+ state.next_action = parsed["action"]
82
+ state.query = parsed.get("query")
83
+ return state
84
+
85
+ # ------------- tool adapters -------------
86
+
87
+ def wiki_tool(state: AgentState) -> AgentState:
88
+ out = wikipedia_search_tool({"wiki_query": state.query or ""})
89
+ state.tool_calls += 1
90
+ state.add(SystemMessage(content=f"WIKI_TOOL_OUT: {brief(out)}"))
91
+ state.next_action = None
92
+ return state
93
+
94
+
95
+ def ocr_tool(state: AgentState) -> AgentState:
96
+ out = ocr_image_tool({"task_id": state.task_id, "ocr_path": ""})
97
+ state.tool_calls += 1
98
+ state.add(SystemMessage(content=f"OCR_TOOL_OUT: {brief(out)}"))
99
+ state.next_action = None
100
+ return state
101
+
102
+
103
+ def audio_tool(state: AgentState) -> AgentState:
104
+ out = audio_transcriber_tool({"task_id": state.task_id, "audio_path": ""})
105
+ state.tool_calls += 1
106
+ state.add(SystemMessage(content=f"AUDIO_TOOL_OUT: {brief(out)}"))
107
+ state.next_action = None
108
+ return state
109
+
110
+ def excel_tool(state: AgentState) -> AgentState:
111
+ result = parse_excel_tool({
112
+ "task_id": state.task_id,
113
+ "excel_sheet_name": state.sheet or ""
114
+ })
115
+ out = {"excel_result": result}
116
+ state.tool_calls += 1
117
+ state.add(SystemMessage(content=f"EXCEL_TOOL_OUT: {brief(out)}"))
118
+ state.next_action = None
119
+ return state
120
+
121
+
122
+ # ------------- final answer -------------
123
+
124
+ def final_answer(state: AgentState) -> AgentState:
125
+ wrap = SystemMessage(
126
+ content="Using everything so far, reply ONLY with {'final_answer':'…'}. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
127
+ )
128
+ raw = LLM(state.messages + [wrap]).content.strip()
129
+ state.add(AIMessage(content=raw))
130
+ parsed = safe_json(raw)
131
+ state.final_answer = parsed.get("final_answer") if parsed else "Unable to parse final answer."
132
+ return state
133
 
134
+ # ─────────────────────────── Graph wiring ───────────────────────────────
135
 
 
136
  graph = StateGraph(AgentState)
137
 
138
  # Register nodes
139
+ for name, fn in [
140
+ ("tool_selector", tool_selector),
141
+ ("wiki_tool", wiki_tool),
142
+ ("ocr_tool", ocr_tool),
143
+ ("audio_tool", audio_tool),
144
+ ("final_answer", final_answer),
145
+ ]:
146
+ graph.add_node(name, fn)
147
+
148
+ # Edges
149
+ graph.add_edge(START, "tool_selector")
150
+
151
+ def dispatch(state: AgentState) -> str:
152
+ return {
153
+ "wiki": "wiki_tool",
154
+ "ocr": "ocr_tool",
155
+ "audio": "audio_tool",
156
+ "final": "final_answer",
157
+ }.get(state.next_action, "final_answer")
158
 
159
  graph.add_conditional_edges(
160
+ "tool_selector",
161
+ dispatch,
162
+ {
163
+ "wiki_tool": "wiki_tool",
164
+ "ocr_tool": "ocr_tool",
165
+ "audio_tool": "audio_tool",
166
+ "excel_tool": "excel_tool",
167
+ "final_answer": "final_answer",
168
+ },
169
  )
170
 
171
+ # tools loop back to selector
172
+ for tool_name in ("wiki_tool", "ocr_tool", "audio_tool", "excel_tool"):
173
+ graph.add_edge(tool_name, "tool_selector")
174
 
175
+ # final_answerEND
176
+ graph.add_edge("final_answer", END)
177
+
178
+ compiled_graph = graph.compile()
179
+
180
+ # ─────────────────────────── Public API ────────────────────────────────
181
+
182
+ def answer(question: str, *, task_id: Optional[str] = None) -> str:
183
+ state = AgentState(user_question=question, task_id=task_id)
184
+ state.add(SystemMessage(content="You are a helpful assistant."))
185
+ state.add(HumanMessage(content=question))
186
+ compiled_graph.invoke(state)
187
+ return state.final_answer or "No answer."
188
 
 
 
189
 
 
 
 
 
 
190
 
 
 
 
 
 
191
 
 
 
192
 
 
193
 
194
 
 
 
 
 
 
 
 
 
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
 
 
 
197
 
198
  class BasicAgent:
199
  def __init__(self):
old2app.py ADDED
@@ -0,0 +1,587 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import inspect
5
+ import pandas as pd
6
+ from langgraph.prebuilt import ToolNode
7
+
8
+
9
+ # from typing import Any, Dict
10
+ # from typing import TypedDict, Annotated
11
+
12
+ from langchain_openai import ChatOpenAI
13
+ from langgraph.graph import StateGraph, START, END
14
+ from langgraph.graph.message import add_messages
15
+ from langchain.schema import HumanMessage, SystemMessage, AIMessage
16
+ # Create a ToolNode that knows about your web_search function
17
+ import json
18
+ from old2state import AgentState
19
+
20
+ # --- Constants ---
21
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
+
23
+ from old2tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools, audio_transcriber_tool, wikipedia_search_tool
24
+
25
+ llm = ChatOpenAI(model_name="gpt-4.1")
26
+
27
+ # ─── 1) plan_node ───
28
+ # ─── 1) plan_node ───
29
+ tool_counter = 0
30
+
31
+
32
+ # ─── 1) plan_node ───
33
+ def plan_node(state: AgentState) -> AgentState:
34
+ """
35
+ Step 1: Ask GPT to draft a concise direct answer (INTERIM_ANSWER),
36
+ then decide if it's confident enough to stop or if it needs one tool.
37
+ If confident: return {"final_answer":"<answer>"}
38
+ Otherwise: return exactly one of:
39
+ {"wiki_query":"..."},
40
+ {"ocr_path":"..."},
41
+ {"excel_path":"...","excel_sheet_name":"..."},
42
+ {"audio_path":"..."}
43
+ """
44
+ prior_msgs = state.get("messages", [])
45
+ user_input = ""
46
+ for msg in reversed(prior_msgs):
47
+ if isinstance(msg, HumanMessage):
48
+ user_input = msg.content
49
+ break
50
+
51
+ system_msg = SystemMessage(
52
+ content=(
53
+
54
+ "You are an agent that must do two things in one JSON output:\n\n"
55
+ " 1) Provide a concise, direct answer to the user's question (no explanation).\n"
56
+ " 2) Judge whether that answer is reliable:\n"
57
+ " • If you are fully confident, return exactly:\n"
58
+ " {\"final_answer\":\"<your concise answer>\"}\n"
59
+ " and nothing else.\n"
60
+ " • Otherwise, return exactly one of:\n"
61
+ " {\"wiki_query\":\"<Wikipedia search>\"}\n"
62
+ " {\"ocr_path\":\"<image path or task_id>\"}\n"
63
+ " {\"excel_path\":\"<xlsx path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
64
+ " {\"audio_path\":\"<audio path or task_id>\"}\n"
65
+ " and nothing else.\n"
66
+ "Do NOT wrap in markdown—output only a single JSON object.\n"
67
+ f"User's question: \"{user_input}\"\n"
68
+ )
69
+ )
70
+ human_msg = HumanMessage(content=user_input)
71
+ llm_response = llm([system_msg, human_msg])
72
+ llm_out = llm_response.content.strip()
73
+
74
+ ai_msg = AIMessage(content=llm_out)
75
+ new_msgs = prior_msgs.copy() + [ai_msg]
76
+
77
+ try:
78
+ parsed = json.loads(llm_out)
79
+ if isinstance(parsed, dict):
80
+ partial: AgentState = {"messages": new_msgs}
81
+ allowed = {
82
+ "final_answer",
83
+ "wiki_query",
84
+ "ocr_path",
85
+ "excel_path",
86
+ "excel_sheet_name",
87
+ "audio_path",
88
+ }
89
+ for k, v in parsed.items():
90
+ if k in allowed:
91
+ partial[k] = v
92
+ return partial
93
+ except json.JSONDecodeError:
94
+ pass
95
+
96
+ return {
97
+ "messages": new_msgs,
98
+ "final_answer": "Sorry, I could not parse your intent.",
99
+ }
100
+
101
+
102
+ # ─── 2) store_prev_state ───
103
+ def store_prev_state(state: AgentState) -> AgentState:
104
+ return {**state, "prev_state": state.copy()}
105
+
106
+
107
+ # ─── 3) tools_node ───
108
+ def tool_node(state: AgentState) -> AgentState:
109
+ """
110
+ Dispatch exactly one tool based on which key was set:
111
+ - wiki_query → wikipedia_search_tool
112
+ - ocr_path → ocr_image_tool
113
+ - excel_path → parse_excel_tool
114
+ - audio_path → audio_transcriber_tool
115
+ """
116
+ global tool_counter
117
+ if tool_counter >= 5:
118
+ # If we've already run 5 tools, do nothing
119
+ return {
120
+ "messages": state["messages"],
121
+ "final_answer": state.get("final_answer", "No interim answer available.")
122
+ }
123
+
124
+ tool_counter += 1
125
+
126
+ if state.get("wiki_query"):
127
+ return wikipedia_search_tool(state)
128
+ if state.get("ocr_path"):
129
+ return ocr_image_tool(state)
130
+ if state.get("excel_path"):
131
+ return parse_excel_tool(state)
132
+ if state.get("audio_path"):
133
+ return audio_transcriber_tool(state)
134
+
135
+ return {} # no tool key present
136
+
137
+
138
+ # ─── 4) merge_tool_output ───
139
+ def merge_tool_output(state: AgentState) -> AgentState:
140
+ """
141
+ Combine previous state and tool output into one, but remove any stale request-keys.
142
+ """
143
+ prev = state.get("prev_state", {}).copy()
144
+
145
+ # Drop stale request-keys in prev
146
+ for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
147
+ prev.pop(dead, None)
148
+
149
+ merged = {**prev, **state}
150
+ # Drop them again from merged so they don't persist into the next cycle
151
+ for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
152
+ merged.pop(dead, None)
153
+
154
+ merged.pop("prev_state", None)
155
+ return merged
156
+
157
+
158
+ # ─── 5) inspect_node ───
159
+ def inspect_node(state: AgentState) -> AgentState:
160
+ """
161
+ After running a tool, show GPT:
162
+ - ORIGINAL user question
163
+ - Any tool results (web_search_result, ocr_result, excel_result, transcript, wiki_result)
164
+ - The INTERIM_ANSWER (always present if plan_node ran correctly)
165
+
166
+ If tool_counter ≥ 5, use LLM once more (with full context) to craft a final answer.
167
+ Otherwise, ask GPT to either:
168
+ • Return {"final_answer":"<final>"} if done, OR
169
+ • Return exactly one tool key to run next (wiki_query / ocr_path / excel_path & excel_sheet_name / audio_path).
170
+ """
171
+
172
+ global tool_counter
173
+
174
+ # If we've already run 5 tools, ask GPT for a strictly‐formatted JSON final_answer
175
+ if tool_counter >= 5:
176
+ messages_for_llm = []
177
+
178
+ # Re‐insert the user’s question
179
+ question = ""
180
+ for msg in reversed(state.get("messages", [])):
181
+ if isinstance(msg, HumanMessage):
182
+ question = msg.content
183
+ break
184
+ messages_for_llm.append(SystemMessage(content=f"USER_QUESTION: {question}"))
185
+
186
+ # Add any tool results so far
187
+ if sr := state.get("web_search_result"):
188
+ messages_for_llm.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {sr}"))
189
+ if orc := state.get("ocr_result"):
190
+ messages_for_llm.append(SystemMessage(content=f"OCR_RESULT: {orc}"))
191
+ if exr := state.get("excel_result"):
192
+ messages_for_llm.append(SystemMessage(content=f"EXCEL_RESULT: {exr}"))
193
+ if tr := state.get("transcript"):
194
+ messages_for_llm.append(SystemMessage(content=f"AUDIO_TRANSCRIPT: {tr}"))
195
+ if wr := state.get("wiki_result"):
196
+ messages_for_llm.append(SystemMessage(content=f"WIKIPEDIA_RESULT: {wr}"))
197
+
198
+ # Show the interim answer
199
+ interim = state.get("interim_answer", "")
200
+ messages_for_llm.append(SystemMessage(content=f"INTERIM_ANSWER: {interim}"))
201
+
202
+ # Now ask for JSON ONLY (no reasoning, no extra text)
203
+ final_prompt = (
204
+ "Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
205
+ "Using only the information above—including the USER_QUESTION, "
206
+ "any TOOL_RESULT, and the INTERIM_ANSWER—produce a concise final answer. "
207
+ "Return exactly one JSON object and nothing else, in this format:\n\n"
208
+ "{\"final_answer\":\"<your final answer>\"}\n"
209
+ "Do not include any other words or punctuation outside that JSON. if its numbers, dont show the units"
210
+ )
211
+ messages_for_llm.append(SystemMessage(content=final_prompt))
212
+
213
+ llm_response = llm(messages_for_llm)
214
+ raw = llm_response.content.strip()
215
+ new_msgs = state["messages"] + [AIMessage(content=raw)]
216
+
217
+ # Try to parse exactly one JSON with "final_answer"
218
+ try:
219
+ parsed = json.loads(raw)
220
+ if isinstance(parsed, dict) and "final_answer" in parsed:
221
+ return {"messages": new_msgs, "final_answer": parsed["final_answer"]}
222
+ except json.JSONDecodeError:
223
+ pass
224
+
225
+ # Fallback to returning the interim in case JSON parse fails
226
+ return {"messages": new_msgs, "final_answer": interim}
227
+ # ——————————— If tool_counter < 5, proceed as before ———————————
228
+ messages_for_llm = []
229
+
230
+ # (1) Re‐insert original user question
231
+ question = ""
232
+ for msg in reversed(state.get("messages", [])):
233
+ if isinstance(msg, HumanMessage):
234
+ question = msg.content
235
+ break
236
+ messages_for_llm.append(SystemMessage(content=f"USER_QUESTION: {question}"))
237
+
238
+ # (2) Add any tool results
239
+ if sr := state.get("web_search_result"):
240
+ messages_for_llm.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {sr}"))
241
+ if orc := state.get("ocr_result"):
242
+ messages_for_llm.append(SystemMessage(content=f"OCR_RESULT: {orc}"))
243
+ if exr := state.get("excel_result"):
244
+ messages_for_llm.append(SystemMessage(content=f"EXCEL_RESULT: {exr}"))
245
+ if tr := state.get("transcript"):
246
+ messages_for_llm.append(SystemMessage(content=f"AUDIO_TRANSCRIPT: {tr}"))
247
+ if wr := state.get("wiki_result"):
248
+ messages_for_llm.append(SystemMessage(content=f"WIKIPEDIA_RESULT: {wr}"))
249
+
250
+ # (3) Always show the interim answer
251
+ interim = state.get("interim_answer", "")
252
+ messages_for_llm.append(SystemMessage(content=f"INTERIM_ANSWER: {interim}"))
253
+
254
+ # (4) Prompt GPT to decide final or another tool
255
+ prompt = (
256
+ "You have a current draft answer (INTERIM_ANSWER) and possibly some tool results above.\n"
257
+ "If you are confident it’s correct, return exactly:\n"
258
+ " {\"final_answer\":\"<your final answer>\"}\n"
259
+ "and nothing else.\n"
260
+ "Otherwise, return exactly one of these JSON literals to fetch another tool:\n"
261
+ " {\"wiki_query\":\"<query for Wikipedia>\"}\n"
262
+ " {\"ocr_path\":\"<image path or task_id>\"}\n"
263
+ " {\"excel_path\":\"<xls path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
264
+ " {\"audio_path\":\"<audio path or task_id>\"}\n"
265
+ "Do NOT wrap in markdown—return only the JSON object.\n"
266
+ )
267
+ messages_for_llm.append(SystemMessage(content=prompt))
268
+ llm_response = llm(messages_for_llm)
269
+ raw = llm_response.content.strip()
270
+ new_msgs = state["messages"] + [AIMessage(content=raw)]
271
+
272
+ # Try to parse the LLM’s JSON
273
+ try:
274
+ parsed = json.loads(raw)
275
+ if isinstance(parsed, dict):
276
+ # (a) If GPT gave a final_answer, return immediately
277
+ if "final_answer" in parsed:
278
+ return {"messages": new_msgs, "final_answer": parsed["final_answer"]}
279
+
280
+ # (b) If GPT requested exactly one valid tool, return only that key
281
+ valid_keys = {"wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"}
282
+ requested_keys = set(parsed.keys()) & valid_keys
283
+ if len(requested_keys) == 1:
284
+ clean: AgentState = {"messages": new_msgs}
285
+ for k in requested_keys:
286
+ clean[k] = parsed[k]
287
+ return clean
288
+ except json.JSONDecodeError:
289
+ pass
290
+
291
+ # (c) Fallback: if GPT never returned a valid tool key or a final_answer,
292
+ # just finalize with the existing interim_answer
293
+ return {"messages": new_msgs, "final_answer": interim}
294
+
295
+
296
# ─── 6) finalize_node ───
def finalize_node(state: AgentState) -> AgentState:
    """
    Terminal node: forward the final answer already computed upstream.

    The routing functions only send control here when ``final_answer`` is
    present in the state, so a missing key indicates a graph-wiring bug.

    Returns:
        {"final_answer": <answer>} on success, or an explicit error marker.
    """
    # Explicit None check (not truthiness): the routers use `is not None`,
    # so an empty-string answer must be forwarded, not reported as an error.
    fa = state.get("final_answer")
    if fa is not None:
        return {"final_answer": fa}
    return {"final_answer": "ERROR: finalize called without a final_answer."}
304
+
305
+
306
# ─── 7) Build the graph and wire edges ───
# Overall flow: plan → (store_prev_state → tools → merge_tool_output →
# inspect) loop, repeating until plan or inspect sets final_answer, then
# finalize → END.
graph = StateGraph(AgentState)

# Register nodes
graph.add_node("plan", plan_node)
graph.add_node("store_prev_state", store_prev_state)
graph.add_node("tools", tool_node)
graph.add_node("merge_tool_output", merge_tool_output)
graph.add_node("inspect", inspect_node)
graph.add_node("finalize", finalize_node)

# START → plan
graph.add_edge(START, "plan")

# plan → either finalize (if plan set final_answer) or store_prev_state (if plan wants a tool)
def route_plan(plan_out: AgentState) -> str:
    # Any non-None final_answer (even an empty string) means "done".
    if plan_out.get("final_answer") is not None:
        return "finalize"
    return "store_prev_state"

graph.add_conditional_edges(
    "plan",
    route_plan,
    {"store_prev_state": "store_prev_state", "finalize": "finalize"},
)

# store_prev_state → tools
graph.add_edge("store_prev_state", "tools")

# tools → merge_tool_output
graph.add_edge("tools", "merge_tool_output")

# merge_tool_output → inspect
graph.add_edge("merge_tool_output", "inspect")

# inspect → either finalize (if inspect set final_answer) or store_prev_state (if inspect wants another tool)
def route_inspect(inspect_out: AgentState) -> str:
    # Same routing rule as route_plan, applied after each tool round.
    if inspect_out.get("final_answer") is not None:
        return "finalize"
    return "store_prev_state"

graph.add_conditional_edges(
    "inspect",
    route_inspect,
    {"store_prev_state": "store_prev_state", "finalize": "finalize"},
)

# finalize → END
graph.add_edge("finalize", END)

compiled_graph = graph.compile()
357
+
358
+
359
+ # ─── 8) respond_to_input ───
360
+ def respond_to_input(user_input: str, task_id) -> str:
361
+ """
362
+ Reset the global tool_counter, seed state['messages'], invoke the graph,
363
+ and return the final_answer.
364
+ """
365
+ global tool_counter
366
+ tool_counter = 0 # Reset on every new user query
367
+
368
+ system_msg = SystemMessage(
369
+ content=(
370
+ "You are an agent orchestrator. Decide whether to use a tool or answer directly.\n"
371
+ "Try not to use tools so many times. If you think you can answer the question without using a tool, do it Please.\n"
372
+ "Tools available:\n"
373
+ " • Wikipedia: set {\"wiki_query\":\"<search terms>\"}\n"
374
+ " • OCR: set {\"ocr_path\":\"<image path or task_id>\"}\n"
375
+ " • Excel: set {\"excel_path\":\"<xlsx path>\", \"excel_sheet_name\":\"<sheet>\"}\n"
376
+ " • Audio transcription: set {\"audio_path\":\"<audio path or task_id>\"}\n"
377
+ "If you can answer immediately, set {\"final_answer\":\"<answer>\"}. "
378
+ "Respond with only one JSON object and no extra formatting."
379
+ )
380
+ )
381
+ human_msg = HumanMessage(content=user_input)
382
+
383
+ initial_state: AgentState = {"messages": [system_msg, human_msg], "task_id": task_id}
384
+ final_state = compiled_graph.invoke(initial_state)
385
+ return final_state.get("final_answer", "Error: No final answer generated.")
386
+
387
class BasicAgent:
    """Thin callable wrapper that forwards each question to the LangGraph agent."""

    def __init__(self):
        print("BasicAgent initialized.")

    def __call__(self, question: str, task_id) -> str:
        """
        Answer one benchmark question.

        Args:
            question: The question text from the scoring server.
            task_id: Task identifier, forwarded so tools can download any
                file attached to the task.

        Returns:
            The agent's final answer string.
        """
        # Blank lines keep successive questions visually separated in logs.
        print()
        print()
        print()
        print()

        print(f"Agent received question: {question}")
        print()
        return respond_to_input(question, task_id)
404
+
405
+
406
+
407
+
408
+
409
+
410
def run_and_submit_all( profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile injected by gr.LoginButton; None when the
            user is not logged in.

    Returns:
        A (status_message, results_dataframe) tuple for the Gradio outputs;
        the dataframe is None when the run aborts before any questions ran.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code

    # Submission requires a username, so bail out early when not logged in.
    if profile:
        username= f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent ( modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    # Per-question failures are recorded in results_log but do not stop the run.
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text, task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        # Try to surface the server-provided error detail when available.
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
530
+
531
+
532
+ # --- Build Gradio Interface using Blocks ---
533
+ with gr.Blocks() as demo:
534
+ gr.Markdown("# Basic Agent Evaluation Runner")
535
+ gr.Markdown(
536
+ """
537
+ **Instructions:**
538
+
539
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
540
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
541
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
542
+
543
+ ---
544
+ **Disclaimers:**
545
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
546
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
547
+ """
548
+ )
549
+
550
+ gr.LoginButton()
551
+
552
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
553
+
554
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
555
+ # Removed max_rows=10 from DataFrame constructor
556
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
557
+
558
+ run_button.click(
559
+ fn=run_and_submit_all,
560
+ outputs=[status_output, results_table]
561
+ )
562
+
563
if __name__ == "__main__":
    # print("LangGraph version:", langgraph.__version__)
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
    # import langgraph
    # print("▶︎ LangGraph version:", langgraph.__version__)
    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        # Both variables are only set inside a Hugging Face Space runtime.
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup: # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
old2state.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing_extensions import TypedDict
from typing import Annotated
from langgraph.graph.message import add_messages

class AgentState(TypedDict, total=False):
    """Shared LangGraph state dict; total=False makes every key optional."""
    # Conversation history; add_messages appends on merge instead of replacing.
    messages: Annotated[list, add_messages]
    # --- Tool-request keys (set by planner nodes to ask for a tool run) ---
    web_search_query: str
    ocr_path: str
    excel_path: str
    excel_sheet_name: str
    # --- Tool-result keys (written back by the tool wrappers) ---
    web_search_result: str
    ocr_result: str
    excel_result: str
    # The answer that terminates the graph run.
    final_answer: str
    user_input: str
    audio_path: str
    transcript: str
    # NOTE(review): both `transcript` and `audio_transcript` exist — confirm
    # which one downstream nodes actually read.
    audio_transcript: str
    wiki_query: str
    wiki_result: str
    # Scoring-server task id, used by tools to download attached files.
    task_id: str
    tool_counter: int
old2tools.py ADDED
@@ -0,0 +1,422 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tools.py
2
+
3
+ import pandas as pd
4
+ # from langchain_community.tools import DuckDuckGoSearchRun
5
+ from pathlib import Path
6
+ # from PIL import Image
7
+ # import pytesseract
8
+ from old2state import AgentState
9
+ from langchain.schema import HumanMessage
10
+ import regex as re
11
+ import time
12
+ from duckduckgo_search import DDGS
13
+
14
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
+
16
+
17
def _download_file_for_task(task_id: str, ext: str) -> str:
    """
    Best-effort download of the remote file attached to a task.

    Fetches GET {DEFAULT_API_URL}/files/{task_id} and saves the body under
    ./hf_files/{task_id}.{ext} (the server does not report an extension, so
    the caller supplies one).

    Args:
        task_id: Scoring-server task identifier, also used as the file stem.
        ext: Extension to save the file with (e.g. "xlsx", "png", "mp3").

    Returns:
        The local file path on success, or "" when the server has no file
        for this task or the download failed.
    """
    print("reached _download_file_for_task")
    os.makedirs("hf_files", exist_ok=True)
    local_path = os.path.join("hf_files", f"{task_id}.{ext}")
    url = f"{DEFAULT_API_URL}/files/{task_id}"

    try:
        resp = requests.get(url, timeout=10)
        if resp.status_code == 200 and resp.content:
            print(f"Downloaded file from {url} to {local_path}")
            with open(local_path, "wb") as f:
                f.write(resp.content)
            return local_path
    except (requests.exceptions.RequestException, OSError) as e:
        # Deliberately best-effort — callers treat "" as "no file attached" —
        # but log the reason instead of swallowing it silently.
        print(f"_download_file_for_task: download failed for {url}: {e}")

    # 404, empty body, or download error
    return ""
41
+
42
+
43
def web_search_tool(state: AgentState) -> AgentState:
    """
    Run a DuckDuckGo text search for state["web_search_query"].

    Retries up to 5 times, 4 seconds apart, on both raised exceptions
    (network errors, timeouts) and DuckDuckGo's "202 Ratelimit" marker.

    Returns:
        {"web_search_query": None, "web_search_result": <text>} on any
        terminal outcome, or {} when no query is pending.
    """
    print("reached web_search_tool")
    query = state.get("web_search_query", "")
    if not query:
        return {}  # nothing to do

    ddg = DDGS()
    max_retries = 5
    result_text = ""

    attempt = 0
    while attempt < max_retries:
        attempt += 1
        is_last_try = attempt == max_retries
        try:
            result_text = str(ddg.text(query, max_results=5))
        except Exception as e:
            if is_last_try:
                # Out of retries: surface the failure as the result text.
                return {
                    "web_search_query": None,
                    "web_search_result": f"Error during DuckDuckGo search: {e}"
                }
            print(f"web_search_tool: exception '{e}', retrying in 4 seconds ({attempt}/{max_retries})")
            time.sleep(4)
            continue

        if "202 Ratelimit" in result_text and not is_last_try:
            print(f"web_search_tool: received '202 Ratelimit', retrying in 4 seconds ({attempt}/{max_retries})")
            time.sleep(4)
            continue

        # Clean result, or a still-rate-limited final attempt: keep it as-is.
        break

    return {
        "web_search_query": None,
        "web_search_result": result_text
    }
91
+
92
+
93
+
94
def ocr_image_tool(state: AgentState) -> AgentState:
    """
    Run OCR plus image captioning on the file attached to the current task.

    Expects:
        state["task_id"]: used to download the image via
            _download_file_for_task, trying .png/.jpg/.jpeg in order.
            (state["ocr_path"] is only the planner's trigger key; the file
            itself always comes from the task download.)

    Returns:
        {"ocr_path": None, "ocr_result": "<OCR text + caption or error>"}
    """
    print("reached ocr_image_tool")

    # Fix: initialize before the loop. Previously this assignment was
    # commented out, so a failed download raised NameError at the check below.
    local_img = ""
    for ext in ("png", "jpg", "jpeg"):
        candidate = _download_file_for_task(state.get("task_id"), ext)
        if candidate:
            local_img = candidate
            break

    if not local_img or not os.path.exists(local_img):
        return {
            "ocr_path": None,
            "ocr_result": "Error: No image file found (local nonexistent or download failed)."
        }

    # Read raw bytes once; both inference calls below reuse them.
    try:
        with open(local_img, "rb") as f:
            image_bytes = f.read()
    except Exception as e:
        return {
            "ocr_path": None,
            "ocr_result": f"Error reading image file: {e}"
        }

    # HF Inference API auth. NOTE(review): the error text says
    # HUGGINGFACE_API_KEY but the variable actually read is HF_TOKEN.
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        return {
            "ocr_path": None,
            "ocr_result": "Error: HUGGINGFACE_API_KEY not set in environment."
        }

    headers = {"Authorization": f"Bearer {hf_token}"}

    # OCR pass. NOTE(review): "google/vit-ocr" does not look like a real HF
    # model id — confirm the endpoint; any failure is captured in the result.
    ocr_text = ""
    try:
        ocr_resp = requests.post(
            "https://api-inference.huggingface.co/models/google/vit-ocr",
            headers=headers,
            files={"file": image_bytes},
            timeout=30
        )
        ocr_resp.raise_for_status()
        ocr_json = ocr_resp.json()

        # Expected JSON: "pages" → blocks with "lines" → each line has "text".
        lines = []
        for page in ocr_json.get("pages", []):
            for line in page.get("lines", []):
                lines.append(line.get("text", "").strip())
        ocr_text = "\n".join(lines).strip() or "(no visible text)"
    except Exception as e:
        ocr_text = f"Error during HF OCR: {e}"

    # Captioning pass for a brief description of the image.
    caption = ""
    try:
        cap_resp = requests.post(
            "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base",
            headers=headers,
            files={"file": image_bytes},
            timeout=30
        )
        cap_resp.raise_for_status()
        cap_json = cap_resp.json()
        # Response shape: {"generated_text": "...caption..."}
        caption = cap_json.get("generated_text", "").strip()
        if not caption:
            caption = "(no caption returned)"
    except Exception as e:
        caption = f"Error during HF captioning: {e}"

    # Combine both results; clearing ocr_path marks the request as consumed.
    combined = f"OCR text:\n{ocr_text}\n\nImage caption:\n{caption}"
    print("combined: ")
    return {
        "ocr_path": None,
        "ocr_result": combined
    }
195
+
196
def parse_excel_tool(state: AgentState) -> AgentState:
    """
    Read the Excel file attached to the current task into a string.

    Expects:
        state["excel_path"]: trigger key set by the planner (path or task id).
        state["excel_sheet_name"]: optional sheet name; falls back to the
            first sheet when missing or unknown.
        state["task_id"]: used to download the .xlsx from the scoring server.

    Returns:
        {"excel_path": None, "excel_sheet_name": None,
         "excel_result": "<stringified records or Markdown table>"},
        or {} when no excel_path was requested.

    When no spreadsheet can be downloaded or parsed, falls back to scanning
    the user messages for a Markdown-style table.
    """
    print("reached parse_excel_tool")
    path_or_id = state.get("excel_path", "")
    sheet = state.get("excel_sheet_name", "")
    # Fix: check the trigger key before touching the network — previously the
    # download ran even when no Excel work was requested.
    if not path_or_id:
        return {}

    local_xlsx = _download_file_for_task(state.get("task_id"), "xlsx")

    # Preferred path: a real spreadsheet was downloaded.
    if local_xlsx and os.path.exists(local_xlsx):
        try:
            print("reached excel file found")
            xls = pd.ExcelFile(local_xlsx)
            if sheet and sheet in xls.sheet_names:
                df = pd.read_excel(xls, sheet_name=sheet)
            else:
                df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
            records = df.to_dict(orient="records")
            text = str(records)
            print("reached excel file found: ")
            print(text)
            print()
            return {
                "excel_path": None,
                "excel_sheet_name": None,
                "excel_result": text
            }
        except Exception as e:
            print(f">>> parse_excel_tool: Error reading Excel file {local_xlsx}: {e}")
            # Fall back to scanning for Markdown below

    # Fallback: scan any HumanMessage for a Markdown-style table.
    table_lines = []
    collecting = False
    for msg in state.get("messages", []):
        if isinstance(msg, HumanMessage):
            for line in msg.content.splitlines():
                if re.match(r"^\s*\|\s*[-A-Za-z0-9]", line):
                    collecting = True
                if collecting:
                    if not re.match(r"^\s*\|", line):
                        # First non-table line after the table ends the scan.
                        collecting = False
                        break
                    table_lines.append(line)
            if table_lines:
                break

    if not table_lines:
        return {
            "excel_path": None,
            "excel_sheet_name": None,
            "excel_result": "Error: No Excel file found and no Markdown table detected in prompt."
        }

    # Drop separator rows like "|---|---|" before returning the table block.
    clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
    table_block = "\n".join(clean_rows).strip()
    print(f"Parsed excel as excel_result: {table_block}")
    return {
        "excel_path": None,
        "excel_sheet_name": None,
        "excel_result": table_block
    }
275
+
276
+
277
+
278
+
279
+ import os
280
+
281
+
282
+
283
+
284
+
285
+ import os
286
+ import openai
287
+ from old2state import AgentState
288
+
289
def audio_transcriber_tool(state: AgentState) -> AgentState:
    """
    Transcribe the audio file attached to the current task with OpenAI Whisper.

    state["audio_path"] is the planner's trigger key; the actual file is
    always downloaded for state["task_id"], trying .mp3/.wav/.m4a in order,
    then sent to the whisper-1 model.

    Returns:
        {"audio_path": None, "transcript": <text or error message>},
        or {} when no audio was requested.
    """
    print("reached audio_transcriber_tool")
    if not state.get("audio_path", ""):
        return {}

    # Try each supported extension until the scoring server yields a file.
    local_audio = ""
    for extension in ("mp3", "wav", "m4a"):
        downloaded = _download_file_for_task(state.get("task_id"), extension)
        if downloaded:
            local_audio = downloaded
            break

    if not (local_audio and os.path.exists(local_audio)):
        return {
            "audio_path": None,
            "transcript": "Error: No audio file found (download failed)."
        }

    # Send the file to OpenAI Whisper; any failure becomes the transcript text.
    try:
        openai.api_key = os.getenv("OPENAI_API_KEY")
        if not openai.api_key:
            raise RuntimeError("OPENAI_API_KEY is not set in environment.")

        with open(local_audio, "rb") as audio_file:
            print("reached openai.audio.transcriptions.create")
            response = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
            )
            print("reached response")
            text = response.text.strip()
    except Exception as e:
        text = f"Error during transcription: {e}"
    print(f"Transcripted as transcript: {text}")
    return {
        "audio_path": None,
        "transcript": text
    }
343
+ # tools.py
344
+
345
+ import re
346
+ import requests
347
+ from old2state import AgentState
348
+
349
def wikipedia_search_tool(state: AgentState) -> AgentState:
    """
    Look up state["wiki_query"] on Wikipedia and return a short summary.

    Two-step lookup: the MediaWiki search API picks the best-matching page
    title, then the REST summary endpoint fetches its plain-text extract.

    Returns:
        {"wiki_query": None, "wiki_result": <summary or error message>},
        or {} when no query was provided.
    """
    print("reached wikipedia search tool")
    query = state.get("wiki_query", "").strip()
    if not query:
        return {}

    def _result(text: str) -> AgentState:
        # Every exit clears the request key and stores the result text.
        return {"wiki_query": None, "wiki_result": text}

    try:
        # Step 1: search for page titles matching the query.
        search_resp = requests.get(
            "https://en.wikipedia.org/w/api.php",
            params={
                "action": "query",
                "list": "search",
                "srsearch": query,
                "format": "json",
                "utf8": 1,
            },
            timeout=10,
        )
        search_resp.raise_for_status()
        hits = search_resp.json().get("query", {}).get("search", [])
        if not hits:
            return _result(f"No Wikipedia page found for '{query}'.")

        # Step 2: take the top hit's title.
        first_title = hits[0].get("title", "")
        if not first_title:
            return _result("Unexpected format from Wikipedia search.")

        # Step 3: fetch the page summary via the REST summary endpoint.
        summary_url = (
            "https://en.wikipedia.org/api/rest_v1/page/summary/"
            + requests.utils.requote_uri(first_title)
        )
        summary_resp = requests.get(summary_url, timeout=10)
        summary_resp.raise_for_status()
        payload = summary_resp.json()

        # Step 4: prefer "extract"; fall back to "description" or a stub.
        summary_text = payload.get("extract")
        if not summary_text:
            summary_text = payload.get("description", "No summary available.")

        return _result(f"Title: {first_title}\n\n{summary_text}")

    except requests.exceptions.RequestException as e:
        return _result(f"Wikipedia search error: {e}")
    except Exception as e:
        return _result(f"Unexpected error in wikipedia_search_tool: {e}")
409
+
410
+
411
+
412
+
413
+
414
def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
    """
    Merge a tool wrapper's partial output into the main agent state.

    Later keys win: any key present in tool_out overrides the value carried
    in state, exactly like {**state, **tool_out}. Wire this as its own graph
    node, not as a transition function.
    """
    merged = dict(state)
    merged.update(tool_out)
    return merged
old_app_copy.py CHANGED
@@ -15,12 +15,12 @@ from langgraph.graph.message import add_messages
15
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
16
  # Create a ToolNode that knows about your web_search function
17
  import json
18
- from state import AgentState
19
 
20
  # --- Constants ---
21
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
- from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools, audio_transcriber_tool, wikipedia_search_tool
24
 
25
  llm = ChatOpenAI(model_name="gpt-4o-mini")
26
 
 
15
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
16
  # Create a ToolNode that knows about your web_search function
17
  import json
18
+ from old2state import AgentState
19
 
20
  # --- Constants ---
21
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
+ from old2tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools, audio_transcriber_tool, wikipedia_search_tool
24
 
25
  llm = ChatOpenAI(model_name="gpt-4o-mini")
26
 
state.py CHANGED
@@ -1,22 +1,23 @@
1
- from typing_extensions import TypedDict
2
- from typing import Annotated
3
- from langgraph.graph.message import add_messages
 
 
4
 
5
class AgentState(TypedDict, total=False):
    """Shared blackboard passed between LangGraph nodes for one run.

    total=False makes every key optional: tool nodes return partial
    dicts that the graph merges back into this state.
    """
    # Chat history; add_messages appends new messages instead of replacing.
    messages: Annotated[list, add_messages]
    # Pending tool requests (set by the planner, cleared by the tool).
    web_search_query: str
    ocr_path: str
    excel_path: str
    excel_sheet_name: str
    # Tool outputs (set by the corresponding tool node).
    web_search_result: str
    ocr_result: str
    excel_result: str
    # Final answer once the agent decides to stop.
    final_answer: str
    # Original question text from the user.
    user_input: str
    # Audio request/outputs.
    audio_path: str
    transcript: str
    audio_transcript: str
    # Wikipedia request/output.
    wiki_query: str
    wiki_result: str
    # Task id from the scoring API; used to download attached files.
    task_id: str
    # Number of tool invocations so far (loop guard).
    tool_counter: int
 
1
+ from dataclasses import dataclass, field
2
+ from typing import List, Dict, Any, Optional
3
+ import json
4
+ from dataclasses import dataclass, field, asdict
5
+ from langchain.schema import SystemMessage, HumanMessage, AIMessage, BaseMessage
6
 
7
+
8
+ @dataclass
9
+ class AgentState:
10
+ """Single source‑of‑truth context for one user query run."""
11
+
12
+ user_question: str
13
+ task_id: Optional[str] = None
14
+ messages: List[BaseMessage] = field(default_factory=list)
15
+
16
+ next_action: Optional[str] = None # wiki | ocr | audio | final
17
+ query: Optional[str] = None # wiki search term
18
+ tool_calls: int = 0
19
+
20
+ final_answer: Optional[str] = None
21
+
22
+ def add(self, *msgs: BaseMessage):
23
+ self.messages.extend(msgs)
 
tools.py CHANGED
@@ -1,14 +1,12 @@
1
  # tools.py
2
 
3
  import pandas as pd
4
- # from langchain_community.tools import DuckDuckGoSearchRun
5
  from pathlib import Path
6
- # from PIL import Image
7
- # import pytesseract
8
- from state import AgentState
9
- from langchain.schema import HumanMessage
10
  import regex as re
11
  import time
 
12
  from duckduckgo_search import DDGS
13
 
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -39,83 +37,20 @@ def _download_file_for_task(task_id: str, ext: str) -> str:
39
  # If we get here, either 404 or download error
40
  return ""
41
 
42
-
43
- def web_search_tool(state: AgentState) -> AgentState:
44
- """
45
- Expects: state["web_search_query"] is a non‐empty string.
46
- Returns: {"web_search_query": None, "web_search_result": <string>}.
47
- Retries up to 5 times on either a DuckDuckGo “202 Ratelimit” response or any exception (e.g. timeout).
48
- """
49
- print("reached web_search_tool")
50
- query = state.get("web_search_query", "")
51
- if not query:
52
- return {} # nothing to do
53
-
54
- ddg = DDGS()
55
- max_retries = 5
56
- result_text = ""
57
-
58
- for attempt in range(1, max_retries + 1):
59
- try:
60
- result_text = str(ddg.text(query, max_results=5))
61
- except Exception as e:
62
- # Network error or timeout—retry up to max_retries
63
- if attempt < max_retries:
64
- print(f"web_search_tool: exception '{e}', retrying in 4 seconds ({attempt}/{max_retries})")
65
- time.sleep(4)
66
- continue
67
- else:
68
- # Final attempt failed
69
- return {
70
- "web_search_query": None,
71
- "web_search_result": f"Error during DuckDuckGo search: {e}"
72
- }
73
-
74
- # Check for DuckDuckGo rate‐limit indicator
75
- if "202 Ratelimit" in result_text:
76
- if attempt < max_retries:
77
- print(f"web_search_tool: received '202 Ratelimit', retrying in 4 seconds ({attempt}/{max_retries})")
78
- time.sleep(4)
79
- continue
80
- else:
81
- # Final attempt still rate‐limited
82
- break
83
-
84
- # Successful response (no exception and no rate‐limit text)
85
- break
86
-
87
- return {
88
- "web_search_query": None,
89
- "web_search_result": result_text
90
- }
91
-
92
-
93
-
94
- def ocr_image_tool(state: AgentState) -> AgentState:
95
  """
96
  Expects: state["ocr_path"] is either:
97
  • a local image path (e.g. "./hf_files/abc.png"), OR
98
  • a Task ID (e.g. "abc123"), in which case we try downloading
99
  GET {DEFAULT_API_URL}/files/{task_id} with .png/.jpg/.jpeg extensions.
100
 
101
- Returns:
102
- {
103
- "ocr_path": None,
104
- "ocr_result": "<OCR text + brief caption or an error message>"
105
- }
106
  """
107
  print("reached ocr_image_tool")
108
- path_or_id = state.get("ocr_path", "")
109
- # if not path_or_id:
110
- # return {}
111
-
112
- # 1) Determine local_img: either existing path_or_id or download by Task ID
113
- # local_img = ""
114
- # if os.path.exists(path_or_id):
115
- # local_img = path_or_id
116
- # else:
117
  for ext in ("png", "jpg", "jpeg"):
118
- candidate = _download_file_for_task(state.get("task_id"), ext)
119
  if candidate:
120
  local_img = candidate
121
  break
@@ -188,105 +123,39 @@ def ocr_image_tool(state: AgentState) -> AgentState:
188
  # 6) Combine OCR + caption
189
  combined = f"OCR text:\n{ocr_text}\n\nImage caption:\n{caption}"
190
  print("combined: ")
191
- return {
192
- "ocr_path": None,
193
- "ocr_result": combined
194
- }
195
 
196
- def parse_excel_tool(state: AgentState) -> AgentState:
197
  """
198
- Expects state["excel_path"] to be either:
199
- A real local .xlsx path, or
200
- A Task ID string (e.g. "abc123"), in which case we GET /files/abc123.xlsx.
201
- Returns:
202
- {
203
- "excel_path": None,
204
- "excel_sheet_name": None,
205
- "excel_result": "<stringified records or Markdown table>"
206
- }
207
- Always attempts to download the file for the given path or task ID.
208
  """
209
- print("reached parse_excel_tool")
210
- local_xlsx = _download_file_for_task(state.get("task_id"), "xlsx")
211
- path_or_id = state.get("excel_path", "")
212
- sheet = state.get("excel_sheet_name", "")
213
- if not path_or_id:
214
- return {}
215
-
216
- # Always attempt to download the file, regardless of local existence
217
-
218
-
219
- # If we finally have a real file, read it
220
- if local_xlsx and os.path.exists(local_xlsx):
221
- try:
222
- print("reached excel file found")
223
- xls = pd.ExcelFile(local_xlsx)
224
- if sheet and sheet in xls.sheet_names:
225
- df = pd.read_excel(xls, sheet_name=sheet)
226
- else:
227
- df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
228
- records = df.to_dict(orient="records")
229
- text = str(records)
230
- print("reached excel file found: ")
231
- print(text)
232
- print()
233
- return {
234
- "excel_path": None,
235
- "excel_sheet_name": None,
236
- "excel_result": text
237
- }
238
- except Exception as e:
239
- print(f">>> parse_excel_tool: Error reading Excel file {local_xlsx}: {e}")
240
- # Fall back to scanning for Markdown below
241
-
242
- # Fallback: scan any HumanMessage for a Markdown‐style table
243
- messages = state.get("messages", [])
244
- table_lines = []
245
- collecting = False
246
-
247
- for msg in messages:
248
- if isinstance(msg, HumanMessage):
249
- for line in msg.content.splitlines():
250
- if re.match(r"^\s*\|\s*[-A-Za-z0-9]", line):
251
- collecting = True
252
- if collecting:
253
- if not re.match(r"^\s*\|", line):
254
- collecting = False
255
- break
256
- table_lines.append(line)
257
- if table_lines:
258
- break
259
-
260
- if not table_lines:
261
- return {
262
- "excel_path": None,
263
- "excel_sheet_name": None,
264
- "excel_result": "Error: No Excel file found and no Markdown table detected in prompt."
265
- }
266
-
267
- clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
268
- table_block = "\n".join(clean_rows).strip()
269
- print(f"Parsed excel as excel_result: {table_block}")
270
- return {
271
- "excel_path": None,
272
- "excel_sheet_name": None,
273
- "excel_result": table_block
274
- }
275
-
276
-
277
-
278
-
279
- import os
280
-
281
-
282
 
 
 
 
283
 
 
 
 
 
 
 
 
 
 
 
284
 
285
- import os
286
  import openai
287
- from state import AgentState
288
 
289
- def audio_transcriber_tool(state: AgentState) -> AgentState:
290
  """
291
  LangGraph tool for transcribing audio via OpenAI's Whisper API.
292
  Expects: state["audio_path"] to be either:
@@ -301,23 +170,21 @@ def audio_transcriber_tool(state: AgentState) -> AgentState:
301
  Always attempts to download the file for the given path or task ID.
302
  """
303
  print("reached audio_transcriber_tool")
304
- path_or_id = state.get("audio_path", "")
305
- if not path_or_id:
306
- return {}
307
 
308
  # Always attempt to download the file, regardless of local existence
309
  local_audio = ""
310
  for ext in ("mp3", "wav", "m4a"):
311
- candidate = _download_file_for_task(state.get("task_id"), ext)
312
  if candidate:
313
  local_audio = candidate
314
  break
315
 
316
  if not local_audio or not os.path.exists(local_audio):
317
- return {
318
- "audio_path": None,
319
- "transcript": "Error: No audio file found (download failed)."
320
- }
321
 
322
  # Send to OpenAI Whisper
323
  try:
@@ -336,17 +203,13 @@ def audio_transcriber_tool(state: AgentState) -> AgentState:
336
  except Exception as e:
337
  text = f"Error during transcription: {e}"
338
  print(f"Transcripted as transcript: {text}")
339
- return {
340
- "audio_path": None,
341
- "transcript": text
342
- }
343
  # tools.py
344
 
345
  import re
346
  import requests
347
- from state import AgentState
348
 
349
- def wikipedia_search_tool(state: AgentState) -> AgentState:
350
  """
351
  LangGraph wrapper for searching Wikipedia.
352
  Expects: state["wiki_query"] to be a non‐empty string.
@@ -358,7 +221,7 @@ def wikipedia_search_tool(state: AgentState) -> AgentState:
358
  If no valid wiki_query is provided, returns {}.
359
  """
360
  print("reached wikipedia search tool")
361
- query = state.get("wiki_query", "").strip()
362
  if not query:
363
  return {}
364
 
@@ -397,26 +260,63 @@ def wikipedia_search_tool(state: AgentState) -> AgentState:
397
  if not summary_text:
398
  summary_text = summary_data.get("description", "No summary available.")
399
 
400
- return {
401
- "wiki_query": None,
402
- "wiki_result": f"Title: {first_title}\n\n{summary_text}"
403
- }
404
 
405
  except requests.exceptions.RequestException as e:
406
- return {"wiki_query": None, "wiki_result": f"Wikipedia search error: {e}"}
407
  except Exception as e:
408
- return {"wiki_query": None, "wiki_result": f"Unexpected error in wikipedia_search_tool: {e}"}
409
-
410
-
411
-
412
-
413
-
414
- def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
415
- """
416
- Merges whatever partial state the tool wrapper returned (tool_out)
417
- into the main state. That is, combine previous keys with new keys:
418
- new_state = { **state, **tool_out }.
419
- This node should be wired as its own graph node, not as a transition function.
420
- """
421
- new_state = {**state, **tool_out}
422
- return new_state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # tools.py
2
 
3
  import pandas as pd
4
+
5
  from pathlib import Path
6
+
 
 
 
7
  import regex as re
8
  import time
9
+ import os
10
  from duckduckgo_search import DDGS
11
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
37
  # If we get here, either 404 or download error
38
  return ""
39
 
40
+ def ocr_image_tool(args: dict) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  """
42
  Expects: state["ocr_path"] is either:
43
  • a local image path (e.g. "./hf_files/abc.png"), OR
44
  • a Task ID (e.g. "abc123"), in which case we try downloading
45
  GET {DEFAULT_API_URL}/files/{task_id} with .png/.jpg/.jpeg extensions.
46
 
47
+ Returns: "OCR text + brief caption or an error message"
48
+
 
 
 
49
  """
50
  print("reached ocr_image_tool")
51
+ # path_or_id = state.get("ocr_path", "")
 
 
 
 
 
 
 
 
52
  for ext in ("png", "jpg", "jpeg"):
53
+ candidate = _download_file_for_task(args["task_id"], ext)
54
  if candidate:
55
  local_img = candidate
56
  break
 
123
  # 6) Combine OCR + caption
124
  combined = f"OCR text:\n{ocr_text}\n\nImage caption:\n{caption}"
125
  print("combined: ")
126
+ return combined
127
+
 
 
128
 
129
def parse_excel_tool(args: dict) -> str:
    """Download <task_id>.xlsx and stringify its rows.

    There is no fallback to tables pasted in the prompt.

    Expected keys in `args`:
      • task_id – required (used to download the file)
      • excel_sheet_name – optional sheet to load (first sheet otherwise)
    Returns: stringified list of record dicts, or an error string.
    """
    task_id = args.get("task_id", "")
    wanted_sheet = args.get("excel_sheet_name", "")

    workbook_path = _download_file_for_task(task_id, "xlsx")
    if not workbook_path or not os.path.exists(workbook_path):
        return "Error: Excel file not found for this task."

    try:
        workbook = pd.ExcelFile(workbook_path)
        if wanted_sheet and wanted_sheet in workbook.sheet_names:
            target = wanted_sheet
        else:
            target = workbook.sheet_names[0]
        frame = pd.read_excel(workbook, sheet_name=target)
        return str(frame.to_dict(orient="records"))
    except Exception as e:
        return f"Error reading Excel file: {e}"
154
+
155
 
 
156
  import openai
 
157
 
158
+ def audio_transcriber_tool(args: dict) -> str:
159
  """
160
  LangGraph tool for transcribing audio via OpenAI's Whisper API.
161
  Expects: state["audio_path"] to be either:
 
170
  Always attempts to download the file for the given path or task ID.
171
  """
172
  print("reached audio_transcriber_tool")
173
+ # path_or_id = state.get("audio_path", "")
174
+ # if not path_or_id:
175
+ # return {}
176
 
177
  # Always attempt to download the file, regardless of local existence
178
  local_audio = ""
179
  for ext in ("mp3", "wav", "m4a"):
180
+ candidate = _download_file_for_task(args["task_id"], ext)
181
  if candidate:
182
  local_audio = candidate
183
  break
184
 
185
  if not local_audio or not os.path.exists(local_audio):
186
+ return "Error: No audio file found (download failed)."
187
+
 
 
188
 
189
  # Send to OpenAI Whisper
190
  try:
 
203
  except Exception as e:
204
  text = f"Error during transcription: {e}"
205
  print(f"Transcripted as transcript: {text}")
206
+ return text
 
 
 
207
  # tools.py
208
 
209
  import re
210
  import requests
 
211
 
212
+ def wikipedia_search_tool(args: dict) -> str:
213
  """
214
  LangGraph wrapper for searching Wikipedia.
215
  Expects: state["wiki_query"] to be a non‐empty string.
 
221
  If no valid wiki_query is provided, returns {}.
222
  """
223
  print("reached wikipedia search tool")
224
+ query = args["wiki_query"]
225
  if not query:
226
  return {}
227
 
 
260
  if not summary_text:
261
  summary_text = summary_data.get("description", "No summary available.")
262
 
263
+ return f"Title: {first_title}\n\n{summary_text}"
264
+
 
 
265
 
266
  except requests.exceptions.RequestException as e:
267
+ return f"Wikipedia search error: {e}"
268
  except Exception as e:
269
+ return f"Unexpected error in wikipedia_search_tool: {e}"
270
+
271
+
272
+
273
+
274
+
275
+ # def web_search_tool(state: AgentState) -> AgentState:
276
+ # """
277
+ # Expects: state["web_search_query"] is a non‐empty string.
278
+ # Returns: {"web_search_query": None, "web_search_result": <string>}.
279
+ # Retries up to 5 times on either a DuckDuckGo “202 Ratelimit” response or any exception (e.g. timeout).
280
+ # """
281
+ # print("reached web_search_tool")
282
+ # query = state.get("web_search_query", "")
283
+ # if not query:
284
+ # return {} # nothing to do
285
+
286
+ # ddg = DDGS()
287
+ # max_retries = 5
288
+ # result_text = ""
289
+
290
+ # for attempt in range(1, max_retries + 1):
291
+ # try:
292
+ # result_text = str(ddg.text(query, max_results=5))
293
+ # except Exception as e:
294
+ # # Network error or timeout—retry up to max_retries
295
+ # if attempt < max_retries:
296
+ # print(f"web_search_tool: exception '{e}', retrying in 4 seconds ({attempt}/{max_retries})")
297
+ # time.sleep(4)
298
+ # continue
299
+ # else:
300
+ # # Final attempt failed
301
+ # return {
302
+ # "web_search_query": None,
303
+ # "web_search_result": f"Error during DuckDuckGo search: {e}"
304
+ # }
305
+
306
+ # # Check for DuckDuckGo rate‐limit indicator
307
+ # if "202 Ratelimit" in result_text:
308
+ # if attempt < max_retries:
309
+ # print(f"web_search_tool: received '202 Ratelimit', retrying in 4 seconds ({attempt}/{max_retries})")
310
+ # time.sleep(4)
311
+ # continue
312
+ # else:
313
+ # # Final attempt still rate‐limited
314
+ # break
315
+
316
+ # # Successful response (no exception and no rate‐limit text)
317
+ # break
318
+
319
+ # return {
320
+ # "web_search_query": None,
321
+ # "web_search_result": result_text
322
+ # }