naman1102 committed
Commit
d3b49b4
·
1 Parent(s): e312936
agent.py ADDED
@@ -0,0 +1,150 @@
+from __future__ import annotations
+import os
+from langchain_openai import ChatOpenAI
+from langgraph.graph import StateGraph, START, END
+from langchain.schema import HumanMessage, SystemMessage, AIMessage
+from state import AgentState
+from typing import Any, Dict, List, Optional
+import json
+
+
+# ─────────────────────────── External tools ──────────────────────────────
+from tools import (
+    wikipedia_search_tool,
+    ocr_image_tool,
+    audio_transcriber_tool,
+    parse_excel_tool,
+    analyze_code_tool
+)
+
+# ─────────────────────────── Configuration ───────────────────────────────
+LLM = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.3)
+MAX_TOOL_CALLS = 5
+
+# ─────────────────────────── Helper utilities ────────────────────────────
+
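+# safe_json() is called by final_node below but is neither defined nor imported
+# in this file as committed; its definition is carried over verbatim from the
+# code removed from app.py so the module is self-contained.
+def safe_json(text: str) -> Optional[Dict[str, Any]]:
+    """Parse the *first* mapping-literal in `text`.
+
+    • Accepts **strict JSON** or Python-style single-quoted dicts.
+    • Ignores markdown fences / leading commentary.
+    """
+    import re, json, ast
+
+    # Strip ``` fences if any
+    if text.strip().startswith("```"):
+        text = re.split(r"```+", text.strip(), maxsplit=2)[1]
+
+    # Find the first {...} by tracking brace depth
+    brace, start = 0, None
+    for i, ch in enumerate(text):
+        if ch == '{':
+            if brace == 0:
+                start = i
+            brace += 1
+        elif ch == '}' and brace:
+            brace -= 1
+            if brace == 0 and start is not None:
+                candidate = text[start:i + 1]
+                # First try strict JSON
+                try:
+                    return json.loads(candidate)
+                except json.JSONDecodeError:
+                    # Fallback: Python literal (handles single quotes)
+                    try:
+                        obj = ast.literal_eval(candidate)
+                        return obj if isinstance(obj, dict) else None
+                    except Exception:
+                        return None
+    return None
+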
+# ─────────────────────────── Agent state ⬇ ───────────────────────────────
+
+# ───────────────────────────── Nodes ⬇ ───────────────────────────────────
+
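+# tool_selector is registered as the graph's entry node in build_graph() below
+# but its definition is missing from this file as committed; carried over from
+# the code removed from app.py, with prompt typos fixed and one repair: the
+# selector now stores parsed.get("snippet"), which code_tool reads but nothing
+# previously set.
+def tool_selector(state: AgentState) -> AgentState:
+    """Ask the LLM what to do next (wiki / ocr / audio / excel / code / final)."""
+    if state.tool_calls >= MAX_TOOL_CALLS:
+        state.add(SystemMessage(content="You have reached the maximum number of tool calls. Use the already gathered information to answer the question."))
+        state.next_action = "final"
+        return state
+
+    prompt = SystemMessage(
+        content=(
+            "If the tool you want isn't listed below, return {'action':'final'}.\n"
+            "Use wiki if you need to search online for information. Keep the query short, concise, and accurate. The query should not be a prompt; search for the relevant information rather than asking for the answer directly.\n"
+            "If the question is about any image, use the ocr tool; it will also describe the image.\n"
+            "Use audio if the question is about an audio file.\n"
+            "Use excel if the question is about an Excel file.\n"
+            "Use code if the question is about a code file, or if you want to run your own code.\n"
+            "Reply with ONE JSON only (no markdown). Choices:\n"
+            "  {'action':'wiki','query':'…'}\n"
+            "  {'action':'ocr'}\n"
+            "  {'action':'audio'}\n"
+            "  {'action':'excel'}\n"
+            "  {'action':'code', 'snippet':'<python code>'}\n"
+            "  {'action':'code'}\n"
+            "  {'action':'final'}\n"
+        )
+    )
+    raw = LLM.invoke(state.messages + [prompt]).content.strip()
+    print(f"Tool selector response: {raw}")
+    state.add(AIMessage(content=raw))
+    parsed = safe_json(raw)
+    if not parsed or "action" not in parsed:
+        state.next_action = "final"
+        return state
+    state.next_action = parsed["action"]
+    state.query = parsed.get("query")
+    state.snippet = parsed.get("snippet")
+    return state
+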
+# ------------- tool adapters -------------
+
+def wiki_tool(state: AgentState) -> AgentState:
+    out = wikipedia_search_tool({"wiki_query": state.query or ""})
+    state.tool_calls += 1
+    state.add(SystemMessage(content=f"WIKI_TOOL_OUT: {out}"))
+    state.next_action = None
+    return state
+
+
+def ocr_tool(state: AgentState) -> AgentState:
+    out = ocr_image_tool({"task_id": state.task_id, "ocr_path": ""})
+    state.tool_calls += 1
+    state.add(SystemMessage(content=f"OCR_TOOL_OUT: {out}"))
+    state.next_action = None
+    return state
+
+
+def audio_tool(state: AgentState) -> AgentState:
+    out = audio_transcriber_tool({"task_id": state.task_id, "audio_path": ""})
+    state.tool_calls += 1
+    state.add(SystemMessage(content=f"AUDIO_TOOL_OUT: {out}"))
+    state.next_action = None
+    return state
+
+
+def excel_tool(state: AgentState) -> AgentState:
+    result = parse_excel_tool({
+        "task_id": state.task_id,
+        "excel_sheet_name": ""
+    })
+    out = {"excel_result": result}
+    state.tool_calls += 1
+    state.add(SystemMessage(content=f"EXCEL_TOOL_OUT: {out}"))
+    state.next_action = None
+    return state
+
+
+def code_tool(state: AgentState) -> AgentState:
+    # The original if/else branches differed only in the snippet value,
+    # so they are collapsed into a single call.
+    out = {"analysis": analyze_code_tool({
+        "task_id": state.task_id,
+        "snippet": state.snippet or "",
+    })}
+    state.tool_calls += 1
+    state.add(SystemMessage(content=f"CODE_TOOL_OUT: {out}"))
+    state.next_action = None
+    return state
+
+# ------------- final answer -------------
+
+def final_node(state: AgentState) -> AgentState:
+    print("reached final node")
+    wrap = SystemMessage(
+        content=(
+            "Using everything so far, reply ONLY with {'final_answer':'…'}. "
+            "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings. "
+            "If you are asked for a number, don't write it with commas and don't add units such as $ or a percent sign unless specified otherwise. "
+            "If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise. "
+            "If you are asked for a comma-separated list, apply the above rules depending on whether each element is a number or a string.\n"
+            "Reply **only** with "
+            "{\"final_answer\":\"…\"} (no markdown, no commentary)."
+        )
+    )
+    raw = LLM.invoke(state.messages + [wrap]).content.strip()
+    state.add(AIMessage(content=raw))
+    parsed = safe_json(raw)
+    state.final_answer = parsed.get("final_answer") if parsed else "Unable to parse final answer."
+    return state
+
+# ─────────────────────────── Graph wiring ────────────────────────────────
+
+def build_graph():
+    graph = StateGraph(AgentState)
+
+    # Register nodes
+    for name, fn in [
+        ("tool_selector", tool_selector),
+        ("wiki_tool", wiki_tool),
+        ("ocr_tool", ocr_tool),
+        ("audio_tool", audio_tool),
+        ("excel_tool", excel_tool),
+        ("code_tool", code_tool),
+        ("final_node", final_node),
+    ]:
+        graph.add_node(name, fn)
+
+    # Edges
+    graph.add_edge(START, "tool_selector")
+
+    def dispatch(state: AgentState) -> str:
+        return {
+            "wiki": "wiki_tool",
+            "ocr": "ocr_tool",
+            "audio": "audio_tool",
+            "excel": "excel_tool",
+            "code": "code_tool",
+            "final": "final_node",
+        }.get(state.next_action, "final_node")
+
+    graph.add_conditional_edges(
+        "tool_selector",
+        dispatch,
+        {
+            "wiki_tool": "wiki_tool",
+            "ocr_tool": "ocr_tool",
+            "audio_tool": "audio_tool",
+            "excel_tool": "excel_tool",
+            "code_tool": "code_tool",
+            "final_node": "final_node",
+        },
+    )
+
+    # tools loop back to selector
+    for tool_name in ("wiki_tool", "ocr_tool", "audio_tool", "excel_tool", "code_tool"):
+        graph.add_edge(tool_name, "tool_selector")
+
+    # final_answer → END
+    graph.add_edge("final_node", END)
+
+    return graph
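+
+# Not part of the original commit: a minimal smoke-test sketch showing how the
+# graph is driven, assuming the AgentState(user_question=..., task_id=...)
+# constructor that app.py uses.
+if __name__ == "__main__":
+    compiled = build_graph().compile()
+    state = AgentState(user_question="What is the capital of France?", task_id=None)
+    state.add(SystemMessage(content="You are a helpful assistant."))
+    state.add(HumanMessage(content="What is the capital of France?"))
+    out = compiled.invoke(state)
+    # invoke() may return an AddableValuesDict rather than the dataclass
+    print(out.get("final_answer") if isinstance(out, dict) else getattr(out, "final_answer", None))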
app.py CHANGED
@@ -3,298 +3,39 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
-from langchain_openai import ChatOpenAI
-from langgraph.graph import StateGraph, START, END
-from langchain.schema import HumanMessage, SystemMessage, AIMessage
-# Create a ToolNode that knows about your web_search function
-import json
+from langchain.schema import HumanMessage, SystemMessage
+from typing import Optional
+
+from agent import build_graph
 from state import AgentState
 
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
-
-import json
-
-from typing import Any, Dict, List, Optional
-
-
-# ─────────────────────────── External tools ──────────────────────────────
-from tools import (
-    wikipedia_search_tool,
-    ocr_image_tool,
-    audio_transcriber_tool,
-    parse_excel_tool,
-    analyze_code_tool
-)
-
-# ─────────────────────────── Configuration ───────────────────────────────
-LLM = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.3)
-MAX_TOOL_CALLS = 5
-
-# ─────────────────────────── Helper utilities ────────────────────────────
-
-
-def safe_json(text: str) -> Optional[Dict[str, Any]]:
-    """Parse the *first* mapping-literal in `text`.
-
-    • Accepts **strict JSON** or Python-style single-quoted dicts.
-    • Ignores markdown fences / leading commentary.
-    """
-    import re, json, ast
-
-    # Strip ``` fences if any
-    if text.strip().startswith("```"):
-        text = re.split(r"```+", text.strip(), maxsplit=2)[1]
-
-    # Find the first {...}
-    brace, start = 0, None
-    for i, ch in enumerate(text):
-        if ch == '{':
-            if brace == 0:
-                start = i
-            brace += 1
-        elif ch == '}' and brace:
-            brace -= 1
-            if brace == 0 and start is not None:
-                candidate = text[start:i+1]
-                # First try strict JSON
-                try:
-                    return json.loads(candidate)
-                except json.JSONDecodeError:
-                    # Fallback: Python literal (handles single quotes)
-                    try:
-                        obj = ast.literal_eval(candidate)
-                        return obj if isinstance(obj, dict) else None
-                    except Exception:
-                        return None
-    return None
-
-
-# def brief(d: Dict[str, Any]) -> str:
-#     for k in ("wiki_result", "ocr_result", "transcript"):
-#         if k in d:
-#             return f"{k}: {str(d[k])[:160].replace('\n', ' ')}…"
-#     return "(no output)"
-
-# ─────────────────────────── Agent state ⬇ ───────────────────────────────
-
-
-
-# ───────────────────────────── Nodes ⬇ ───────────────────────────────────
-
-def tool_selector(state: AgentState) -> AgentState:
-    """Ask the LLM what to do next (wiki / ocr / audio / excel / final)."""
-    if state.tool_calls >= MAX_TOOL_CALLS:
-        state.add(SystemMessage(content="You have reached the maximum number of tool calls. Use the already gathered information to answer the question."))
-        state.next_action = "final"
-        return state
-
-    prompt = SystemMessage(
-        content=(
-            "if the tool you want isnt listed below, return {'action':'final'} \n"
-            "Use wiki if you need to search online for information. Keep the query short and concise and accurate. The query should not be a prompt but instad you should search for the relevant information rather than asking for the answer directly.\n"
-            "If the question is about any image, you have to use ocr tool. It will tell you about the image also\n"
-            "Use audio if the question is about an audio file\n"
-            "Use excel if the question is about an excel file\n"
-            "Use code if the question is about a code file, or if you want to run your own code\n"
-            "Reply with ONE JSON only (no markdown). Choices:\n"
-            " {'action':'wiki','query':'…'}\n"
-            " {'action':'ocr'}\n"
-            " {'action':'audio'}\n"
-            " {'action':'excel'}\n"
-            " {'action':'code', 'snippet':'<python code>'}\n"
-            " {'action':'code'}\n"
-            " {'action':'final'}\n"
-
-
-        )
-    )
-    raw = LLM.invoke(state.messages + [prompt]).content.strip()
-    print(f"Tool selector response: {raw}")
-    state.add(AIMessage(content=raw))
-    parsed = safe_json(raw)
-    # parsed = json.loads(raw)
-    # print("parsed : ", parsed)
-    # print(f"Parsed: {parsed}, type: {type(parsed)}")
-    if not parsed or "action" not in parsed:
-        state.next_action = "final"
-        return state
-    # print("reached here")
-    state.next_action = parsed["action"]
-    state.query = parsed.get("query")
-    return state
-
-# ------------- tool adapters -------------
-
-def wiki_tool(state: AgentState) -> AgentState:
-    out = wikipedia_search_tool({"wiki_query": state.query or ""})
-    state.tool_calls += 1
-    state.add(SystemMessage(content=f"WIKI_TOOL_OUT: {out}"))
-    state.next_action = None
-    return state
-
-
-def ocr_tool(state: AgentState) -> AgentState:
-    out = ocr_image_tool({"task_id": state.task_id, "ocr_path": ""})
-    state.tool_calls += 1
-    state.add(SystemMessage(content=f"OCR_TOOL_OUT: {out}"))
-    state.next_action = None
-    return state
-
-
-def audio_tool(state: AgentState) -> AgentState:
-    out = audio_transcriber_tool({"task_id": state.task_id, "audio_path": ""})
-    state.tool_calls += 1
-    state.add(SystemMessage(content=f"AUDIO_TOOL_OUT: {out}"))
-    state.next_action = None
-    return state
-
-def excel_tool(state: AgentState) -> AgentState:
-    result = parse_excel_tool({
-        "task_id": state.task_id,
-        "excel_sheet_name": ""
-    })
-    out = {"excel_result": result}
-    state.tool_calls += 1
-    state.add(SystemMessage(content=f"EXCEL_TOOL_OUT: {out}"))
-    state.next_action = None
-    return state
-
-def code_tool(state: AgentState) -> AgentState:
-    if state.snippet:
-        out = {"analysis": analyze_code_tool({
-            "task_id": state.task_id,
-            "snippet": state.snippet,
-        })}
-    else:
-        out = {"analysis": analyze_code_tool({
-            "task_id": state.task_id,
-            "snippet": ""
-        })}
-    state.tool_calls += 1
-    state.add(SystemMessage(content=f"CODE_TOOL_OUT: {out}"))
-    state.next_action = None
-    return state
-
-# ------------- final answer -------------
-
-def final_node(state: AgentState) -> AgentState:
-    print("reached final node")
-    wrap = SystemMessage(
-        content="Using everything so far, reply ONLY with {'final_answer':'…'}. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. \n"
-        "reply **only** with "
-        "{\"final_answer\":\"…\"} (no markdown, no commentary)."
-    )
-    raw = LLM.invoke(state.messages + [wrap]).content.strip()
-    # print("raw : ", raw)
-    state.add(AIMessage(content=raw))
-    parsed = safe_json(raw)
-    # print("parsed : ", parsed, "type : ", type(parsed))
-    state.final_answer = parsed.get("final_answer") if parsed else "Unable to parse final answer."
-    # print("state.final_answer : ", state.final_answer)
-    return state
-
-# ─────────────────────────── Graph wiring ───────────────────────────────
-
-graph = StateGraph(AgentState)
-
-# Register nodes
-for name, fn in [
-    ("tool_selector", tool_selector),
-    ("wiki_tool", wiki_tool),
-    ("ocr_tool", ocr_tool),
-    ("audio_tool", audio_tool),
-    ("excel_tool", excel_tool),
-    ("code_tool", code_tool),
-    ("final_node", final_node),
-]:
-    graph.add_node(name, fn)
-
-# Edges
-graph.add_edge(START, "tool_selector")
-
-def dispatch(state: AgentState) -> str:
-    return {
-        "wiki": "wiki_tool",
-        "ocr": "ocr_tool",
-        "audio": "audio_tool",
-        "excel": "excel_tool",
-        "code": "code_tool",
-        "final": "final_node",
-    }.get(state.next_action, "final_node")
-
-graph.add_conditional_edges(
-    "tool_selector",
-    dispatch,
-    {
-        "wiki_tool": "wiki_tool",
-        "ocr_tool": "ocr_tool",
-        "audio_tool": "audio_tool",
-        "excel_tool": "excel_tool",
-        "code_tool": "code_tool",
-        "final_node": "final_node",
-    },
-)
-
-# tools loop back to selector
-for tool_name in ("wiki_tool", "ocr_tool", "audio_tool", "excel_tool", "code_tool"):
-    graph.add_edge(tool_name, "tool_selector")
-
-# final_answer → END
-graph.add_edge("final_node", END)
-
-compiled_graph = graph.compile()
-
-# ─────────────────────────── Public API ────────────────────────────────
-
-def answer(question: str, task_id: Optional[str] = None) -> str:
-    """Run the agent and return whatever FINAL_ANSWER the graph produces."""
-    init_state = AgentState(question, task_id)
-    init_state.add(SystemMessage(content="You are a helpful assistant."))
-    init_state.add(HumanMessage(content=question))
-
-    # IMPORTANT: invoke() returns a **new** state instance (or an AddableValuesDict),
-    # not the object we pass in. Use the returned value to fetch final_answer.
-    out_state = compiled_graph.invoke(init_state)
-
-    if isinstance(out_state, dict):  # AddableValuesDict behaves like a dict
-        return out_state.get("final_answer", "No answer.")
-    else:  # If future versions return the dataclass
-        return getattr(out_state, "final_answer", "No answer.")
-
-
-
-
-
-
-
-
-
-
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
-    def __call__(self, question: str, task_id) -> str:
-        # print(f"Agent received question (first 50 chars): {question[:50]}...")
-        # fixed_answer = "This is a default answer."
-        # print(f"Agent returning fixed answer: {fixed_answer}")
-        print()
-        print()
-        print()
-        print()
-
-
-        print(f"Agent received question: {question}")
-        print()
-        return answer(question, task_id)
-        # return fixed_answer
-
-
-
+        graph = build_graph()
+        self.compiled_graph = graph.compile()
+
+    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
+        """Run the agent and return whatever FINAL_ANSWER the graph produces."""
+        print(f"Agent received question: {question}")
+
+        # The user_question argument for AgentState is the question.
+        init_state = AgentState(user_question=question, task_id=task_id)
+        init_state.add(SystemMessage(content="You are a helpful assistant."))
+        init_state.add(HumanMessage(content=question))
+
+        # IMPORTANT: invoke() returns a **new** state instance (or an AddableValuesDict),
+        # not the object we pass in. Use the returned value to fetch final_answer.
+        out_state = self.compiled_graph.invoke(init_state)
+
+        if isinstance(out_state, dict):  # AddableValuesDict behaves like a dict
+            return out_state.get("final_answer", "No answer.")
+        else:  # If future versions return the dataclass
+            return getattr(out_state, "final_answer", "No answer.")
 
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
old2app.py → old/old2app.py RENAMED
File without changes
old2state.py → old/old2state.py RENAMED
File without changes
old2tools.py → old/old2tools.py RENAMED
@@ -5,7 +5,7 @@ import pandas as pd
 from pathlib import Path
 # from PIL import Image
 # import pytesseract
-from old2state import AgentState
+from old.old2state import AgentState
 from langchain.schema import HumanMessage
 import regex as re
 import time
@@ -284,7 +284,7 @@ import os
 
 import os
 import openai
-from old2state import AgentState
+from old.old2state import AgentState
 
 def audio_transcriber_tool(state: AgentState) -> AgentState:
     """
@@ -344,7 +344,7 @@ def audio_transcriber_tool(state: AgentState) -> AgentState:
 
 import re
 import requests
-from old2state import AgentState
+from old.old2state import AgentState
 
 def wikipedia_search_tool(state: AgentState) -> AgentState:
     """
old_app_copy.py → old/old_app_copy.py RENAMED
File without changes
state.py CHANGED
@@ -15,6 +15,7 @@ class AgentState:
 
     next_action: Optional[str] = None   # wiki | ocr | audio | final
     query: Optional[str] = None         # wiki search term
+    snippet: Optional[str] = None       # code snippet
    tool_calls: int = 0
 
     final_answer: Optional[str] = None
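
For reference, a sketch of what the relevant part of state.py plausibly looks like after this commit, reconstructed from the hunk above, the state.add / state.messages calls in agent.py, and the AgentState(user_question=..., task_id=...) constructor in app.py. Everything outside the visible hunk (field names, defaults, the add helper) is an assumption, not part of the diff.

from dataclasses import dataclass, field
from typing import List, Optional
from langchain.schema import BaseMessage

@dataclass
class AgentState:
    user_question: str                  # the task question (assumed name, per app.py)
    task_id: Optional[str] = None       # scoring-API task id (assumed)

    # Running chat history consumed by LLM.invoke(state.messages + [...]) (assumed)
    messages: List[BaseMessage] = field(default_factory=list)

    next_action: Optional[str] = None   # wiki | ocr | audio | final
    query: Optional[str] = None         # wiki search term
    snippet: Optional[str] = None       # code snippet (added in this commit)
    tool_calls: int = 0

    final_answer: Optional[str] = None

    def add(self, msg: BaseMessage) -> None:
        """Append a message to the history (assumed helper, per state.add calls)."""
        self.messages.append(msg)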