naman1102 committed
Commit
d3b49b4
·
1 Parent(s): e312936
agent.py ADDED
@@ -0,0 +1,150 @@
+from __future__ import annotations
+import os
+from langchain_openai import ChatOpenAI
+from langgraph.graph import StateGraph, START, END
+from langchain.schema import HumanMessage, SystemMessage, AIMessage
+from state import AgentState
+from typing import Any, Dict, List, Optional
+import json
+
+
+# ─────────────────────────── External tools ──────────────────────────────
+from tools import (
+    wikipedia_search_tool,
+    ocr_image_tool,
+    audio_transcriber_tool,
+    parse_excel_tool,
+    analyze_code_tool
+)
+
+# ─────────────────────────── Configuration ───────────────────────────────
+LLM = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.3)
+MAX_TOOL_CALLS = 5
+
+# ─────────────────────────── Helper utilities ────────────────────────────
+
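+# safe_json() is called by final_node below but is neither defined nor imported
+# in this file as committed; its definition is carried over verbatim from the
+# code removed from app.py so the module is self-contained.
+def safe_json(text: str) -> Optional[Dict[str, Any]]:
+    """Parse the *first* mapping-literal in `text`.
+
+    • Accepts **strict JSON** or Python-style single-quoted dicts.
+    • Ignores markdown fences / leading commentary.
+    """
+    import re, json, ast
+
+    # Strip ``` fences if any
+    if text.strip().startswith("```"):
+        text = re.split(r"```+", text.strip(), maxsplit=2)[1]
+
+    # Find the first {...} by tracking brace depth
+    brace, start = 0, None
+    for i, ch in enumerate(text):
+        if ch == '{':
+            if brace == 0:
+                start = i
+            brace += 1
+        elif ch == '}' and brace:
+            brace -= 1
+            if brace == 0 and start is not None:
+                candidate = text[start:i + 1]
+                # First try strict JSON
+                try:
+                    return json.loads(candidate)
+                except json.JSONDecodeError:
+                    # Fallback: Python literal (handles single quotes)
+                    try:
+                        obj = ast.literal_eval(candidate)
+                        return obj if isinstance(obj, dict) else None
+                    except Exception:
+                        return None
+    return None
+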
+# ─────────────────────────── Agent state ⬇ ───────────────────────────────
+
+# ───────────────────────────── Nodes ⬇ ───────────────────────────────────
+
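+# tool_selector is registered as the graph's entry node in build_graph() below
+# but its definition is missing from this file as committed; carried over from
+# the code removed from app.py, with prompt typos fixed and one repair: the
+# selector now stores parsed.get("snippet"), which code_tool reads but nothing
+# previously set.
+def tool_selector(state: AgentState) -> AgentState:
+    """Ask the LLM what to do next (wiki / ocr / audio / excel / code / final)."""
+    if state.tool_calls >= MAX_TOOL_CALLS:
+        state.add(SystemMessage(content="You have reached the maximum number of tool calls. Use the already gathered information to answer the question."))
+        state.next_action = "final"
+        return state
+
+    prompt = SystemMessage(
+        content=(
+            "If the tool you want isn't listed below, return {'action':'final'}.\n"
+            "Use wiki if you need to search online for information. Keep the query short, concise, and accurate. The query should not be a prompt; search for the relevant information rather than asking for the answer directly.\n"
+            "If the question is about any image, use the ocr tool; it will also describe the image.\n"
+            "Use audio if the question is about an audio file.\n"
+            "Use excel if the question is about an Excel file.\n"
+            "Use code if the question is about a code file, or if you want to run your own code.\n"
+            "Reply with ONE JSON only (no markdown). Choices:\n"
+            "  {'action':'wiki','query':'…'}\n"
+            "  {'action':'ocr'}\n"
+            "  {'action':'audio'}\n"
+            "  {'action':'excel'}\n"
+            "  {'action':'code', 'snippet':'<python code>'}\n"
+            "  {'action':'code'}\n"
+            "  {'action':'final'}\n"
+        )
+    )
+    raw = LLM.invoke(state.messages + [prompt]).content.strip()
+    print(f"Tool selector response: {raw}")
+    state.add(AIMessage(content=raw))
+    parsed = safe_json(raw)
+    if not parsed or "action" not in parsed:
+        state.next_action = "final"
+        return state
+    state.next_action = parsed["action"]
+    state.query = parsed.get("query")
+    state.snippet = parsed.get("snippet")
+    return state
+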
+# ------------- tool adapters -------------
+
+def wiki_tool(state: AgentState) -> AgentState:
+    out = wikipedia_search_tool({"wiki_query": state.query or ""})
+    state.tool_calls += 1
+    state.add(SystemMessage(content=f"WIKI_TOOL_OUT: {out}"))
+    state.next_action = None
+    return state
+
+
+def ocr_tool(state: AgentState) -> AgentState:
+    out = ocr_image_tool({"task_id": state.task_id, "ocr_path": ""})
+    state.tool_calls += 1
+    state.add(SystemMessage(content=f"OCR_TOOL_OUT: {out}"))
+    state.next_action = None
+    return state
+
+
+def audio_tool(state: AgentState) -> AgentState:
+    out = audio_transcriber_tool({"task_id": state.task_id, "audio_path": ""})
+    state.tool_calls += 1
+    state.add(SystemMessage(content=f"AUDIO_TOOL_OUT: {out}"))
+    state.next_action = None
+    return state
+
+
+def excel_tool(state: AgentState) -> AgentState:
+    result = parse_excel_tool({
+        "task_id": state.task_id,
+        "excel_sheet_name": ""
+    })
+    out = {"excel_result": result}
+    state.tool_calls += 1
+    state.add(SystemMessage(content=f"EXCEL_TOOL_OUT: {out}"))
+    state.next_action = None
+    return state
+
+
+def code_tool(state: AgentState) -> AgentState:
+    # The original if/else branches differed only in the snippet value,
+    # so they are collapsed into a single call.
+    out = {"analysis": analyze_code_tool({
+        "task_id": state.task_id,
+        "snippet": state.snippet or "",
+    })}
+    state.tool_calls += 1
+    state.add(SystemMessage(content=f"CODE_TOOL_OUT: {out}"))
+    state.next_action = None
+    return state
+
+# ------------- final answer -------------
+
+def final_node(state: AgentState) -> AgentState:
+    print("reached final node")
+    wrap = SystemMessage(
+        content=(
+            "Using everything so far, reply ONLY with {'final_answer':'…'}. "
+            "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings. "
+            "If you are asked for a number, don't write it with commas and don't add units such as $ or a percent sign unless specified otherwise. "
+            "If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise. "
+            "If you are asked for a comma-separated list, apply the above rules depending on whether each element is a number or a string.\n"
+            "Reply **only** with "
+            "{\"final_answer\":\"…\"} (no markdown, no commentary)."
+        )
+    )
+    raw = LLM.invoke(state.messages + [wrap]).content.strip()
+    state.add(AIMessage(content=raw))
+    parsed = safe_json(raw)
+    state.final_answer = parsed.get("final_answer") if parsed else "Unable to parse final answer."
+    return state
+
+# ─────────────────────────── Graph wiring ────────────────────────────────
+
+def build_graph():
+    graph = StateGraph(AgentState)
+
+    # Register nodes
+    for name, fn in [
+        ("tool_selector", tool_selector),
+        ("wiki_tool", wiki_tool),
+        ("ocr_tool", ocr_tool),
+        ("audio_tool", audio_tool),
+        ("excel_tool", excel_tool),
+        ("code_tool", code_tool),
+        ("final_node", final_node),
+    ]:
+        graph.add_node(name, fn)
+
+    # Edges
+    graph.add_edge(START, "tool_selector")
+
+    def dispatch(state: AgentState) -> str:
+        return {
+            "wiki": "wiki_tool",
+            "ocr": "ocr_tool",
+            "audio": "audio_tool",
+            "excel": "excel_tool",
+            "code": "code_tool",
+            "final": "final_node",
+        }.get(state.next_action, "final_node")
+
+    graph.add_conditional_edges(
+        "tool_selector",
+        dispatch,
+        {
+            "wiki_tool": "wiki_tool",
+            "ocr_tool": "ocr_tool",
+            "audio_tool": "audio_tool",
+            "excel_tool": "excel_tool",
+            "code_tool": "code_tool",
+            "final_node": "final_node",
+        },
+    )
+
+    # tools loop back to selector
+    for tool_name in ("wiki_tool", "ocr_tool", "audio_tool", "excel_tool", "code_tool"):
+        graph.add_edge(tool_name, "tool_selector")
+
+    # final_answer → END
+    graph.add_edge("final_node", END)
+
+    return graph
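+
+# Not part of the original commit: a minimal smoke-test sketch showing how the
+# graph is driven, assuming the AgentState(user_question=..., task_id=...)
+# constructor that app.py uses.
+if __name__ == "__main__":
+    compiled = build_graph().compile()
+    state = AgentState(user_question="What is the capital of France?", task_id=None)
+    state.add(SystemMessage(content="You are a helpful assistant."))
+    state.add(HumanMessage(content="What is the capital of France?"))
+    out = compiled.invoke(state)
+    # invoke() may return an AddableValuesDict rather than the dataclass
+    print(out.get("final_answer") if isinstance(out, dict) else getattr(out, "final_answer", None))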
app.py CHANGED
@@ -3,298 +3,39 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
-from langchain_openai import ChatOpenAI
-from langgraph.graph import StateGraph, START, END
-from langchain.schema import HumanMessage, SystemMessage, AIMessage
-# Create a ToolNode that knows about your web_search function
-import json
+from langchain.schema import HumanMessage, SystemMessage
+from typing import Optional
+
+from agent import build_graph
 from state import AgentState
 
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
-
-import json
-
-from typing import Any, Dict, List, Optional
-
-
-# ─────────────────────────── External tools ──────────────────────────────
-from tools import (
-    wikipedia_search_tool,
-    ocr_image_tool,
-    audio_transcriber_tool,
-    parse_excel_tool,
-    analyze_code_tool
-)
-
-# ─────────────────────────── Configuration ───────────────────────────────
-LLM = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.3)
-MAX_TOOL_CALLS = 5
-
-# ─────────────────────────── Helper utilities ────────────────────────────
-
-
-def safe_json(text: str) -> Optional[Dict[str, Any]]:
-    """Parse the *first* mapping-literal in `text`.
-
-    • Accepts **strict JSON** or Python-style single-quoted dicts.
-    • Ignores markdown fences / leading commentary.
-    """
-    import re, json, ast
-
-    # Strip ``` fences if any
-    if text.strip().startswith("```"):
-        text = re.split(r"```+", text.strip(), maxsplit=2)[1]
-
-    # Find the first {...}
-    brace, start = 0, None
-    for i, ch in enumerate(text):
-        if ch == '{':
-            if brace == 0:
-                start = i
-            brace += 1
-        elif ch == '}' and brace:
-            brace -= 1
-            if brace == 0 and start is not None:
-                candidate = text[start:i+1]
-                # First try strict JSON
-                try:
-                    return json.loads(candidate)
-                except json.JSONDecodeError:
-                    # Fallback: Python literal (handles single quotes)
-                    try:
-                        obj = ast.literal_eval(candidate)
-                        return obj if isinstance(obj, dict) else None
-                    except Exception:
-                        return None
-    return None
-
-
-# def brief(d: Dict[str, Any]) -> str:
-#     for k in ("wiki_result", "ocr_result", "transcript"):
-#         if k in d:
-#             return f"{k}: {str(d[k])[:160].replace('\n', ' ')}…"
-#     return "(no output)"
-
-# ─────────────────────────── Agent state ⬇ ───────────────────────────────
-
-
-
-# ───────────────────────────── Nodes ⬇ ───────────────────────────────────
-
-def tool_selector(state: AgentState) -> AgentState:
-    """Ask the LLM what to do next (wiki / ocr / audio / excel / final)."""
-    if state.tool_calls >= MAX_TOOL_CALLS:
-        state.add(SystemMessage(content="You have reached the maximum number of tool calls. Use the already gathered information to answer the question."))
-        state.next_action = "final"
-        return state
-
-    prompt = SystemMessage(
-        content=(
-            "if the tool you want isnt listed below, return {'action':'final'} \n"
-            "Use wiki if you need to search online for information. Keep the query short and concise and accurate. The query should not be a prompt but instad you should search for the relevant information rather than asking for the answer directly.\n"
-            "If the question is about any image, you have to use ocr tool. It will tell you about the image also\n"
-            "Use audio if the question is about an audio file\n"
-            "Use excel if the question is about an excel file\n"
-            "Use code if the question is about a code file, or if you want to run your own code\n"
-            "Reply with ONE JSON only (no markdown). Choices:\n"
-            " {'action':'wiki','query':'…'}\n"
-            " {'action':'ocr'}\n"
-            " {'action':'audio'}\n"
-            " {'action':'excel'}\n"
-            " {'action':'code', 'snippet':'<python code>'}\n"
-            " {'action':'code'}\n"
-            " {'action':'final'}\n"
-
-
-        )
-    )
-    raw = LLM.invoke(state.messages + [prompt]).content.strip()
-    print(f"Tool selector response: {raw}")
-    state.add(AIMessage(content=raw))
-    parsed = safe_json(raw)
-    # parsed = json.loads(raw)
-    # print("parsed : ", parsed)
-    # print(f"Parsed: {parsed}, type: {type(parsed)}")
-    if not parsed or "action" not in parsed:
-        state.next_action = "final"
-        return state
-    # print("reached here")
-    state.next_action = parsed["action"]
-    state.query = parsed.get("query")
-    return state
-
-# ------------- tool adapters -------------
-
-def wiki_tool(state: AgentState) -> AgentState:
-    out = wikipedia_search_tool({"wiki_query": state.query or ""})
-    state.tool_calls += 1
-    state.add(SystemMessage(content=f"WIKI_TOOL_OUT: {out}"))
-    state.next_action = None
-    return state
-
-
-def ocr_tool(state: AgentState) -> AgentState:
-    out = ocr_image_tool({"task_id": state.task_id, "ocr_path": ""})
-    state.tool_calls += 1
-    state.add(SystemMessage(content=f"OCR_TOOL_OUT: {out}"))
-    state.next_action = None
-    return state
-
-
-def audio_tool(state: AgentState) -> AgentState:
-    out = audio_transcriber_tool({"task_id": state.task_id, "audio_path": ""})
-    state.tool_calls += 1
-    state.add(SystemMessage(content=f"AUDIO_TOOL_OUT: {out}"))
-    state.next_action = None
-    return state
-
-def excel_tool(state: AgentState) -> AgentState:
-    result = parse_excel_tool({
-        "task_id": state.task_id,
-        "excel_sheet_name": ""
-    })
-    out = {"excel_result": result}
-    state.tool_calls += 1
-    state.add(SystemMessage(content=f"EXCEL_TOOL_OUT: {out}"))
-    state.next_action = None
-    return state
-
-def code_tool(state: AgentState) -> AgentState:
-    if state.snippet:
-        out = {"analysis": analyze_code_tool({
-            "task_id": state.task_id,
-            "snippet": state.snippet,
-        })}
-    else:
-        out = {"analysis": analyze_code_tool({
-            "task_id": state.task_id,
-            "snippet": ""
-        })}
-    state.tool_calls += 1
-    state.add(SystemMessage(content=f"CODE_TOOL_OUT: {out}"))
-    state.next_action = None
-    return state
-
-# ------------- final answer -------------
-
-def final_node(state: AgentState) -> AgentState:
-    print("reached final node")
-    wrap = SystemMessage(
-        content="Using everything so far, reply ONLY with {'final_answer':'…'}. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. \n"
-        "reply **only** with "
-        "{\"final_answer\":\"…\"} (no markdown, no commentary)."
-    )
-    raw = LLM.invoke(state.messages + [wrap]).content.strip()
-    # print("raw : ", raw)
-    state.add(AIMessage(content=raw))
-    parsed = safe_json(raw)
-    # print("parsed : ", parsed, "type : ", type(parsed))
-    state.final_answer = parsed.get("final_answer") if parsed else "Unable to parse final answer."
-    # print("state.final_answer : ", state.final_answer)
-    return state
-
-# ─────────────────────────── Graph wiring ───────────────────────────────
-
-graph = StateGraph(AgentState)
-
-# Register nodes
-for name, fn in [
-    ("tool_selector", tool_selector),
-    ("wiki_tool", wiki_tool),
-    ("ocr_tool", ocr_tool),
-    ("audio_tool", audio_tool),
-    ("excel_tool", excel_tool),
-    ("code_tool", code_tool),
-    ("final_node", final_node),
-]:
-    graph.add_node(name, fn)
-
-# Edges
-graph.add_edge(START, "tool_selector")
-
-def dispatch(state: AgentState) -> str:
-    return {
-        "wiki": "wiki_tool",
-        "ocr": "ocr_tool",
-        "audio": "audio_tool",
-        "excel": "excel_tool",
-        "code": "code_tool",
-        "final": "final_node",
-    }.get(state.next_action, "final_node")
-
-graph.add_conditional_edges(
-    "tool_selector",
-    dispatch,
-    {
-        "wiki_tool": "wiki_tool",
-        "ocr_tool": "ocr_tool",
-        "audio_tool": "audio_tool",
-        "excel_tool": "excel_tool",
-        "code_tool": "code_tool",
-        "final_node": "final_node",
-    },
-)
-
-# tools loop back to selector
-for tool_name in ("wiki_tool", "ocr_tool", "audio_tool", "excel_tool", "code_tool"):
-    graph.add_edge(tool_name, "tool_selector")
-
-# final_answer → END
-graph.add_edge("final_node", END)
-
-compiled_graph = graph.compile()
-
-# ─────────────────────────── Public API ────────────────────────────────
-
-def answer(question: str, task_id: Optional[str] = None) -> str:
-    """Run the agent and return whatever FINAL_ANSWER the graph produces."""
-    init_state = AgentState(question, task_id)
-    init_state.add(SystemMessage(content="You are a helpful assistant."))
-    init_state.add(HumanMessage(content=question))
-
-    # IMPORTANT: invoke() returns a **new** state instance (or an AddableValuesDict),
-    # not the object we pass in. Use the returned value to fetch final_answer.
-    out_state = compiled_graph.invoke(init_state)
-
-    if isinstance(out_state, dict):  # AddableValuesDict behaves like a dict
-        return out_state.get("final_answer", "No answer.")
-    else:  # If future versions return the dataclass
-        return getattr(out_state, "final_answer", "No answer.")
-
-
-
-
-
-
-
-
-
-
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
-    def __call__(self, question: str, task_id) -> str:
-        # print(f"Agent received question (first 50 chars): {question[:50]}...")
-        # fixed_answer = "This is a default answer."
-        # print(f"Agent returning fixed answer: {fixed_answer}")
-        print()
-        print()
-        print()
-        print()
-
-
-        print(f"Agent received question: {question}")
-        print()
-        return answer(question, task_id)
-        # return fixed_answer
-
-
-
+        graph = build_graph()
+        self.compiled_graph = graph.compile()
+
+    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
+        """Run the agent and return whatever FINAL_ANSWER the graph produces."""
+        print(f"Agent received question: {question}")
+
+        # The user_question argument for AgentState is the question.
+        init_state = AgentState(user_question=question, task_id=task_id)
+        init_state.add(SystemMessage(content="You are a helpful assistant."))
+        init_state.add(HumanMessage(content=question))
+
+        # IMPORTANT: invoke() returns a **new** state instance (or an AddableValuesDict),
+        # not the object we pass in. Use the returned value to fetch final_answer.
+        out_state = self.compiled_graph.invoke(init_state)
+
+        if isinstance(out_state, dict):  # AddableValuesDict behaves like a dict
+            return out_state.get("final_answer", "No answer.")
+        else:  # If future versions return the dataclass
+            return getattr(out_state, "final_answer", "No answer.")
 
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
old2app.py → old/old2app.py RENAMED
File without changes
old2state.py → old/old2state.py RENAMED
File without changes
old2tools.py → old/old2tools.py RENAMED
@@ -5,7 +5,7 @@ import pandas as pd
 from pathlib import Path
 # from PIL import Image
 # import pytesseract
-from old2state import AgentState
+from old.old2state import AgentState
 from langchain.schema import HumanMessage
 import regex as re
 import time
@@ -284,7 +284,7 @@ import os
 
 import os
 import openai
-from old2state import AgentState
+from old.old2state import AgentState
 
 def audio_transcriber_tool(state: AgentState) -> AgentState:
     """
@@ -344,7 +344,7 @@ def audio_transcriber_tool(state: AgentState) -> AgentState:
 
 import re
 import requests
-from old2state import AgentState
+from old.old2state import AgentState
 
 def wikipedia_search_tool(state: AgentState) -> AgentState:
     """
old_app_copy.py → old/old_app_copy.py RENAMED
File without changes
state.py CHANGED
@@ -15,6 +15,7 @@ class AgentState:
 
     next_action: Optional[str] = None   # wiki | ocr | audio | final
     query: Optional[str] = None         # wiki search term
+    snippet: Optional[str] = None       # code snippet
    tool_calls: int = 0
 
     final_answer: Optional[str] = None
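
For reference, a sketch of what the relevant part of state.py plausibly looks like after this commit, reconstructed from the hunk above, the state.add / state.messages calls in agent.py, and the AgentState(user_question=..., task_id=...) constructor in app.py. Everything outside the visible hunk (field names, defaults, the add helper) is an assumption, not part of the diff.

from dataclasses import dataclass, field
from typing import List, Optional
from langchain.schema import BaseMessage

@dataclass
class AgentState:
    user_question: str                  # the task question (assumed name, per app.py)
    task_id: Optional[str] = None       # scoring-API task id (assumed)

    # Running chat history consumed by LLM.invoke(state.messages + [...]) (assumed)
    messages: List[BaseMessage] = field(default_factory=list)

    next_action: Optional[str] = None   # wiki | ocr | audio | final
    query: Optional[str] = None         # wiki search term
    snippet: Optional[str] = None       # code snippet (added in this commit)
    tool_calls: int = 0

    final_answer: Optional[str] = None

    def add(self, msg: BaseMessage) -> None:
        """Append a message to the history (assumed helper, per state.add calls)."""
        self.messages.append(msg)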