naman1102 commited on
Commit
14fa0cc
·
1 Parent(s): 9af3089

React_graph

Browse files
Files changed (4) hide show
  1. agent.py +16 -117
  2. app.py +7 -3
  3. tools.py +26 -48
  4. toolsold.py +349 -0
agent.py CHANGED
@@ -6,19 +6,19 @@ from langchain.schema import HumanMessage, SystemMessage, AIMessage
6
  from state import AgentState
7
  from typing import Any, Dict, List, Optional
8
  import json
9
-
10
 
11
  # ─────────────────────────── External tools ──────────────────────────────
12
  from tools import (
13
  wikipedia_search_tool,
14
- ocr_image_tool,
15
  audio_transcriber_tool,
16
- parse_excel_tool,
17
  analyze_code_tool
18
  )
19
 
20
  # ─────────────────────────── Configuration ───────────────────────────────
21
- LLM = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.3)
22
  MAX_TOOL_CALLS = 5
23
 
24
  # ─────────────────────────── Helper utilities ────────────────────────────
@@ -29,122 +29,21 @@ MAX_TOOL_CALLS = 5
29
 
30
  # ------------- tool adapters -------------
31
 
32
- def wiki_tool(state: AgentState) -> AgentState:
33
- out = wikipedia_search_tool({"wiki_query": state.query or ""})
34
- state.tool_calls += 1
35
- state.add(SystemMessage(content=f"WIKI_TOOL_OUT: {out}"))
36
- state.next_action = None
37
- return state
38
-
39
-
40
- def ocr_tool(state: AgentState) -> AgentState:
41
- out = ocr_image_tool({"task_id": state.task_id, "ocr_path": ""})
42
- state.tool_calls += 1
43
- state.add(SystemMessage(content=f"OCR_TOOL_OUT: {out}"))
44
- state.next_action = None
45
- return state
46
-
47
-
48
- def audio_tool(state: AgentState) -> AgentState:
49
- out = audio_transcriber_tool({"task_id": state.task_id, "audio_path": ""})
50
- state.tool_calls += 1
51
- state.add(SystemMessage(content=f"AUDIO_TOOL_OUT: {out}"))
52
- state.next_action = None
53
- return state
54
-
55
- def excel_tool(state: AgentState) -> AgentState:
56
- result = parse_excel_tool({
57
- "task_id": state.task_id,
58
- "excel_sheet_name": ""
59
- })
60
- out = {"excel_result": result}
61
- state.tool_calls += 1
62
- state.add(SystemMessage(content=f"EXCEL_TOOL_OUT: {out}"))
63
- state.next_action = None
64
- return state
65
-
66
- def code_tool(state: AgentState) -> AgentState:
67
- if state.snippet:
68
- out = {"analysis": analyze_code_tool({
69
- "task_id": state.task_id,
70
- "snippet": state.snippet,
71
- })}
72
- else:
73
- out = {"analysis": analyze_code_tool({
74
- "task_id": state.task_id,
75
- "snippet": ""
76
- })}
77
- state.tool_calls += 1
78
- state.add(SystemMessage(content=f"CODE_TOOL_OUT: {out}"))
79
- state.next_action = None
80
- return state
81
-
82
- # ------------- final answer -------------
83
-
84
- def final_node(state: AgentState) -> AgentState:
85
- print("reached final node")
86
- wrap = SystemMessage(
87
- content="Using everything so far, reply ONLY with {'final_answer':'…'}. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. \n"
88
- "reply **only** with "
89
- "{\"final_answer\":\"…\"} (no markdown, no commentary)."
90
- )
91
- raw = LLM.invoke(state.messages + [wrap]).content.strip()
92
- # print("raw : ", raw)
93
- state.add(AIMessage(content=raw))
94
- parsed = safe_json(raw)
95
- # print("parsed : ", parsed, "type : ", type(parsed))
96
- state.final_answer = parsed.get("final_answer") if parsed else "Unable to parse final answer."
97
- # print("state.final_answer : ", state.final_answer)
98
- return state
99
 
100
  # ─────────────────────────── Graph wiring ───────────────────────────────
101
 
102
  def build_graph():
103
  graph = StateGraph(AgentState)
104
 
105
- # Register nodes
106
- for name, fn in [
107
- ("tool_selector", tool_selector),
108
- ("wiki_tool", wiki_tool),
109
- ("ocr_tool", ocr_tool),
110
- ("audio_tool", audio_tool),
111
- ("excel_tool", excel_tool),
112
- ("code_tool", code_tool),
113
- ("final_node", final_node),
114
- ]:
115
- graph.add_node(name, fn)
116
-
117
- # Edges
118
- graph.add_edge(START, "tool_selector")
119
-
120
- def dispatch(state: AgentState) -> str:
121
- return {
122
- "wiki": "wiki_tool",
123
- "ocr": "ocr_tool",
124
- "audio": "audio_tool",
125
- "excel": "excel_tool",
126
- "code": "code_tool",
127
- "final": "final_node",
128
- }.get(state.next_action, "final_node")
129
-
130
- graph.add_conditional_edges(
131
- "tool_selector",
132
- dispatch,
133
- {
134
- "wiki_tool": "wiki_tool",
135
- "ocr_tool": "ocr_tool",
136
- "audio_tool": "audio_tool",
137
- "excel_tool": "excel_tool",
138
- "code_tool": "code_tool",
139
- "final_node": "final_node",
140
- },
141
- )
142
-
143
- # tools loop back to selector
144
- for tool_name in ("wiki_tool", "ocr_tool", "audio_tool", "excel_tool", "code_tool"):
145
- graph.add_edge(tool_name, "tool_selector")
146
-
147
- # final_answer β†’ END
148
- graph.add_edge("final_node", END)
149
-
150
- return graph
 
6
  from state import AgentState
7
  from typing import Any, Dict, List, Optional
8
  import json
9
+ from langgraph.prebuilt import create_react_agent
10
 
11
  # ─────────────────────────── External tools ──────────────────────────────
12
  from tools import (
13
  wikipedia_search_tool,
14
+ arxiv_search_tool,
15
  audio_transcriber_tool,
16
+ excel_tool,
17
  analyze_code_tool
18
  )
19
 
20
  # ─────────────────────────── Configuration ───────────────────────────────
21
+ llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.3)
22
  MAX_TOOL_CALLS = 5
23
 
24
  # ─────────────────────────── Helper utilities ────────────────────────────
 
29
 
30
  # ------------- tool adapters -------------
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  # ─────────────────────────── Graph wiring ───────────────────────────────
34
 
35
def build_graph():
    """Build and return a ReAct agent wired with the external tools.

    Returns:
        The runnable agent produced by ``create_react_agent``; callers
        invoke it directly (it behaves like a compiled graph).
    """
    llm_tools = [
        wikipedia_search_tool,
        arxiv_search_tool,
        audio_transcriber_tool,
        excel_tool,
        analyze_code_tool,
    ]
    # BUG FIX: the original wrote `llm = llm.bind_tools(...)`, which makes
    # `llm` a local name and raises UnboundLocalError when reading the
    # module-level `llm` on the right-hand side. Bind under a new name.
    llm_with_tools = llm.bind_tools(llm_tools)
    agent = create_react_agent(llm_with_tools, llm_tools)
    return agent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -12,12 +12,16 @@ from state import AgentState
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
 
 
 
 
15
 
16
  class BasicAgent:
17
  def __init__(self):
18
  print("BasicAgent initialized.")
19
  graph = build_graph()
20
- self.compiled_graph = graph.compile()
21
 
22
  def __call__(self, question: str, task_id: Optional[str] = None) -> str:
23
  """Run the agent and return whatever FINAL_ANSWER the graph produces."""
@@ -25,12 +29,12 @@ class BasicAgent:
25
 
26
  # The user_question argument for AgentState is the question.
27
  init_state = AgentState(user_question=question, task_id=task_id)
28
- init_state.add(SystemMessage(content="You are a helpful assistant."))
29
  init_state.add(HumanMessage(content=question))
30
 
31
  # IMPORTANT: invoke() returns a **new** state instance (or an AddableValuesDict),
32
  # not the object we pass in. Use the returned value to fetch final_answer.
33
- out_state = self.compiled_graph.invoke(init_state)
34
 
35
  if isinstance(out_state, dict): # AddableValuesDict behaves like a dict
36
  return out_state.get("final_answer", "No answer.")
 
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
+ SYSTEM_PROMPT = """
16
+ You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
17
+ """
18
+
19
 
20
class BasicAgent:
    def __init__(self):
        """Build the agent graph once and keep it on the instance."""
        print("BasicAgent initialized.")
        # BUG FIX: the result was previously bound to a local `graph` and
        # discarded, but __call__ invokes `self.graph.invoke(...)`, which
        # raised AttributeError. Store the built graph on the instance.
        self.graph = build_graph()
24
+
25
 
26
  def __call__(self, question: str, task_id: Optional[str] = None) -> str:
27
  """Run the agent and return whatever FINAL_ANSWER the graph produces."""
 
29
 
30
  # The user_question argument for AgentState is the question.
31
  init_state = AgentState(user_question=question, task_id=task_id)
32
+ init_state.add(SystemMessage(content=SYSTEM_PROMPT))
33
  init_state.add(HumanMessage(content=question))
34
 
35
  # IMPORTANT: invoke() returns a **new** state instance (or an AddableValuesDict),
36
  # not the object we pass in. Use the returned value to fetch final_answer.
37
+ out_state = self.graph.invoke(init_state)
38
 
39
  if isinstance(out_state, dict): # AddableValuesDict behaves like a dict
40
  return out_state.get("final_answer", "No answer.")
tools.py CHANGED
@@ -9,6 +9,8 @@ import time
9
  import os
10
  from duckduckgo_search import DDGS
11
  from langchain_core.tools import tool
 
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
 
@@ -199,7 +201,7 @@ import requests
199
  @tool
200
  def wikipedia_search_tool(wiki_query: str) -> str:
201
  """
202
- LangGraph wrapper for searching Wikipedia.
203
  Expects: wiki_query is a non‐empty string.
204
  Returns: text summary of first matching page or an error message>"
205
 
@@ -207,53 +209,29 @@ def wikipedia_search_tool(wiki_query: str) -> str:
207
  """
208
  print("reached wikipedia search tool")
209
  query = wiki_query
210
- if not query:
211
- return {}
212
-
213
- try:
214
- # 1) Use the MediaWiki API to search for page titles matching the query
215
- search_params = {
216
- "action": "query",
217
- "list": "search",
218
- "srsearch": query,
219
- "format": "json",
220
- "utf8": 1
221
- }
222
- search_resp = requests.get("https://en.wikipedia.org/w/api.php", params=search_params, timeout=10)
223
- search_resp.raise_for_status()
224
- search_data = search_resp.json()
225
-
226
- search_results = search_data.get("query", {}).get("search", [])
227
- # print("wikipedia: search_results",search_results)
228
- if not search_results:
229
- print(f"No Wikipedia page found for '{query}'.")
230
- return f"No Wikipedia page found for '{query}'."
231
-
232
- # 2) Take the first search result's title
233
- first_title = search_results[0].get("title", "")
234
- if not first_title:
235
- print("Unexpected format from Wikipedia search.")
236
- return "Unexpected format from Wikipedia search."
237
-
238
- # 3) Fetch the page summary for that title via the REST summary endpoint
239
- title_for_url = requests.utils.requote_uri(first_title)
240
- summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title_for_url}"
241
- summary_resp = requests.get(summary_url, timeout=10)
242
- summary_resp.raise_for_status()
243
- summary_data = summary_resp.json()
244
-
245
- # 4) Extract either the "extract" field or a fallback message
246
- summary_text = summary_data.get("extract")
247
- if not summary_text:
248
- summary_text = summary_data.get("description", "No summary available.")
249
- print(f"Title: {first_title}\n\n{summary_text}")
250
- return f"Title: {first_title}\n\n{summary_text}"
251
-
252
-
253
- except requests.exceptions.RequestException as e:
254
- return f"Wikipedia search error: {e}"
255
- except Exception as e:
256
- return f"Unexpected error in wikipedia_search_tool: {e}"
257
 
258
 
259
  from langchain_openai import ChatOpenAI
 
9
  import os
10
  from duckduckgo_search import DDGS
11
  from langchain_core.tools import tool
12
+ from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
13
+
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
 
 
201
@tool
def wikipedia_search_tool(wiki_query: str) -> str:
    """
    Searches Wikipedia for the given query and returns up to the first 5 pages.
    Expects: wiki_query is a non-empty string.
    Returns: concatenated page titles/contents, or an error message.
    """
    print("reached wikipedia search tool")
    if not wiki_query:
        # Empty query: honour the "or an error message" contract.
        return "Error: empty Wikipedia query."
    try:
        docs = WikipediaLoader(query=wiki_query, load_max_docs=5).load()
    except Exception as e:
        # Network/parse failures previously propagated out of the tool.
        return f"Wikipedia search error: {e}"
    if not docs:
        return f"No Wikipedia page found for '{wiki_query}'."
    parts = []
    for idx, doc in enumerate(docs, start=1):
        # .get avoids a KeyError if a document lacks a 'title' entry.
        title = doc.metadata.get("title", "(untitled)")
        parts.append(f"\n\nDocument{idx}: {title}\n. {doc.page_content}")
    return "".join(parts)
219
+
220
@tool
def arxiv_search_tool(arxiv_query: str) -> str:
    """
    Searches Arxiv for the given query and returns up to the first 5 papers.
    Expects: arxiv_query is a non-empty string.
    Returns: concatenated paper titles/contents, or an error message.
    """
    print("reached arxiv_search_tool")
    if not arxiv_query:
        return "Error: empty Arxiv query."
    try:
        docs = ArxivLoader(query=arxiv_query, load_max_docs=5).load()
    except Exception as e:
        # Network/parse failures previously propagated out of the tool.
        return f"Arxiv search error: {e}"
    if not docs:
        return f"No Arxiv result found for '{arxiv_query}'."
    parts = []
    for idx, doc in enumerate(docs, start=1):
        # BUG FIX: ArxivLoader metadata uses the capitalized key "Title";
        # doc.metadata['title'] raised KeyError. Try both, defensively.
        title = doc.metadata.get("Title") or doc.metadata.get("title", "(untitled)")
        parts.append(f"\n\nDocument{idx}: {title}\n. {doc.page_content}")
    return "".join(parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
 
237
  from langchain_openai import ChatOpenAI
toolsold.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tools.py
2
+
3
+ import pandas as pd
4
+
5
+ from pathlib import Path
6
+ import requests
7
+ import regex as re
8
+ import time
9
+ import os
10
+ from duckduckgo_search import DDGS
11
+ from langchain_core.tools import tool
12
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
+
14
+
15
def _download_file_for_task(task_id: str, ext: str) -> str:
    """
    Helper: attempt to GET the remote file for a given task_id.

    Saves under ./hf_files/{task_id}.{ext}. Returns the local path on
    success, or an empty string when there is no file / the download failed.
    """
    print("reached _download_file_for_task")
    os.makedirs("hf_files", exist_ok=True)
    target_path = os.path.join("hf_files", f"{task_id}.{ext}")
    remote_url = f"{DEFAULT_API_URL}/files/{task_id}"

    try:
        response = requests.get(remote_url, timeout=10)
        # Only persist a real payload; a 404 or empty body falls through.
        if response.status_code == 200 and response.content:
            print(f"Downloaded file from {remote_url} to {target_path}")
            with open(target_path, "wb") as fh:
                fh.write(response.content)
            return target_path
    except Exception:
        print(f"Error downloading file from {remote_url} to {target_path}")

    # Either a non-200 response, an empty body, or a download error.
    return ""
40
+
41
@tool
def image_tool(task_id: str) -> str:
    """
    Download the image attached to a task, OCR it, and caption it.

    Expects: task_id is a string.
    Returns: "OCR text + brief caption", or an error message (always a str).
    """
    print("reached image_tool")

    # 1) Try the known image extensions until a download succeeds.
    # BUG FIX: local_img must be initialized; previously it was only assigned
    # inside the loop, so a failed download raised NameError on the check below.
    local_img = ""
    for ext in ("png", "jpg", "jpeg"):
        candidate = _download_file_for_task(task_id, ext)
        if candidate:
            local_img = candidate
            break

    if not local_img or not os.path.exists(local_img):
        # BUG FIX: return a plain string (not a dict) to honour `-> str`.
        return "Error: No image file found (local nonexistent or download failed)."

    # 2) Read raw bytes
    try:
        with open(local_img, "rb") as f:
            image_bytes = f.read()
    except Exception as e:
        return f"Error reading image file: {e}"

    # 3) Prepare HF Inference headers
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        # BUG FIX: the message now names the variable actually checked.
        return "Error: HF_TOKEN not set in environment."

    headers = {"Authorization": f"Bearer {hf_token}"}

    # 4) Call HF's vision-ocr endpoint to extract text
    ocr_text = ""
    try:
        ocr_resp = requests.post(
            "https://api-inference.huggingface.co/models/google/vit-ocr",
            headers=headers,
            files={"file": image_bytes},
            timeout=30,
        )
        ocr_resp.raise_for_status()
        ocr_json = ocr_resp.json()

        # The JSON has "pages" -> list of blocks -> "lines" -> each line has "text"
        lines = []
        for page in ocr_json.get("pages", []):
            for line in page.get("lines", []):
                lines.append(line.get("text", "").strip())
        ocr_text = "\n".join(lines).strip() or "(no visible text)"
    except Exception as e:
        ocr_text = f"Error during HF OCR: {e}"

    # 5) Call HF's image-captioning model for a brief description
    caption = ""
    try:
        cap_resp = requests.post(
            "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base",
            headers=headers,
            files={"file": image_bytes},
            timeout=30,
        )
        cap_resp.raise_for_status()
        cap_json = cap_resp.json()
        # The response looks like: {"generated_text": "...caption..."}
        caption = cap_json.get("generated_text", "").strip()
        if not caption:
            caption = "(no caption returned)"
    except Exception as e:
        caption = f"Error during HF captioning: {e}"

    # 6) Combine OCR + caption
    combined = f"OCR text:\n{ocr_text}\n\nImage caption:\n{caption}"
    print("combined: ")
    return combined
121
+
122
@tool
def excel_tool(task_id: str) -> str:
    """
    Downloads <task_id>.xlsx (if any) and returns a stringified list of
    records from the specified sheet. No fallback to user-supplied tables.

    Expected keys in `task_id`:
        task_id - required (used to download the file)

    returns: stringified list of records from the specified sheet
    """
    print("reached excel_tool")
    preferred_sheet = "Sheet1"

    workbook_path = _download_file_for_task(task_id, "xlsx")
    if not workbook_path or not os.path.exists(workbook_path):
        return "Error: Excel file not found for this task."

    try:
        workbook = pd.ExcelFile(workbook_path)
        # Use the preferred sheet when present, otherwise the first one.
        chosen_sheet = (
            preferred_sheet
            if preferred_sheet in workbook.sheet_names
            else workbook.sheet_names[0]
        )
        frame = pd.read_excel(workbook, sheet_name=chosen_sheet)
        records = str(frame.to_dict(orient="records"))
        print(f"Excel file read successfully: {records}")
        return records
    except Exception as e:
        return f"Error reading Excel file: {e}"
149
+
150
+
151
+ import openai
152
@tool
def audio_transcriber_tool(task_id: str) -> str:
    """
    LangGraph tool for transcribing audio via OpenAI's Whisper API.

    Expects: task_id is a string.
    Returns: "<text or error message>".
    Always attempts to download the file for the given path or task ID.
    """
    print("reached audio_transcriber_tool")

    # Probe the supported audio extensions; keep the first successful download.
    local_audio = ""
    for extension in ("mp3", "wav", "m4a"):
        downloaded = _download_file_for_task(task_id, extension)
        if downloaded:
            local_audio = downloaded
            break

    if not local_audio or not os.path.exists(local_audio):
        return "Error: No audio file found (download failed)."

    # Send the file to OpenAI Whisper; any failure becomes the returned text.
    try:
        openai.api_key = os.getenv("OPENAI_API_KEY")
        if not openai.api_key:
            raise RuntimeError("OPENAI_API_KEY is not set in environment.")

        with open(local_audio, "rb") as audio_file:
            print("reached openai.audio.transcriptions.create")
            response = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
            )
            print("reached response")
        text = response.text.strip()
    except Exception as e:
        text = f"Error during transcription: {e}"
    print(f"Transcripted as transcript: {text}")
    return text
194
+ # tools.py
195
+
196
+ import re
197
+ import requests
198
+
199
@tool
def wikipedia_search_tool(wiki_query: str) -> str:
    """
    LangGraph wrapper for searching Wikipedia.
    Expects: wiki_query is a non-empty string.
    Returns: text summary of the first matching page, or an error message.

    If no valid wiki_query is provided, returns an error string (the
    function is annotated -> str, so it must never return a dict).
    """
    print("reached wikipedia search tool")
    query = wiki_query
    if not query:
        # BUG FIX: previously returned {}, violating the -> str contract.
        return "Error: empty Wikipedia query."

    try:
        # 1) Use the MediaWiki API to search for page titles matching the query
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "utf8": 1
        }
        search_resp = requests.get("https://en.wikipedia.org/w/api.php", params=search_params, timeout=10)
        search_resp.raise_for_status()
        search_data = search_resp.json()

        search_results = search_data.get("query", {}).get("search", [])
        if not search_results:
            print(f"No Wikipedia page found for '{query}'.")
            return f"No Wikipedia page found for '{query}'."

        # 2) Take the first search result's title
        first_title = search_results[0].get("title", "")
        if not first_title:
            print("Unexpected format from Wikipedia search.")
            return "Unexpected format from Wikipedia search."

        # 3) Fetch the page summary for that title via the REST summary endpoint
        title_for_url = requests.utils.requote_uri(first_title)
        summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title_for_url}"
        summary_resp = requests.get(summary_url, timeout=10)
        summary_resp.raise_for_status()
        summary_data = summary_resp.json()

        # 4) Extract either the "extract" field or a fallback message
        summary_text = summary_data.get("extract")
        if not summary_text:
            summary_text = summary_data.get("description", "No summary available.")
        print(f"Title: {first_title}\n\n{summary_text}")
        return f"Title: {first_title}\n\n{summary_text}"

    except requests.exceptions.RequestException as e:
        return f"Wikipedia search error: {e}"
    except Exception as e:
        return f"Unexpected error in wikipedia_search_tool: {e}"
257
+
258
+
259
+ from langchain_openai import ChatOpenAI
260
+ from langchain.schema import SystemMessage, HumanMessage
261
+ LLM = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.2)
262
+
263
@tool
def analyze_code_tool(task_id: str) -> str:
    """
    Either task_id OR (file + task_id).

    Reads the code (max 400 lines / 10 kB) and asks the LLM for:
      - a plain-language summary
      - a list of key functions/classes
      - obvious bugs or style smells

    Returns that analysis as a string.
    """
    print("reached analyze_code_tool")
    if not task_id:
        code_txt = "No code provided."
    else:
        path = _download_file_for_task(task_id, "py")
        if not path:
            return "Error: .py file not found for this task."
        code_txt = Path(path).read_text(encoding="utf-8", errors="ignore")

    # Truncate for safety: keep at most 400 lines and 10 kB of text.
    code_sample = "\n".join(code_txt.splitlines()[:400])[:10_000]

    prompt = [
        SystemMessage(content="You are a senior Python code reviewer."),
        HumanMessage(content=(
            "Please analyse the following code. "
            "Summarise what it does, list key functions/classes, "
            "and point out any obvious bugs, performance issues or style problems.\n\n"
            f"```python\n{code_sample}\n```"
            "If you can then find the output of the code and return it in the output."
        )),
    ]
    return LLM.invoke(prompt).content.strip()
300
+
301
+
302
+ # def web_search_tool(state: AgentState) -> AgentState:
303
+ # """
304
+ # Expects: state["web_search_query"] is a non‐empty string.
305
+ # Returns: {"web_search_query": None, "web_search_result": <string>}.
306
+ # Retries up to 5 times on either a DuckDuckGo β€œ202 Ratelimit” response or any exception (e.g. timeout).
307
+ # """
308
+ # print("reached web_search_tool")
309
+ # query = state.get("web_search_query", "")
310
+ # if not query:
311
+ # return {} # nothing to do
312
+
313
+ # ddg = DDGS()
314
+ # max_retries = 5
315
+ # result_text = ""
316
+
317
+ # for attempt in range(1, max_retries + 1):
318
+ # try:
319
+ # result_text = str(ddg.text(query, max_results=5))
320
+ # except Exception as e:
321
+ # # Network error or timeoutβ€”retry up to max_retries
322
+ # if attempt < max_retries:
323
+ # print(f"web_search_tool: exception '{e}', retrying in 4 seconds ({attempt}/{max_retries})")
324
+ # time.sleep(4)
325
+ # continue
326
+ # else:
327
+ # # Final attempt failed
328
+ # return {
329
+ # "web_search_query": None,
330
+ # "web_search_result": f"Error during DuckDuckGo search: {e}"
331
+ # }
332
+
333
+ # # Check for DuckDuckGo rate‐limit indicator
334
+ # if "202 Ratelimit" in result_text:
335
+ # if attempt < max_retries:
336
+ # print(f"web_search_tool: received '202 Ratelimit', retrying in 4 seconds ({attempt}/{max_retries})")
337
+ # time.sleep(4)
338
+ # continue
339
+ # else:
340
+ # # Final attempt still rate‐limited
341
+ # break
342
+
343
+ # # Successful response (no exception and no rate‐limit text)
344
+ # break
345
+
346
+ # return {
347
+ # "web_search_query": None,
348
+ # "web_search_result": result_text
349
+ # }