naman1102 committed on
Commit
7c5f7b3
·
1 Parent(s): 838224c

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +29 -43
tools.py CHANGED
@@ -41,7 +41,7 @@ def web_search_tool(state: AgentState) -> AgentState:
41
  """
42
  Expects: state["web_search_query"] is a non‐empty string.
43
  Returns: {"web_search_query": None, "web_search_result": <string>}
44
- We also clear web_search_query so we dont loop forever.
45
  If the result is a DuckDuckGo 202 Ratelimit error, retry up to 5 times with a 4 second sleep between attempts.
46
  """
47
  # print("reached web search tool")
@@ -54,11 +54,12 @@ def web_search_tool(state: AgentState) -> AgentState:
54
  for attempt in range(max_retries):
55
  result_text = ddg.run(query)
56
  if "202 Ratelimit" not in result_text:
 
57
  break
58
  if attempt < max_retries - 1:
59
  print(f"web_search_result: rate limit error, retrying in 10 seconds")
60
  time.sleep(4)
61
- print(f"web_search_result reached ")
62
  return {
63
  "web_search_query": None,
64
  "web_search_result": result_text
@@ -73,31 +74,27 @@ def ocr_image_tool(state: AgentState) -> AgentState:
73
  • A Task ID string like "abc123", in which case we GET /files/abc123.
74
  Returns:
75
  { "ocr_path": None, "ocr_result": "<OCRed text or error string>" }
 
76
  """
77
  path_or_id = state.get("ocr_path", "")
78
  if not path_or_id:
79
  return {}
80
 
81
- # 1) If local file exists, use it. Otherwise, treat "path_or_id" as task_id and download.
82
- if os.path.exists(path_or_id):
83
- local_img = path_or_id
84
- else:
85
- # Assume it's a task_id; try to download a PNG or JPG
86
- # (We don’t know extension, so try common ones in order)
87
- local_img = ""
88
- for ext in ("png", "jpg", "jpeg"):
89
- candidate = _download_file_for_task(path_or_id, ext)
90
- if candidate:
91
- local_img = candidate
92
- break
93
 
94
  if not local_img or not os.path.exists(local_img):
95
  return {
96
  "ocr_path": None,
97
- "ocr_result": "Error: No image file found (neither local nor downloadable)."
98
  }
99
 
100
- # 2) Run OCR
101
  try:
102
  img = Image.open(local_img)
103
  text = pytesseract.image_to_string(img).strip() or "(no visible text)"
@@ -121,19 +118,17 @@ def parse_excel_tool(state: AgentState) -> AgentState:
121
  "excel_sheet_name": None,
122
  "excel_result": "<stringified records or Markdown table>"
123
  }
 
124
  """
125
  path_or_id = state.get("excel_path", "")
126
  sheet = state.get("excel_sheet_name", "")
127
  if not path_or_id:
128
  return {}
129
 
130
- # 1) If local .xlsx exists, use it. Otherwise, try downloading.
131
- if os.path.exists(path_or_id):
132
- local_xlsx = path_or_id
133
- else:
134
- local_xlsx = _download_file_for_task(path_or_id, "xlsx")
135
 
136
- # 2) If we finally have a real file, read it
137
  if local_xlsx and os.path.exists(local_xlsx):
138
  try:
139
  xls = pd.ExcelFile(local_xlsx)
@@ -152,7 +147,7 @@ def parse_excel_tool(state: AgentState) -> AgentState:
152
  print(f">>> parse_excel_tool: Error reading Excel file {local_xlsx}: {e}")
153
  # Fall back to scanning for Markdown below
154
 
155
- # 3) Fallback: scan any HumanMessage for a Markdown‐style table
156
  messages = state.get("messages", [])
157
  table_lines = []
158
  collecting = False
@@ -177,7 +172,6 @@ def parse_excel_tool(state: AgentState) -> AgentState:
177
  "excel_result": "Error: No Excel file found and no Markdown table detected in prompt."
178
  }
179
 
180
- # 4) Strip out separator rows and return the table block
181
  clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
182
  table_block = "\n".join(clean_rows).strip()
183
  print(f"Parsed excel as excel_result: {table_block}")
@@ -210,52 +204,44 @@ from state import AgentState
210
 
211
  def audio_transcriber_tool(state: AgentState) -> AgentState:
212
  """
213
- LangGraph tool for transcribing audio via OpenAIs Whisper API.
214
  Expects: state["audio_path"] to be either:
215
  • A local file path (e.g. "./hf_files/abc.mp3"), OR
216
  • A Task ID (e.g. "abc123"), in which case we try downloading
217
  GET {DEFAULT_API_URL}/files/{task_id} with .mp3, .wav, .m4a extensions.
218
-
219
  Returns:
220
  {
221
  "audio_path": None,
222
  "transcript": "<text or error message>"
223
  }
 
224
  """
225
  path_or_id = state.get("audio_path", "")
226
  if not path_or_id:
227
  return {}
228
 
229
- # 1) If local file exists, use it. Otherwise, treat path_or_id as task_id and try downloads:
230
- if os.path.exists(path_or_id):
231
- local_audio = path_or_id
232
- else:
233
- local_audio = ""
234
- for ext in ("mp3", "wav", "m4a"):
235
- candidate = _download_file_for_task(path_or_id, ext)
236
- if candidate:
237
- local_audio = candidate
238
- break
239
 
240
  if not local_audio or not os.path.exists(local_audio):
241
- # Neither a real file nor a downloadable attachment
242
  return {
243
  "audio_path": None,
244
- "transcript": "Error: No audio file found (neither local nor downloadable)."
245
  }
246
 
247
- # 2) Send to OpenAI Whisper
248
  try:
249
  openai.api_key = os.getenv("OPENAI_API_KEY")
250
  if not openai.api_key:
251
  raise RuntimeError("OPENAI_API_KEY is not set in environment.")
252
 
253
  with open(local_audio, "rb") as audio_file:
254
- # For OpenAI Python library v0.27.0+:
255
  response = openai.Audio.transcribe("whisper-1", audio_file)
256
- # If you’re on an older library:
257
- # response = openai.Audio.create_transcription(file=audio_file, model="whisper-1")
258
-
259
  text = response.get("text", "").strip()
260
  except Exception as e:
261
  text = f"Error during transcription: {e}"
 
41
  """
42
  Expects: state["web_search_query"] is a non‐empty string.
43
  Returns: {"web_search_query": None, "web_search_result": <string>}
44
+ We also clear web_search_query so we don't loop forever.
45
  If the result is a DuckDuckGo 202 Ratelimit error, retry up to 5 times with a 4 second sleep between attempts.
46
  """
47
  # print("reached web search tool")
 
54
  for attempt in range(max_retries):
55
  result_text = ddg.run(query)
56
  if "202 Ratelimit" not in result_text:
57
+ print(f"web_search_result success:")
58
  break
59
  if attempt < max_retries - 1:
60
  print(f"web_search_result: rate limit error, retrying in 10 seconds")
61
  time.sleep(4)
62
+ # print(f"web_search_result reached ")
63
  return {
64
  "web_search_query": None,
65
  "web_search_result": result_text
 
74
  • A Task ID string like "abc123", in which case we GET /files/abc123.
75
  Returns:
76
  { "ocr_path": None, "ocr_result": "<OCRed text or error string>" }
77
+ Always attempts to download the file for the given path or task ID.
78
  """
79
  path_or_id = state.get("ocr_path", "")
80
  if not path_or_id:
81
  return {}
82
 
83
+ # Always attempt to download the file, regardless of local existence
84
+ local_img = ""
85
+ for ext in ("png", "jpg", "jpeg"):
86
+ candidate = _download_file_for_task(path_or_id, ext)
87
+ if candidate:
88
+ local_img = candidate
89
+ break
 
 
 
 
 
90
 
91
  if not local_img or not os.path.exists(local_img):
92
  return {
93
  "ocr_path": None,
94
+ "ocr_result": "Error: No image file found (download failed)."
95
  }
96
 
97
+ # Run OCR
98
  try:
99
  img = Image.open(local_img)
100
  text = pytesseract.image_to_string(img).strip() or "(no visible text)"
 
118
  "excel_sheet_name": None,
119
  "excel_result": "<stringified records or Markdown table>"
120
  }
121
+ Always attempts to download the file for the given path or task ID.
122
  """
123
  path_or_id = state.get("excel_path", "")
124
  sheet = state.get("excel_sheet_name", "")
125
  if not path_or_id:
126
  return {}
127
 
128
+ # Always attempt to download the file, regardless of local existence
129
+ local_xlsx = _download_file_for_task(path_or_id, "xlsx")
 
 
 
130
 
131
+ # If we finally have a real file, read it
132
  if local_xlsx and os.path.exists(local_xlsx):
133
  try:
134
  xls = pd.ExcelFile(local_xlsx)
 
147
  print(f">>> parse_excel_tool: Error reading Excel file {local_xlsx}: {e}")
148
  # Fall back to scanning for Markdown below
149
 
150
+ # Fallback: scan any HumanMessage for a Markdown‐style table
151
  messages = state.get("messages", [])
152
  table_lines = []
153
  collecting = False
 
172
  "excel_result": "Error: No Excel file found and no Markdown table detected in prompt."
173
  }
174
 
 
175
  clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
176
  table_block = "\n".join(clean_rows).strip()
177
  print(f"Parsed excel as excel_result: {table_block}")
 
204
 
205
  def audio_transcriber_tool(state: AgentState) -> AgentState:
206
  """
207
+ LangGraph tool for transcribing audio via OpenAI's Whisper API.
208
  Expects: state["audio_path"] to be either:
209
  • A local file path (e.g. "./hf_files/abc.mp3"), OR
210
  • A Task ID (e.g. "abc123"), in which case we try downloading
211
  GET {DEFAULT_API_URL}/files/{task_id} with .mp3, .wav, .m4a extensions.
 
212
  Returns:
213
  {
214
  "audio_path": None,
215
  "transcript": "<text or error message>"
216
  }
217
+ Always attempts to download the file for the given path or task ID.
218
  """
219
  path_or_id = state.get("audio_path", "")
220
  if not path_or_id:
221
  return {}
222
 
223
+ # Always attempt to download the file, regardless of local existence
224
+ local_audio = ""
225
+ for ext in ("mp3", "wav", "m4a"):
226
+ candidate = _download_file_for_task(path_or_id, ext)
227
+ if candidate:
228
+ local_audio = candidate
229
+ break
 
 
 
230
 
231
  if not local_audio or not os.path.exists(local_audio):
 
232
  return {
233
  "audio_path": None,
234
+ "transcript": "Error: No audio file found (download failed)."
235
  }
236
 
237
+ # Send to OpenAI Whisper
238
  try:
239
  openai.api_key = os.getenv("OPENAI_API_KEY")
240
  if not openai.api_key:
241
  raise RuntimeError("OPENAI_API_KEY is not set in environment.")
242
 
243
  with open(local_audio, "rb") as audio_file:
 
244
  response = openai.Audio.transcribe("whisper-1", audio_file)
 
 
 
245
  text = response.get("text", "").strip()
246
  except Exception as e:
247
  text = f"Error during transcription: {e}"