wt002 committed (verified)
Commit 3ddca4e · Parent(s): 93451f1

Update app.py

Files changed (1):
  1. app.py (+81, -114)

app.py CHANGED
@@ -157,43 +157,37 @@ from langchain.schema import HumanMessage, AIMessage, SystemMessage
 from langchain.prompts import ChatPromptTemplate
 from langgraph.graph import StateGraph, END
 from google.api_core.exceptions import ResourceExhausted
+from langchain.tools import Tool # Import Tool for consistent tool definitions

 # Assume these tools are defined elsewhere and imported
 # Placeholder for your actual tool implementations
-# For example:
-# from your_tools_module import duckduckgo_search, wikipedia_search, arxiv_search, document_qa, python_execution
-# And ensure you have a proper VideoTranscriptionTool
 def duckduckgo_search(query: str) -> str:
     """Performs a DuckDuckGo search for current events or general facts."""
-    # Placeholder for actual implementation
     print(f"DEBUG: duckduckgo_search called with: {query}")
     return f"Search result for '{query}': Example relevant information from web."

 def wikipedia_search(query: str) -> str:
     """Searches Wikipedia for encyclopedic information."""
-    # Placeholder for actual implementation
     print(f"DEBUG: wikipedia_search called with: {query}")
     return f"Wikipedia result for '{query}': Found detailed article."

 def arxiv_search(query: str) -> str:
     """Searches ArXiv for scientific preprints and papers."""
-    # Placeholder for actual implementation
     print(f"DEBUG: arxiv_search called with: {query}")
     return f"ArXiv result for '{query}': Found relevant research paper."

 def document_qa(document_path: str, question: str) -> str:
     """Answers questions based on the content of a given document file (PDF, DOCX, TXT)."""
-    # Placeholder for actual implementation
     print(f"DEBUG: document_qa called with: {document_path}, question: {question}")
     return f"Document QA result for '{question}': Answer extracted from document."

 def python_execution(code: str) -> str:
     """Executes Python code in a sandboxed environment for calculations or data manipulation."""
-    # Placeholder for actual implementation - IMPORTANT: Implement this securely!
-    # Example (UNSAFE for real use without proper sandboxing):
     try:
         exec_globals = {}
         exec_locals = {}
+        # WARNING: This is a highly insecure way to execute arbitrary Python code.
+        # For production, use a secure, sandboxed environment (e.g., Docker container, dedicated service).
         exec(code, exec_globals, exec_locals)
         return str(exec_locals.get('result', 'Code executed, no explicit result assigned to "result" variable.'))
     except Exception as e:
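
Note: the tool bodies above are placeholders. As a minimal sketch of a real search backend, assuming the third-party duckduckgo-search package and its DDGS.text API:

import traceback
from duckduckgo_search import DDGS  # assumed dependency: pip install duckduckgo-search

def duckduckgo_search(query: str) -> str:
    """Return the top DuckDuckGo hits as one text block."""
    try:
        results = DDGS().text(query, max_results=5)
        return "\n".join(f"{r['title']}: {r['body']} ({r['href']})" for r in results)
    except Exception:
        return f"[Tool Error] Search failed for '{query}': {traceback.format_exc(limit=1)}"

The exec() call flagged in the WARNING comments above can likewise be swapped for a child-process run with a timeout; this is only process isolation, not a hardened sandbox, and the code reports results by printing them rather than via a 'result' variable:

import subprocess
import sys

def python_execution(code: str, timeout: int = 10) -> str:
    """Execute code in a separate Python interpreter and capture stdout/stderr."""
    try:
        proc = subprocess.run(
            [sys.executable, "-c", code],
            capture_output=True, text=True, timeout=timeout,
        )
        return proc.stdout if proc.returncode == 0 else f"[Tool Error] {proc.stderr}"
    except subprocess.TimeoutExpired:
        return f"[Tool Error] Code did not finish within {timeout}s."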
@@ -202,7 +196,6 @@ def python_execution(code: str) -> str:
 class VideoTranscriptionTool:
     """Transcribes and analyzes video content from a URL or ID."""
     def __call__(self, video_id_or_url: str) -> str:
-        # Placeholder for actual implementation using youtube-transcript-api etc.
         print(f"DEBUG: VideoTranscriptionTool called with: {video_id_or_url}")
         return f"Video transcription/analysis result for '{video_id_or_url}': Summary of video content."

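Note: the removed inline comment pointed at youtube-transcript-api; a minimal sketch under that assumption (plain video IDs only; URL parsing omitted):

from youtube_transcript_api import YouTubeTranscriptApi  # assumed dependency

class VideoTranscriptionTool:
    """Fetches and joins the transcript for a YouTube video ID."""
    def __call__(self, video_id_or_url: str) -> str:
        try:
            segments = YouTubeTranscriptApi.get_transcript(video_id_or_url)
            return " ".join(seg["text"] for seg in segments)
        except Exception as e:
            return f"[Tool Error] Could not fetch transcript: {e}"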
@@ -210,13 +203,15 @@ class VideoTranscriptionTool:
 # --- Agent State Definition ---
 class AgentState(TypedDict):
     question: str
-    history: List[Union[HumanMessage, AIMessage, Dict[str, Any]]] # Allows for tool calls as dicts
-    context: Dict[str, Any]
+    history: List[Union[HumanMessage, AIMessage]] # History only contains proper messages
+    context: Dict[str, Any] # Use context for internal agent state
     reasoning: str
     iterations: int
     final_answer: Union[str, float, int, None]
-    current_task: str # Added for more focused reasoning
-    current_thoughts: str # Added for more focused reasoning
+    current_task: str
+    current_thoughts: str
+    tools: List[Tool] # Pass tools into state
+

 # --- Utility Functions ---
 def parse_agent_response(response_content: str) -> tuple[str, str, str]:
@@ -230,9 +225,8 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
         action_input = response_json.get("Action Input", "").strip()
         return reasoning, action, action_input
     except json.JSONDecodeError:
-        # Fallback for when LLM doesn't return perfect JSON (less likely with good prompt)
         print(f"WARNING: LLM response not perfectly JSON: {response_content[:200]}...")
-        # Attempt heuristic parsing as a last resort
+        # Fallback heuristic parsing (less reliable but better than nothing)
         reasoning_match = response_content.split("Reasoning:", 1)
         reasoning = reasoning_match[1].split("Action:", 1)[0].strip() if len(reasoning_match) > 1 else ""

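Note: the system prompt below asks the model to fence its JSON in ``` markers, so a bare json.loads on response.content (used as validation in call_with_retry further down) will reject otherwise-valid replies. A small stdlib helper to unwrap the fence first, as a sketch:

import json
import re

def extract_json_block(text: str) -> str:
    """Return the first {...} payload, unwrapping an optional ```json fence."""
    match = re.search(r"```(?:json)?\s*(\{.*\})\s*```", text, re.DOTALL)
    return match.group(1) if match else text.strip()

# e.g. inside parse_agent_response:
#     response_json = json.loads(extract_json_block(response_content))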
@@ -251,29 +245,19 @@ def should_continue(state: AgentState) -> str:
     """
     Determines if the agent should continue reasoning, use a tool, or end.
     """
-    history = state.get("history", [])
-
-    # Check for final answer in the last AIMessage
-    if history and isinstance(history[-1], AIMessage) and "FINAL ANSWER:" in history[-1].content:
-        print("DEBUG: should_continue -> END (Final Answer detected)")
-        return "end"
+    print(f"DEBUG: Entering should_continue. Current context: {state.get('context', {})}")

-    # Check if a tool was just executed (its output is in history)
-    # and the next step should be reasoning over that output
-    for msg in reversed(history):
-        if isinstance(msg, AIMessage) and any(f"[{tool.name} output]" in msg.content for tool in state.get("tools", [])):
-            print("DEBUG: should_continue -> REASON (Tool output detected, need to process)")
-            return "reason"
-
-    # Check if there's an action request to be executed
-    # This happens *after* reasoning has determined a tool is needed,
-    # but *before* the tool has run.
-    for msg in reversed(history):
-        if isinstance(msg, dict) and msg.get("type") == "action_request":
-            print("DEBUG: should_continue -> ACTION (Action request pending)")
-            return "action"
-
-    # If nothing else, assume we need to reason
+    # End if agent has produced a final answer
+    if state.get("final_answer"):
+        print("DEBUG: should_continue -> END (Final Answer set in state)")
+        return "end"
+
+    # Check if a tool action is pending in context
+    if state.get("context", {}).get("pending_action"):
+        print("DEBUG: should_continue -> ACTION (Pending action in context)")
+        return "action"
+
+    # Otherwise, go back to reasoning (e.g., after initial question, or after tool output)
     print("DEBUG: should_continue -> REASON (Default to reasoning)")
     return "reason"

@@ -291,21 +275,20 @@ def reasoning_node(state: AgentState) -> AgentState:
     if not GOOGLE_API_KEY:
         raise ValueError("GOOGLE_API_KEY not set in environment variables.")

-    # Ensure history is well-formed for the LLM prompt
-    if "history" not in state or not isinstance(state["history"], list):
-        state["history"] = []
-
     # Initialize/update state fields
     state.setdefault("context", {})
     state.setdefault("reasoning", "")
     state.setdefault("iterations", 0)
     state.setdefault("current_task", "Understand the question and plan the next step.")
     state.setdefault("current_thoughts", "")
+
+    # Clear any old pending action from context before generating a new one
+    state["context"].pop("pending_action", None)

     # Create Gemini model wrapper
     llm = ChatGoogleGenerativeAI(
-        model="gemini-1.5-flash", # Use a fast model for agentic loops
-        temperature=0.1, # Keep it low for more deterministic reasoning
+        model="gemini-1.5-flash",
+        temperature=0.1,
         google_api_key=GOOGLE_API_KEY
     )

@@ -314,7 +297,6 @@ def reasoning_node(state: AgentState) -> AgentState:
         f"- **{t.name}**: {t.description}" for t in state.get("tools", [])
     ])

-    # Craft a more robust and explicit system prompt
     system_prompt = (
         "You are an expert problem solver, designed to provide concise and accurate answers. "
         "Your process involves analyzing the question, intelligently selecting and using tools, "
@@ -326,8 +308,8 @@ def reasoning_node(state: AgentState) -> AgentState:
         "- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics.\n"
         "- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information.\n"
         "- Use **document_qa** when the question explicitly refers to a specific document file (e.g., 'Analyze this PDF').\n"
-        "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code.\n"
-        "- Use **VideoTranscriptionTool** for any question involving video or audio content.\n\n"
+        "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named 'result' if applicable.\n"
+        "- Use **VideoTranscriptionTool** for any question involving video or audio content. Provide the full YouTube URL or video ID.\n\n"
         "**Current Context:**\n{context}\n\n"
         "**Previous Reasoning Steps:**\n{reasoning}\n\n"
         "**Current Task:** {current_task}\n"
@@ -340,6 +322,8 @@ def reasoning_node(state: AgentState) -> AgentState:
         " \"Action Input\": \"[Input for the selected tool OR the final response]\"\n"
         "}\n"
         "```\n"
+        "**CRITICAL RULE: 'Action' and 'Action Input' MUST NOT be empty, unless 'Action' is 'Final Answer' and 'Action Input' is the conclusive response.**\n"
+        "If you cannot determine a suitable tool or a final answer, return Action: 'Final Answer' with a message like 'I cannot answer this question with the available tools.' or 'More information is needed.'\n"
         "Ensure 'Action Input' is appropriate for the chosen 'Action'. If 'Action' is 'Final Answer', provide the complete, concise answer."
     )

@@ -350,13 +334,12 @@ def reasoning_node(state: AgentState) -> AgentState:

     chain = prompt | llm

-    # === Add Retry Logic ===
     def call_with_retry(inputs, retries=3, delay=60):
         for attempt in range(retries):
             try:
                 response = chain.invoke(inputs)
                 # Attempt to parse immediately to catch bad JSON before returning
-                parse_agent_response(response.content)
+                json.loads(response.content) # Validate JSON structure
                 return response
             except ResourceExhausted as e:
                 print(f"[Retry {attempt+1}/{retries}] Gemini rate limit hit. Waiting {delay}s...")
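Note: call_with_retry sleeps a fixed 60s between attempts. For quota errors, exponential backoff is the usual pattern; the same loop could be expressed with the tenacity library (an assumed extra dependency):

from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
from google.api_core.exceptions import ResourceExhausted

@retry(
    retry=retry_if_exception_type(ResourceExhausted),
    wait=wait_exponential(multiplier=2, min=10, max=120),  # doubling waits, clamped to 10-120s
    stop=stop_after_attempt(5),
)
def call_llm(inputs):
    return chain.invoke(inputs)  # `chain` as built above from prompt | llm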
@@ -370,7 +353,6 @@ def reasoning_node(state: AgentState) -> AgentState:
                 time.sleep(delay)
         raise RuntimeError("Failed after multiple retries due to Gemini quota limit or invalid JSON.")

-    # Call model with retry protection
     response = call_with_retry({
         "context": state["context"],
         "reasoning": state["reasoning"],
@@ -379,12 +361,11 @@ def reasoning_node(state: AgentState) -> AgentState:
         "current_thoughts": state["current_thoughts"]
     })

-    # Parse output using the robust JSON parser
     content = response.content
     reasoning, action, action_input = parse_agent_response(content)

-    print(f"DEBUG: LLM Response Content: {content[:200]}...")
-    print(f"DEBUG: Parsed Action: {action}, Action Input: {action_input[:100]}...")
+    print(f"DEBUG: LLM Raw Response Content: {content[:200]}...")
+    print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")

     # Update state
     state["history"].append(AIMessage(content=content)) # Store the raw LLM response
@@ -393,50 +374,54 @@ def reasoning_node(state: AgentState) -> AgentState:
     state["current_thoughts"] = reasoning # Update current thoughts for next iteration

     if "final answer" in action.lower():
-        state["history"].append(AIMessage(content=f"FINAL ANSWER: {action_input}"))
         state["final_answer"] = action_input # Set final answer directly in state
+        # The should_continue check will handle ending the graph based on final_answer presence
     else:
-        # Store the action request in history for tool_node
-        state["history"].append({
-            "type": "action_request",
+        # Store the action request in context, not in history
+        state["context"]["pending_action"] = {
             "tool": action,
             "input": action_input
-        })
+        }
+        # Add a message to history to indicate the agent's intent for the LLM
+        state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
+

     print(f"DEBUG: Exiting reasoning_node. New history length: {len(state['history'])}")
     return state


 def tool_node(state: AgentState) -> AgentState:
-    # ... (previous code)
+    """
+    Node for executing the chosen tool and returning its output.
+    """
+    print(f"DEBUG: Entering tool_node. Iteration: {state['iterations']}")

-    tool_call_dict = None
-    for msg in reversed(state["history"]):
-        if isinstance(msg, dict) and msg.get("type") == "action_request":
-            tool_call_dict = msg
-            break
+    # Get the pending action from context
+    tool_call_dict = state["context"].pop("pending_action", None)

     if not tool_call_dict:
-        print("WARNING: No action_request found in history, skipping tool execution.")
-        return state # Or raise a more specific error if this truly shouldn't happen
+        # This case should ideally not be reached if should_continue is robust,
+        # but provides a fallback.
+        error_message = "[Tool Error] No pending_action found in context. This indicates an issue with graph flow."
+        print(f"ERROR: {error_message}")
+        state["history"].append(AIMessage(content=error_message))
+        return state

     tool_name = tool_call_dict.get("tool")
     tool_input = tool_call_dict.get("input")

-    # --- ADD THIS DEBUG PRINT ---
-    print(f"DEBUG: tool_node received action_request: tool='{tool_name}', input='{tool_input[:100]}...'")
-    # --- END DEBUG PRINT ---
-
-    if not tool_name or tool_input is None: # tool_input can be empty string for some tools, but not None
-        print(f"ERROR: Invalid tool call in action_request. Tool name: '{tool_name}', Input: '{tool_input}'")
-        # Instead of raising directly, you might want to send this back to reasoning
-        # Or provide a specific error message as tool output
-        state["history"].append(AIMessage(content=f"[Tool Error] Invalid tool call: Tool name '{tool_name}' or input was empty. LLM needs to provide valid action."))
+    # Defensive check for empty tool name or input (still needed as LLM might generate empty strings)
+    if not tool_name or tool_input is None:
+        error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty. LLM needs to provide valid 'Action' and 'Action Input'."
+        print(f"ERROR: {error_message}")
+        state["history"].append(AIMessage(content=error_message))
+        # Clear any problematic pending action
+        state["context"].pop("pending_action", None)
         return state

     # Look up and invoke the tool from the state's tool list
     available_tools = state.get("tools", [])
-    tool_fn = next((t for t in available_tools if t.name == tool_name), None) # Assuming tools are LangChain Tool objects now
+    tool_fn = next((t for t in available_tools if t.name == tool_name), None)

     if tool_fn is None:
         # Fallback for unrecognized tool - feedback to LLM
@@ -445,17 +430,14 @@ def tool_node(state: AgentState) -> AgentState:
     else:
         try:
             print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
-            tool_output = tool_fn.run(tool_input) # Assuming tool.run() method for LangChain Tools
-            if not tool_output: # Handle empty tool output
+            tool_output = tool_fn.run(tool_input)
+            if not tool_output and tool_output is not False: # Ensure 'False' is not treated as empty
                 tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
         except Exception as e:
             tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
             print(f"ERROR: {tool_output}")

-    # Add output to history as an AIMessage
-    # Ensure the history only contains HumanMessage and AIMessage objects for LangGraph's internal processing.
-    # The action_request dict can be removed or transformed if it's no longer needed for internal state.
-    # For now, we'll just add the tool output.
+    # Add tool output to history as an AIMessage for the LLM to process next
     state["history"].append(AIMessage(content=f"[{tool_name} output]\n{tool_output}"))

     print(f"DEBUG: Exiting tool_node. Tool output added to history. New history length: {len(state['history'])}")
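Note: LangChain's Tool.run passes a single string, but document_qa takes (document_path, question), so routing it through tool_fn.run above would raise a TypeError. One sketch of an adapter, assuming the LLM is prompted to pass a JSON object as the Action Input:

import json

def document_qa_single_input(payload: str) -> str:
    """Adapter: unpack '{"document_path": ..., "question": ...}' for document_qa."""
    try:
        args = json.loads(payload)
        return document_qa(args["document_path"], args["question"])
    except (json.JSONDecodeError, KeyError) as e:
        return f"[Tool Error] document_qa expects JSON with 'document_path' and 'question': {e}"

# registered as: Tool(name="document_qa", func=document_qa_single_input, description=...)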
@@ -463,7 +445,7 @@


 # ====== Agent Graph ======
-def create_agent_workflow(tools: List[Any]): # tools are passed in now
+def create_agent_workflow(tools: List[Tool]): # tools are passed in now
     workflow = StateGraph(AgentState)

     # Define nodes
@@ -478,43 +460,34 @@ def create_agent_workflow(tools: List[Any]): # tools are passed in now
         "reason",
         should_continue,
         {
-            "action": "action", # Go to action node if a tool is requested
-            "reason": "reason", # Loop back to reason if more thinking is needed
-            "end": END # End if final answer detected
+            "action": "action",
+            "reason": "reason",
+            "end": END
         }
     )

-    workflow.add_edge("action", "reason") # Always go back to reasoning after a tool action
+    workflow.add_edge("action", "reason")

-    # Compile the graph
     app = workflow.compile()
-
-    # Pass tools into the state so nodes can access them.
-    # This is a bit of a hacky way to get them into the state, but works for now.
-    # A cleaner way might be to make `tool_node` receive tools as a closure or directly from agent init.
-    # For this example, we'll modify the initial state for each invocation.
     return app


 # ====== Agent Interface ======
 class BasicAgent:
     def __init__(self):
-        # Tools need to be LangChain Tool objects for name and description
-        from langchain.tools import Tool
         self.tools = [
             Tool(name="duckduckgo_search", func=duckduckgo_search, description="Performs a DuckDuckGo search for current events or general facts."),
             Tool(name="wikipedia_search", func=wikipedia_search, description="Searches Wikipedia for encyclopedic information."),
             Tool(name="arxiv_search", func=arxiv_search, description="Searches ArXiv for scientific preprints and papers."),
-            Tool(name="document_qa", func=document_qa, description="Answers questions based on the content of a given document file (PDF, DOCX, TXT). Requires 'attachment_path' and 'question' as input."),
+            Tool(name="document_qa", func=document_qa, description="Answers questions based on the content of a given document file (PDF, DOCX, TXT). Requires 'document_path' and 'question' as input."),
             Tool(name="python_execution", func=python_execution, description="Executes Python code in a sandboxed environment for complex calculations or data manipulation."),
             Tool(name="VideoTranscriptionTool", func=VideoTranscriptionTool(), description="Transcribes and analyzes video content from a URL or ID. Use for any question involving video or audio.")
         ]
-        self.workflow = create_agent_workflow(self.tools) # Pass tools to workflow creator
+        self.workflow = create_agent_workflow(self.tools)

     def __call__(self, question: str) -> str:
-        print(f"\n--- Agent received question: {question[:50]}{'...' if len(question) > 50 else ''} ---")
+        print(f"\n--- Agent received question: {question[:80]}{'...' if len(question) > 80 else ''} ---")

-        # Initialize state with proper structure and pass tools
         state = {
             "question": question,
             "context": {},
@@ -524,28 +497,22 @@ class BasicAgent:
             "final_answer": None,
             "current_task": "Understand the question and plan the next step.",
             "current_thoughts": "",
-            "tools": self.tools # Pass tools into the state
+            "tools": self.tools
         }

-        # Invoke the workflow
+        # The invoke method will now return the final state, or raise an error if it hits a dead end
+        # LangGraph runs are synchronous by default here.
        final_state = self.workflow.invoke(state)

-        # Extract the FINAL ANSWER from history
-        if final_state.get("final_answer"):
+        if final_state.get("final_answer") is not None:
             answer = final_state["final_answer"]
             print(f"--- Agent returning FINAL ANSWER: {answer} ---")
             return answer
-
-        # Fallback if final_answer wasn't set correctly in state
-        for msg in reversed(final_state["history"]):
-            if isinstance(msg, AIMessage) and "FINAL ANSWER:" in msg.content:
-                answer = msg.content.split("FINAL ANSWER:")[1].strip()
-                print(f"--- Agent returning FINAL ANSWER (from history): {answer} ---")
-                return answer
-
-        print(f"--- ERROR: No FINAL ANSWER found in agent history for question: {question} ---")
-        raise ValueError("No FINAL ANSWER found in agent history.")
-
+        else:
+            # This should ideally not happen if the agent is designed to always provide a final answer
+            # or a specific "cannot answer" message.
+            print(f"--- ERROR: Agent finished without setting 'final_answer' for question: {question} ---")
+            raise ValueError("Agent finished without providing a final answer.")

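Note: end-to-end usage of the updated agent, as a minimal smoke test (question text is illustrative):

if __name__ == "__main__":
    agent = BasicAgent()
    answer = agent("What is the capital of France?")
    print(answer)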