wt002 committed (verified)
Commit 2bc5c0f · Parent: 7293dcb

Update app.py

Files changed (1)
  1. app.py +114 -87
app.py CHANGED
@@ -49,6 +49,21 @@ import arxiv
49
  from transformers import pipeline as hf_pipeline # Renamed to avoid clash with main pipeline
50
  from youtube_transcript_api import YouTubeTranscriptApi
51
52
  # --- Helper function for python_execution tool ---
53
  def indent_code(code: str, indent: str = " ") -> str:
54
  """Indents multi-line code for execution within a function."""
@@ -203,9 +218,9 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
203
  reasoning = response_content[reasoning_idx + len("Reasoning:"):action_idx].strip()
204
  if reasoning.startswith('"') and reasoning.endswith('"'):
205
  reasoning = reasoning[1:-1]
206
- elif reasoning_idx != -1:
207
- reasoning = response_content[reasoning_idx + len("Reasoning:"):].strip()
208
- if reasoning.startswith('"') and reasoning.endswith('"'):
209
  reasoning = reasoning[1:-1]
210
 
211
  # Attempt to find Action and Action Input
@@ -217,27 +232,25 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
217
  action_input = response_content[action_input_idx + len("Action Input:"):].strip()
218
  else:
219
  action = response_content[action_idx + len("Action:"):].strip()
220
-
221
  if action.startswith('"') and action.endswith('"'):
222
  action = action[1:-1]
223
  if action_input.startswith('"') and action_input.endswith('"'):
224
  action_input = action_input[1:-1]
225
 
226
  # Final cleanup for any trailing JSON artifacts if heuristic grabs too much
227
- action = action.split('"', 1)[0].strip()
228
- action_input = action_input.split('"', 1)[0].strip()
229
 
230
  return reasoning, action, action_input
231
 
232
-
233
  # --- Graph Nodes ---
234
-
235
  def should_continue(state: AgentState) -> str:
236
  """
237
  Determines if the agent should continue reasoning, use a tool, or end.
238
  """
239
  print(f"DEBUG: Entering should_continue. Current context: {state.get('context', {})}")
240
-
241
  if state.get("final_answer") is not None:
242
  print("DEBUG: should_continue -> END (Final Answer set in state)")
243
  return "end"
@@ -249,12 +262,8 @@ def should_continue(state: AgentState) -> str:
249
  print("DEBUG: should_continue -> REASON (Default to reasoning)")
250
  return "reason"
251
 
252
-
253
  # ====== NEW IMPORTS ======
254
- from langchain_community.embeddings import HuggingFaceEmbeddings
255
- from langchain_community.vectorstores import FAISS # Use FAISS instead of Chroma
256
- from langchain.text_splitter import RecursiveCharacterTextSplitter
257
- from langchain_core.documents import Document
258
 
259
  # ====== DOCUMENT PROCESSING SETUP ======
260
  def create_vector_store():
@@ -265,25 +274,23 @@ def create_vector_store():
265
  Document(page_content="Python is a popular programming language created by Guido van Rossum.", metadata={"source": "tech"}),
266
  Document(page_content="The Eiffel Tower is located in Paris, France.", metadata={"source": "landmarks"}),
267
  ]
268
-
269
  # Initialize embedding model
270
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
271
-
272
  # Split documents into chunks
273
  text_splitter = RecursiveCharacterTextSplitter(
274
  chunk_size=500, # Smaller chunks for better precision
275
  chunk_overlap=100
276
  )
277
  chunks = text_splitter.split_documents(documents)
278
-
279
  # Create FAISS vector store
280
  return FAISS.from_documents(
281
  documents=chunks,
282
  embedding=embeddings
283
  )
284
 
285
-
286
-
287
  def reasoning_node(state: AgentState) -> AgentState:
288
  """
289
  Node for the agent to analyze the question, determine next steps,
@@ -297,24 +304,24 @@ def reasoning_node(state: AgentState) -> AgentState:
297
  state.setdefault("iterations", 0)
298
  state.setdefault("current_task", "Understand the question and plan the next step.")
299
  state.setdefault("current_thoughts", "")
300
-
301
- state["context"].pop("pending_action", None)
302
 
303
  # --- Initialize local HuggingFacePipeline ---
304
  # Using Mistral-7B-Instruct-v0.2 for better agent performance
305
- model_name = "mistralai/Mistral-7B-Instruct-v0.2"
306
-
307
  print(f"DEBUG: Loading local model: {model_name}...")
308
-
309
  tokenizer = AutoTokenizer.from_pretrained(model_name)
310
-
311
  # Load model with optimal settings for GPU if available, else CPU
312
  # Use bfloat16 for GPUs that support it (NVIDIA Ampere architecture and newer)
313
  # else float16 for older GPUs or float32 for CPU/fallback.
314
  # device_map="auto" intelligently distributes the model across available devices.
315
  model = AutoModelForCausalLM.from_pretrained(
316
- model_name,
317
- torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
318
  device_map="auto"
319
  )
320
 
@@ -342,19 +349,19 @@ def reasoning_node(state: AgentState) -> AgentState:
342
  # Initialize vector store if not present
343
  if "vector_store" not in state["context"]:
344
  state["context"]["vector_store"] = create_vector_store()
345
-
346
  vector_store = state["context"]["vector_store"]
347
-
348
  # Perform retrieval
349
  relevant_docs = vector_store.similarity_search(
350
- state["question"],
351
  k=3 # Retrieve top 3 most relevant chunks
352
  )
353
-
354
  # Format context for LLM
355
  rag_context = "\n\n[Relevant Knowledge]\n"
356
  rag_context += "\n---\n".join([doc.page_content for doc in relevant_docs])
357
-
358
  # ====== MODIFIED PROMPT ======
359
  # Add RAG context to system prompt
360
  system_prompt = (
@@ -370,33 +377,43 @@ def reasoning_node(state: AgentState) -> AgentState:
370
  "- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'.\n"
371
  "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named '_result_value' (e.g., '_result_value = 1 + 1').\n"
372
  "- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID.\n\n"
373
- "**Retrieved Context:**\n{rag_context}\n\n" # ADDED RAG CONTEXT
 
374
  "**Current Context:**\n{context}\n\n"
375
  "**Previous Reasoning Steps:**\n{reasoning}\n\n"
376
  "**Current Task:** {current_task}\n"
377
  "**Current Thoughts:** {current_thoughts}\n\n"
378
- # ... [rest of prompt remains same] ...
379
  )
380
 
381
  prompt = ChatPromptTemplate.from_messages([
382
  SystemMessage(content=system_prompt),
383
- *state["history"]
384
  ])
385
 
386
  formatted_messages = prompt.format_messages(
387
- rag_context=rag_context, # ADD THIS ARGUMENT
388
  context=state["context"],
389
  reasoning=state["reasoning"],
390
  question=state["question"],
391
  current_task=state["current_task"],
392
  current_thoughts=state["current_thoughts"]
393
  )
394
-
395
  # Use tokenizer's chat template for optimal formatting with chat models
396
  try:
397
  full_input_string = tokenizer.apply_chat_template(
398
- formatted_messages,
399
- tokenize=False,
400
  add_generation_prompt=True # Adds the assistant's turn start token
401
  )
402
  except Exception as e:
@@ -406,47 +423,50 @@ def reasoning_node(state: AgentState) -> AgentState:
406
  def call_with_retry_local(inputs, retries=3): # Reduced retries for local models as network isn't primary issue
407
  for attempt in range(retries):
408
  try:
409
- response_text = llm.invoke(inputs)
410
-
411
  # Strip the prompt from the generated text
412
  if response_text.startswith(inputs):
413
  content = response_text[len(inputs):].strip()
414
  else:
415
- content = response_text.strip()
416
 
417
  print(f"DEBUG: RAW LOCAL LLM Response (Attempt {attempt+1}):\n---\n{content}\n---")
418
-
419
  # Attempt to parse to validate structure
420
- json.loads(content)
421
-
422
  return AIMessage(content=content)
423
  except json.JSONDecodeError as e:
424
  print(f"[Retry {attempt+1}/{retries}] Local LLM returned invalid JSON. Error: {e}. Retrying...")
425
  print(f"Invalid JSON content (partial): {content[:200]}...")
426
  state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid JSON. Expected format: ```json{{\"Reasoning\": \"...\", \"Action\": \"...\", \"Action Input\": \"...\"}}```. Please ensure your response is ONLY valid JSON and strictly follows the format. Error: {e}"))
427
- time.sleep(5)
428
- except Exception as e:
429
  print(f"[Retry {attempt+1}/{retries}] An unexpected error occurred during local LLM call: {e}.")
430
  state["history"].append(AIMessage(content=f"[Local LLM Error] Failed to get a response from the local LLM: {e}. Trying again."))
431
- time.sleep(10)
 
432
  raise RuntimeError("Failed after multiple retries due to local Hugging Face model issues or invalid JSON.")
433
 
434
- response = call_with_retry_local(full_input_string)
435
 
436
  content = response.content
437
  reasoning, action, action_input = parse_agent_response(content)
438
-
439
  print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
440
 
441
- if isinstance(response, AIMessage) and content == response.content:
442
- state["history"].append(AIMessage(content=content))
443
-
444
  state["reasoning"] += f"\nStep {state['iterations'] + 1}: {reasoning}"
445
  state["iterations"] += 1
446
- state["current_thoughts"] = reasoning
447
 
448
- if "final answer" in action.lower():
449
- state["final_answer"] = action_input
450
  else:
451
  state["context"]["pending_action"] = {
452
  "tool": action,
@@ -454,11 +474,9 @@ def reasoning_node(state: AgentState) -> AgentState:
454
  }
455
  state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
456
 
457
-
458
  print(f"DEBUG: Exiting reasoning_node. New history length: {len(state['history'])}")
459
  return state
460
 
461
-
462
  def tool_node(state: AgentState) -> AgentState:
463
  """
464
  Node for executing the chosen tool and returning its output.
@@ -476,11 +494,11 @@ def tool_node(state: AgentState) -> AgentState:
476
  tool_name = tool_call_dict.get("tool")
477
  tool_input = tool_call_dict.get("input")
478
 
479
- if not tool_name or tool_input is None:
480
  error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty or None. LLM needs to provide valid 'Action' and 'Action Input'."
481
- print(f"ERROR: {error_message}")
482
  state["history"].append(AIMessage(content=error_message))
483
- state["context"].pop("pending_action", None)
484
  return state
485
 
486
  available_tools = state.get("tools", [])
@@ -493,27 +511,26 @@ def tool_node(state: AgentState) -> AgentState:
493
  try:
494
  print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
495
  tool_output = tool_fn.run(tool_input)
496
- if not tool_output and tool_output is not False:
497
  tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
498
  except Exception as e:
499
  tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
500
  print(f"ERROR: {tool_output}")
501
 
502
  state["history"].append(AIMessage(content=f"[{tool_name} output]\n{tool_output}"))
503
-
504
  print(f"DEBUG: Exiting tool_node. Tool output added to history. New history length: {len(state['history'])}")
505
  return state
506
 
507
-
508
  # ====== Agent Graph ======
509
  def create_agent_workflow(tools: List[BaseTool]): # Use BaseTool for consistency
510
  workflow = StateGraph(AgentState)
511
-
512
  workflow.add_node("reason", reasoning_node)
513
  workflow.add_node("action", tool_node)
514
-
515
  workflow.set_entry_point("reason")
516
-
517
  workflow.add_conditional_edges(
518
  "reason",
519
  should_continue,
@@ -523,13 +540,12 @@ def create_agent_workflow(tools: List[BaseTool]): # Use BaseTool for consistency
523
  "end": END
524
  }
525
  )
526
-
527
  workflow.add_edge("action", "reason")
528
-
529
  app = workflow.compile()
530
  return app
531
 
532
-
533
  # ====== Agent Interface ======
534
  class BasicAgent:
535
  def __init__(self):
@@ -540,13 +556,13 @@ class BasicAgent:
540
  arxiv_search,
541
  document_qa,
542
  python_execution,
543
- VideoTranscriptionTool()
544
  ]
545
-
546
  # Pre-initialize RAG vector store
547
  self.vector_store = create_vector_store()
548
  self.workflow = create_agent_workflow(self.tools)
549
-
550
  def __call__(self, question: str) -> str:
551
  print(f"\n--- Agent received question: {question[:50]}{'...' if len(question) > 50 else ''} ---")
552
 
@@ -561,23 +577,34 @@ class BasicAgent:
561
  "final_answer": None,
562
  "current_task": "Understand the question and plan the next step.",
563
  "current_thoughts": "",
564
- "tools": self.tools
565
  }
566
 
567
- final_state = self.workflow.invoke(state)
568
-
569
- if final_state.get("final_answer") is not None:
570
- answer = final_state["final_answer"]
571
- print(f"--- Agent returning FINAL ANSWER: {answer} ---")
572
- return answer
573
- else:
574
- print(f"--- ERROR: Agent finished without setting 'final_answer' for question: {question} ---")
575
- if final_state["history"]:
576
- last_message = final_state["history"][-1].content
577
- print(f"Last message in history: {last_message}")
578
- return f"Agent could not fully answer. Last message: {last_message}"
 
579
  else:
580
- raise ValueError("Agent finished without providing a final answer and no history messages.")

app.py AFTER CHANGES (added lines marked with +)

49
  from transformers import pipeline as hf_pipeline # Renamed to avoid clash with main pipeline
50
  from youtube_transcript_api import YouTubeTranscriptApi
51
 
52
+ from typing import List, Literal, TypedDict
53
+
54
+ import torch
55
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
56
+ from langchain_core.tools import BaseTool
57
+ from langchain_community.embeddings import HuggingFaceEmbeddings
58
+ from langchain_community.vectorstores import FAISS
59
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
60
+ from langchain_core.documents import Document
61
+ from langchain.chains.question_answering import load_qa_chain
62
+ from langchain_community.llms import HuggingFacePipeline
63
+ from langchain.prompts import ChatPromptTemplate
64
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
65
+ from langgraph.graph import END, StateGraph
66
+
67
  # --- Helper function for python_execution tool ---
68
  def indent_code(code: str, indent: str = " ") -> str:
69
  """Indents multi-line code for execution within a function."""
 
218
  reasoning = response_content[reasoning_idx + len("Reasoning:"):action_idx].strip()
219
  if reasoning.startswith('"') and reasoning.endswith('"'):
220
  reasoning = reasoning[1:-1]
221
+ elif reasoning_idx != -1:
222
+ reasoning = response_content[reasoning_idx + len("Reasoning:"):].strip()
223
+ if reasoning.startswith('"') and reasoning.endswith('"'):
224
  reasoning = reasoning[1:-1]
225
 
226
  # Attempt to find Action and Action Input
 
232
  action_input = response_content[action_input_idx + len("Action Input:"):].strip()
233
  else:
234
  action = response_content[action_idx + len("Action:"):].strip()
235
+
236
  if action.startswith('"') and action.endswith('"'):
237
  action = action[1:-1]
238
  if action_input.startswith('"') and action_input.endswith('"'):
239
  action_input = action_input[1:-1]
240
 
241
  # Final cleanup for any trailing JSON artifacts if heuristic grabs too much
242
+ action = action.split('"', 1)[0].strip()
243
+ action_input = action_input.split('"', 1)[0].strip()
244
 
245
  return reasoning, action, action_input
246
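A quick illustration of what the heuristic above extracts, assuming the elided start of `parse_agent_response` locates the "Reasoning:", "Action:" and "Action Input:" labels the way the slicing implies:

```python
sample = (
    'Reasoning: "I need up-to-date information, so I will search the web."\n'
    'Action: "duckduckgo_search"\n'
    'Action Input: "current UK Prime Minister"'
)

reasoning, action, action_input = parse_agent_response(sample)
# reasoning    -> I need up-to-date information, so I will search the web.
# action       -> duckduckgo_search
# action_input -> current UK Prime Minister
```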
 
 
247
  # --- Graph Nodes ---
 
248
  def should_continue(state: AgentState) -> str:
249
  """
250
  Determines if the agent should continue reasoning, use a tool, or end.
251
  """
252
  print(f"DEBUG: Entering should_continue. Current context: {state.get('context', {})}")
253
+
254
  if state.get("final_answer") is not None:
255
  print("DEBUG: should_continue -> END (Final Answer set in state)")
256
  return "end"
 
262
  print("DEBUG: should_continue -> REASON (Default to reasoning)")
263
  return "reason"
264
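The middle of `should_continue` is outside this hunk; presumably it routes to the tool node when a `pending_action` is queued. A sketch under that assumption (the returned strings must match the keys passed to `add_conditional_edges` in `create_agent_workflow`):

```python
def should_continue_sketch(state: AgentState) -> str:
    if state.get("final_answer") is not None:
        return "end"      # answer ready -> stop the graph
    if state.get("context", {}).get("pending_action"):
        return "action"   # a tool call is queued -> run tool_node next
    return "reason"       # otherwise keep reasoning
```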
 
 
265
  # ====== NEW IMPORTS ======
266
+ # Already included at the top.
 
 
 
267
 
268
  # ====== DOCUMENT PROCESSING SETUP ======
269
  def create_vector_store():
 
274
  Document(page_content="Python is a popular programming language created by Guido van Rossum.", metadata={"source": "tech"}),
275
  Document(page_content="The Eiffel Tower is located in Paris, France.", metadata={"source": "landmarks"}),
276
  ]
277
+
278
  # Initialize embedding model
279
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
280
+
281
  # Split documents into chunks
282
  text_splitter = RecursiveCharacterTextSplitter(
283
  chunk_size=500, # Smaller chunks for better precision
284
  chunk_overlap=100
285
  )
286
  chunks = text_splitter.split_documents(documents)
287
+
288
  # Create FAISS vector store
289
  return FAISS.from_documents(
290
  documents=chunks,
291
  embedding=embeddings
292
  )
293
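Usage sketch for the store built above (illustrative query):

```python
store = create_vector_store()
hits = store.similarity_search("Who created Python?", k=2)
for doc in hits:
    print(doc.metadata.get("source"), "->", doc.page_content)
```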
 
 
 
294
  def reasoning_node(state: AgentState) -> AgentState:
295
  """
296
  Node for the agent to analyze the question, determine next steps,
 
304
  state.setdefault("iterations", 0)
305
  state.setdefault("current_task", "Understand the question and plan the next step.")
306
  state.setdefault("current_thoughts", "")
307
+
308
+ state["context"].pop("pending_action", None)
309
 
310
  # --- Initialize local HuggingFacePipeline ---
311
  # Using Mistral-7B-Instruct-v0.2 for better agent performance
312
+ model_name = "mistralai/Mistral-7B-Instruct-v0.2"
313
+
314
  print(f"DEBUG: Loading local model: {model_name}...")
315
+
316
  tokenizer = AutoTokenizer.from_pretrained(model_name)
317
+
318
  # Load model with optimal settings for GPU if available, else CPU
319
  # Use bfloat16 for GPUs that support it (NVIDIA Ampere architecture and newer)
320
  # else float16 for older GPUs or float32 for CPU/fallback.
321
  # device_map="auto" intelligently distributes the model across available devices.
322
  model = AutoModelForCausalLM.from_pretrained(
323
+ model_name,
324
+ torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
325
  device_map="auto"
326
  )
327
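Two notes on this block. The comment describes a three-way dtype choice (bfloat16 on Ampere+, float16 on older GPUs, float32 on CPU) while the expression shown only switches on CUDA vs CPU, and the lines elided after this hunk presumably wrap the model and tokenizer in a `HuggingFacePipeline` so that `llm.invoke(...)` further down returns plain text. A sketch under those assumptions:

```python
# Three-way dtype pick matching the comment above; the from_pretrained call
# would then pass torch_dtype=dtype instead of the two-way expression.
if torch.cuda.is_available():
    dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
else:
    dtype = torch.float32

# Presumed construction of the `llm` that call_with_retry_local invokes below
# (parameter values are illustrative, not from the diff).
hf_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=False,
)
llm = HuggingFacePipeline(pipeline=hf_gen)
```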
 
 
349
  # Initialize vector store if not present
350
  if "vector_store" not in state["context"]:
351
  state["context"]["vector_store"] = create_vector_store()
352
+
353
  vector_store = state["context"]["vector_store"]
354
+
355
  # Perform retrieval
356
  relevant_docs = vector_store.similarity_search(
357
+ state["question"],
358
  k=3 # Retrieve top 3 most relevant chunks
359
  )
360
+
361
  # Format context for LLM
362
  rag_context = "\n\n[Relevant Knowledge]\n"
363
  rag_context += "\n---\n".join([doc.page_content for doc in relevant_docs])
364
+
365
  # ====== MODIFIED PROMPT ======
366
  # Add RAG context to system prompt
367
  system_prompt = (
 
377
  "- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'.\n"
378
  "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named '_result_value' (e.g., '_result_value = 1 + 1').\n"
379
  "- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID.\n\n"
380
+ "**To provide a final answer, use the Action 'final answer' with the complete answer in 'Action Input'.**\n\n" # Added explicit instruction
381
+ "**Retrieved Context:**\n{rag_context}\n\n"
382
  "**Current Context:**\n{context}\n\n"
383
  "**Previous Reasoning Steps:**\n{reasoning}\n\n"
384
  "**Current Task:** {current_task}\n"
385
  "**Current Thoughts:** {current_thoughts}\n\n"
386
+ "**Question:** {question}\n\n"
387
+ "**Expected JSON Output Format:**\n"
388
+ "```json\n"
389
+ "{\n"
390
+ " \"Reasoning\": \"Your reasoning process to decide the next step, including why a tool is chosen or how an answer is derived.\",\n"
391
+ " \"Action\": \"The name of the tool to use (e.g., duckduckgo_search, final answer), or 'No Action' if no tool is needed yet.\",\n"
392
+ " \"Action Input\": \"The input for the tool (e.g., 'What is the capital of France?', 'The final answer is Paris.').\"\n"
393
+ "}\n"
394
+ "```\n"
395
+ "Ensure your response is ONLY valid JSON and strictly follows this format."
396
  )
397
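For reference, a model reply that satisfies the format the prompt demands, passing the same `json.loads` check the retry loop applies (values are illustrative). Note that if these literal braces were ever routed through the prompt template's formatting, they would need doubling to `{{`/`}}`:

```python
import json

sample_reply = """{
  "Reasoning": "The question asks for a capital city, which I can answer directly.",
  "Action": "final answer",
  "Action Input": "Paris"
}"""

parsed = json.loads(sample_reply)   # same validation call_with_retry_local performs
print(parsed["Action"], "->", parsed["Action Input"])
```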
 
398
  prompt = ChatPromptTemplate.from_messages([
399
  SystemMessage(content=system_prompt),
400
+ *state["history"]
401
  ])
402
 
403
  formatted_messages = prompt.format_messages(
404
+ rag_context=rag_context,
405
  context=state["context"],
406
  reasoning=state["reasoning"],
407
  question=state["question"],
408
  current_task=state["current_task"],
409
  current_thoughts=state["current_thoughts"]
410
  )
411
+
412
  # Use tokenizer's chat template for optimal formatting with chat models
413
  try:
414
  full_input_string = tokenizer.apply_chat_template(
415
+ formatted_messages,
416
+ tokenize=False,
417
  add_generation_prompt=True # Adds the assistant's turn start token
418
  )
419
  except Exception as e:
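`apply_chat_template` expects a list of `{"role": ..., "content": ...}` dicts rather than LangChain message objects, which is likely what this try/except is guarding against. One hedged way to keep the template path usable:

```python
# Convert LangChain messages into the dict form apply_chat_template expects.
role_map = {"system": "system", "human": "user", "ai": "assistant"}
chat_dicts = [
    {"role": role_map.get(m.type, "user"), "content": m.content}
    for m in formatted_messages
]
full_input_string = tokenizer.apply_chat_template(
    chat_dicts,
    tokenize=False,
    add_generation_prompt=True,
)
```

Some chat templates (Mistral Instruct among them) reject a standalone system role, so the system text may need folding into the first user turn.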
 
423
  def call_with_retry_local(inputs, retries=3): # Reduced retries for local models as network isn't primary issue
424
  for attempt in range(retries):
425
  try:
426
+ response_text = llm.invoke(inputs)
427
+
428
  # Strip the prompt from the generated text
429
  if response_text.startswith(inputs):
430
  content = response_text[len(inputs):].strip()
431
  else:
432
+ content = response_text.strip()
433
 
434
  print(f"DEBUG: RAW LOCAL LLM Response (Attempt {attempt+1}):\n---\n{content}\n---")
435
+
436
  # Attempt to parse to validate structure
437
+ json.loads(content)
438
+
439
  return AIMessage(content=content)
440
  except json.JSONDecodeError as e:
441
  print(f"[Retry {attempt+1}/{retries}] Local LLM returned invalid JSON. Error: {e}. Retrying...")
442
  print(f"Invalid JSON content (partial): {content[:200]}...")
443
  state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid JSON. Expected format: ```json{{\"Reasoning\": \"...\", \"Action\": \"...\", \"Action Input\": \"...\"}}```. Please ensure your response is ONLY valid JSON and strictly follows the format. Error: {e}"))
444
+ time.sleep(5)
445
+ except Exception as e:
446
  print(f"[Retry {attempt+1}/{retries}] An unexpected error occurred during local LLM call: {e}.")
447
  state["history"].append(AIMessage(content=f"[Local LLM Error] Failed to get a response from the local LLM: {e}. Trying again."))
448
+ time.sleep(10)
449
+
450
  raise RuntimeError("Failed after multiple retries due to local Hugging Face model issues or invalid JSON.")
451
 
452
+ response = call_with_retry_local(full_input_string)
453
 
454
  content = response.content
455
  reasoning, action, action_input = parse_agent_response(content)
456
+
457
  print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
458
 
459
+ if isinstance(response, AIMessage) and content == response.content:
460
+ state["history"].append(AIMessage(content=content))
461
+
462
  state["reasoning"] += f"\nStep {state['iterations'] + 1}: {reasoning}"
463
  state["iterations"] += 1
464
+ state["current_thoughts"] = reasoning
465
 
466
+ # --- FIX: Set final_answer directly if the action is "final answer" ---
467
+ if action.lower() == "final answer":
468
+ state["final_answer"] = action_input
469
+ print(f"DEBUG: Final answer set in state: {state['final_answer']}")
470
  else:
471
  state["context"]["pending_action"] = {
472
  "tool": action,
 
474
  }
475
  state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
476
 
 
477
  print(f"DEBUG: Exiting reasoning_node. New history length: {len(state['history'])}")
478
  return state
479
 
 
480
  def tool_node(state: AgentState) -> AgentState:
481
  """
482
  Node for executing the chosen tool and returning its output.
 
494
  tool_name = tool_call_dict.get("tool")
495
  tool_input = tool_call_dict.get("input")
496
 
497
+ if not tool_name or tool_input is None:
498
  error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty or None. LLM needs to provide valid 'Action' and 'Action Input'."
499
+ print(f"ERROR: {error_message}")
500
  state["history"].append(AIMessage(content=error_message))
501
+ state["context"].pop("pending_action", None) # Clear invalid pending action
502
  return state
503
 
504
  available_tools = state.get("tools", [])
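The resolution of `tool_fn` from `available_tools` falls between these hunks; a plausible sketch that matches by tool name and bails out cleanly on an unknown tool:

```python
tool_fn = next((t for t in available_tools if t.name == tool_name), None)
if tool_fn is None:
    tool_output = f"[Tool Error] Tool '{tool_name}' not found among available tools."
    state["history"].append(AIMessage(content=tool_output))
    state["context"].pop("pending_action", None)
    return state
```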
 
511
  try:
512
  print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
513
  tool_output = tool_fn.run(tool_input)
514
+ if not tool_output and tool_output is not False:
515
  tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
516
  except Exception as e:
517
  tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
518
  print(f"ERROR: {tool_output}")
519
 
520
  state["history"].append(AIMessage(content=f"[{tool_name} output]\n{tool_output}"))
521
+
522
  print(f"DEBUG: Exiting tool_node. Tool output added to history. New history length: {len(state['history'])}")
523
  return state
524
 
 
525
  # ====== Agent Graph ======
526
  def create_agent_workflow(tools: List[BaseTool]): # Use BaseTool for consistency
527
  workflow = StateGraph(AgentState)
528
+
529
  workflow.add_node("reason", reasoning_node)
530
  workflow.add_node("action", tool_node)
531
+
532
  workflow.set_entry_point("reason")
533
+
534
  workflow.add_conditional_edges(
535
  "reason",
536
  should_continue,
 
540
  "end": END
541
  }
542
  )
543
+
544
  workflow.add_edge("action", "reason")
545
+
546
  app = workflow.compile()
547
  return app
548
 
 
549
  # ====== Agent Interface ======
550
  class BasicAgent:
551
  def __init__(self):
 
556
  arxiv_search,
557
  document_qa,
558
  python_execution,
559
+ VideoTranscriptionTool()
560
  ]
561
+
562
  # Pre-initialize RAG vector store
563
  self.vector_store = create_vector_store()
564
  self.workflow = create_agent_workflow(self.tools)
565
+
566
  def __call__(self, question: str) -> str:
567
  print(f"\n--- Agent received question: {question[:50]}{'...' if len(question) > 50 else ''} ---")
568
 
 
577
  "final_answer": None,
578
  "current_task": "Understand the question and plan the next step.",
579
  "current_thoughts": "",
580
+ "tools": self.tools
581
  }
582
 
583
+ # The invoke method returns an iterator, so we need to consume it to get the final state
584
+ # LangGraph's invoke will run until the graph reaches an END node.
585
+ try:
586
+ # Running the graph
587
+ # Note: For simple single-path graphs, `invoke` often gives the final state directly.
588
+ # For more complex graphs, streaming with `stream` and then getting the final state
589
+ # might be more appropriate if you need intermediate steps.
590
+ final_state = self.workflow.invoke(state)
591
+
592
+ if final_state.get("final_answer") is not None:
593
+ answer = final_state["final_answer"]
594
+ print(f"--- Agent returning FINAL ANSWER: {answer} ---")
595
+ return answer
596
  else:
597
+ print(f"--- ERROR: Agent finished without setting 'final_answer' for question: {question} ---")
598
+ if final_state["history"]:
599
+ last_message = final_state["history"][-1].content
600
+ print(f"Last message in history: {last_message}")
601
+ return f"Agent could not fully answer. Last message: {last_message}"
602
+ else:
603
+ raise ValueError("Agent finished without providing a final answer and no history messages.")
604
+ except Exception as e:
605
+ print(f"--- FATAL ERROR during agent execution: {e} ---")
606
+ # In case of an unexpected error, return a helpful message
607
+ return f"An unexpected error occurred during agent execution: {str(e)}"
608
 
609
 
610