wt002 committed
Commit fdddb14 · verified · 1 Parent(s): 98543af

Update app.py

Files changed (1):
  app.py +132 -106
app.py CHANGED
@@ -3,17 +3,24 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-#from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
-from dotenv import load_dotenv
-import heapq
-from collections import Counter
-import re
-from io import BytesIO
-from youtube_transcript_api import YouTubeTranscriptApi
-from langchain_community.tools.tavily_search import TavilySearchResults
-from langchain_community.document_loaders import WikipediaLoader
-from langchain_community.utilities import WikipediaAPIWrapper
-from langchain_community.document_loaders import ArxivLoader
+import time
+from typing import List, Union, Dict, Any, TypedDict  # Ensure all types are imported
+
+import torch
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage  # Corrected import for message types
+from langchain_core.tools import BaseTool
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_core.documents import Document
+# No longer needed: from langchain.chains.question_answering import load_qa_chain (as it's unused)
+from langchain_community.llms import HuggingFacePipeline
+from langchain.prompts import ChatPromptTemplate  # SystemMessage moved to langchain_core.messages
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from langgraph.graph import END, StateGraph
+
+# --- Import for actual YouTube transcription (if you make the tool functional) ---
+# from youtube_transcript_api import YouTubeTranscriptApi
 
 
 # (Keep Constants as is)
@@ -49,17 +56,7 @@ from youtube_transcript_api import YouTubeTranscriptApi
 
 from typing import List, Literal, TypedDict
 
-import torch
-from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
-from langchain_core.tools import BaseTool
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_core.documents import Document
-from langchain_community.llms import HuggingFacePipeline
-from langchain.prompts import ChatPromptTemplate
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from langgraph.graph import END, StateGraph
+
 
 # --- Helper function for python_execution tool ---
 def indent_code(code: str, indent: str = "    ") -> str:
@@ -177,6 +174,7 @@ class VideoTranscriptionTool(BaseTool):
         raise NotImplementedError("Async not supported for this tool.")
 
 # --- Agent State Definition ---
+# --- Agent State ---
 class AgentState(TypedDict):
     question: str
     history: List[Union[HumanMessage, AIMessage]]
@@ -196,11 +194,19 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
     If JSON parsing fails, it attempts heuristic parsing.
     """
     try:
-        response_json = json.loads(response_content)
-        reasoning = response_json.get("Reasoning", "").strip()
-        action = response_json.get("Action", "").strip()
-        action_input = response_json.get("Action Input", "").strip()
-        return reasoning, action, action_input
+        # Attempt to find the first valid JSON block.
+        # This is robust to surrounding text that some LLMs might generate.
+        json_start = response_content.find('{')
+        json_end = response_content.rfind('}')
+        if json_start != -1 and json_end != -1 and json_end > json_start:
+            json_str = response_content[json_start : json_end + 1]
+            response_json = json.loads(json_str)
+            reasoning = response_json.get("Reasoning", "").strip()
+            action = response_json.get("Action", "").strip()
+            action_input = response_json.get("Action Input", "").strip()
+            return reasoning, action, action_input
+        else:
+            raise json.JSONDecodeError("No valid JSON object found within the response.", response_content, 0)
     except json.JSONDecodeError:
         print(f"WARNING: JSONDecodeError: LLM response was not valid JSON. Attempting heuristic parse: {response_content[:200]}...")
         # Heuristic parsing for non-JSON or partial JSON responses
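
The effect of this hunk: parse_agent_response now slices out the substring from the first `{` to the last `}` before calling `json.loads`, so prose wrapped around the JSON payload no longer defeats parsing. A minimal standalone sketch of that behaviour (the `noisy` string is illustrative):

```python
import json

# A typical noisy completion: prose before and after the JSON payload.
noisy = (
    'Sure! Here is my decision:\n'
    '{"Reasoning": "Need a lookup.", "Action": "duckduckgo_search", '
    '"Action Input": "capital of France"}\n'
    'Hope that helps.'
)

# The same brace-scan the patch adds: slice from the first '{' to the last '}'.
start, end = noisy.find('{'), noisy.rfind('}')
payload = json.loads(noisy[start:end + 1])
print(payload["Action"])        # duckduckgo_search
print(payload["Action Input"])  # capital of France
```

Note that `rfind('}')` keys on the last closing brace in the whole response, so stray braces after the payload would still break the `json.loads` call; the heuristic fallback in the `except` branch remains the safety net for that case.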
@@ -245,13 +251,22 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
 def should_continue(state: AgentState) -> str:
     """
     Determines if the agent should continue reasoning, use a tool, or end.
+    Includes a maximum iteration limit to prevent infinite loops.
     """
-    print(f"DEBUG: Entering should_continue. Current context: {state.get('context', {})}")
+    MAX_ITERATIONS = 8  # Set a sensible limit to prevent infinite loops
+    print(f"DEBUG: Entering should_continue. Iteration: {state['iterations']}. Current context: {state.get('context', {})}")
 
     if state.get("final_answer") is not None:
         print("DEBUG: should_continue -> END (Final Answer set in state)")
         return "end"
 
+    if state["iterations"] >= MAX_ITERATIONS:
+        print(f"DEBUG: should_continue -> END (Max iterations {MAX_ITERATIONS} reached)")
+        # Optionally, set a final answer here indicating failure or current progress
+        if not state.get("final_answer"):
+            state["final_answer"] = "Agent terminated due to maximum iteration limit without finding a conclusive answer."
+        return "end"
+
     if state.get("context", {}).get("pending_action"):
         print("DEBUG: should_continue -> ACTION (Pending action in context)")
         return "action"
@@ -259,9 +274,6 @@ def should_continue(state: AgentState) -> str:
     print(f"DEBUG: should_continue -> REASON (Default to reasoning)")
     return "reason"
 
-# ====== NEW IMPORTS ======
-# Already included at the top.
-
 # ====== DOCUMENT PROCESSING SETUP ======
 def create_vector_store():
     """Create vector store with predefined documents using FAISS"""
@@ -270,6 +282,8 @@ def create_vector_store():
         Document(page_content="The capital of France is Paris.", metadata={"source": "geography"}),
         Document(page_content="Python is a popular programming language created by Guido van Rossum.", metadata={"source": "tech"}),
         Document(page_content="The Eiffel Tower is located in Paris, France.", metadata={"source": "landmarks"}),
+        Document(page_content="The highest mountain in New Zealand is Aoraki/Mount Cook.", metadata={"source": "geography"}),
+        Document(page_content="Wellington is the capital city of New Zealand.", metadata={"source": "geography"}),
     ]
 
     # Initialize embedding model
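
The rest of create_vector_store (embedding model, text splitting, FAISS indexing) is unchanged by this commit and therefore not shown. For orientation, a minimal sketch of the usual FAISS-from-Documents pattern with the imports this file already has; the embedding model name and chunk sizes are assumptions, not the commit's actual values:

```python
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

def build_store_sketch(docs: list[Document]) -> FAISS:
    # Assumed embedding model; the actual function may use a different one.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # Chunk the documents before indexing (sizes here are illustrative).
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)
    return FAISS.from_documents(chunks, embeddings)
```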
@@ -302,37 +316,37 @@ def reasoning_node(state: AgentState) -> AgentState:
     state.setdefault("current_task", "Understand the question and plan the next step.")
     state.setdefault("current_thoughts", "")
 
+    # Increment iterations here to track them for the current step
+    state["iterations"] += 1
+    if state["iterations"] > 8:  # Must match MAX_ITERATIONS in should_continue
+        print("DEBUG: Max iterations reached in reasoning_node. Exiting gracefully.")
+        state["final_answer"] = "Agent halted due to exceeding maximum allowed reasoning iterations."
+        return state
+
     state["context"].pop("pending_action", None)
 
     # --- Initialize local HuggingFacePipeline ---
-    # Using Mistral-7B-Instruct-v0.2 for better agent performance
     model_name = "mistralai/Mistral-7B-Instruct-v0.2"
 
     print(f"DEBUG: Loading local model: {model_name}...")
 
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-    # Load model with optimal settings for GPU if available, else CPU
-    # Use bfloat16 for GPUs that support it (NVIDIA Ampere architecture and newer),
-    # else float16 for older GPUs or float32 for CPU/fallback.
-    # device_map="auto" intelligently distributes the model across available devices.
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
         device_map="auto"
     )
 
-    # Create a transformers pipeline
     pipe = pipeline(
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        max_new_tokens=1024,  # Increased max_new_tokens for potentially longer JSON
-        temperature=0.1,  # Keep low for factual, tool-use tasks
-        do_sample=True,  # Allow some sampling
-        top_p=0.9,  # Nucleus sampling
-        repetition_penalty=1.1,  # Avoid repetition
-        # device_map handled by model loading
+        max_new_tokens=1024,
+        temperature=0.1,
+        do_sample=True,
+        top_p=0.9,
+        repetition_penalty=1.1,
     )
 
     llm = HuggingFacePipeline(pipeline=pipe)
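
One note on this setup: a transformers text-generation pipeline returns the prompt together with the completion by default, which is why `call_with_retry_local` further down strips the input string from the response. If that manual stripping ever proves brittle, the echo can be disabled at the pipeline level instead; a sketch assuming the same `model` and `tokenizer` objects as above:

```python
# Alternative sketch: let the pipeline drop the prompt itself, so no manual
# response_text.replace(inputs, "") is needed downstream.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    temperature=0.1,
    do_sample=True,
    top_p=0.9,
    repetition_penalty=1.1,
    return_full_text=False,  # return only the newly generated text
)
```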
@@ -343,24 +357,20 @@ def reasoning_node(state: AgentState) -> AgentState:
     ])
 
     # ====== RAG RETRIEVAL ======
-    # Initialize vector store if not present
     if "vector_store" not in state["context"]:
         state["context"]["vector_store"] = create_vector_store()
 
     vector_store = state["context"]["vector_store"]
 
-    # Perform retrieval
     relevant_docs = vector_store.similarity_search(
         state["question"],
-        k=3  # Retrieve top 3 most relevant chunks
+        k=3
     )
 
-    # Format context for LLM
     rag_context = "\n\n[Relevant Knowledge]\n"
     rag_context += "\n---\n".join([doc.page_content for doc in relevant_docs])
 
     # ====== MODIFIED PROMPT ======
-    # Add RAG context to system prompt
     system_prompt = (
         "You are an expert problem solver, designed to provide concise and accurate answers. "
         "Your process involves analyzing the question, intelligently selecting and using tools, "
@@ -368,15 +378,27 @@ def reasoning_node(state: AgentState) -> AgentState:
         "**Available Tools:**\n"
         f"{tool_descriptions}\n\n"
         "**Tool Usage Guidelines:**\n"
-        "- Use **duckduckgo_search** for current events, general facts, or quick lookups. Provide a concise search query.\n"
-        "- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics. Provide a concise search term.\n"
-        "- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information. Provide a concise search query.\n"
-        "- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'.\n"
-        "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named '_result_value' (e.g., '_result_value = 1 + 1').\n"
-        "- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID.\n\n"
-        "**To provide a final answer, use the Action 'final answer' with the complete answer in 'Action Input'.**\n\n"  # Added explicit instruction
+        "- Use **duckduckgo_search** for current events, general facts, or quick lookups. Provide a concise search query. Example: `What is the population of New York?`\n"
+        "- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics. Provide a concise search term. Example: `Eiffel Tower history`\n"
+        "- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information. Provide a concise search query. Example: `Large Language Models recent advances`\n"
+        "- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'. Example: `The capital of France is Paris.||What is the capital of France?`\n"
+        "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named '_result_value'. Example: `_result_value = 1 + 1`\n"
+        "- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID. Example: `youtube.com`\n\n"
+        "**Crucial Instructions:**\n"
+        "1. **Always aim to provide a definitive answer.** If you have enough information, use the 'final answer' action.\n"
+        "2. **To provide a final answer, use the Action 'final answer' with the complete answer in 'Action Input'.** This is how you tell me you're done. Example:\n"
+        "   ```json\n"
+        "   {\n"
+        "     \"Reasoning\": \"I have found the capital of France.\",\n"
+        "     \"Action\": \"final answer\",\n"
+        "     \"Action Input\": \"The capital of France is Paris.\"\n"
+        "   }\n"
+        "   ```\n"
+        "3. **If you need more information or cannot answer yet, select an appropriate tool and provide a clear, concise query.**\n"
+        "4. **Think step-by-step.** Reflect on previous tool outputs and the question.\n"
+        "5. **Do NOT repeat actions or search queries unless the previous attempt yielded an error.**\n\n"
         "**Retrieved Context:**\n{rag_context}\n\n"
-        "**Current Context:**\n{context}\n\n"
+        "**Current Context (Tool Outputs/Intermediate Info):**\n{context}\n\n"
         "**Previous Reasoning Steps:**\n{reasoning}\n\n"
         "**Current Task:** {current_task}\n"
         "**Current Thoughts:** {current_thoughts}\n\n"
@@ -385,15 +407,15 @@ def reasoning_node(state: AgentState) -> AgentState:
         "```json\n"
         "{\n"
         "  \"Reasoning\": \"Your reasoning process to decide the next step, including why a tool is chosen or how an answer is derived.\",\n"
-        "  \"Action\": \"The name of the tool to use (e.g., duckduckgo_search, final answer), or 'No Action' if no tool is needed yet.\",\n"
+        "  \"Action\": \"The name of the tool to use (e.g., duckduckgo_search, final answer, No Action); if no tool is needed yet, use 'No Action'.\",\n"
         "  \"Action Input\": \"The input for the tool (e.g., 'What is the capital of France?', 'The final answer is Paris.').\"\n"
         "}\n"
         "```\n"
-        "Ensure your response is ONLY valid JSON and strictly follows this format."
+        "Ensure your response is ONLY valid JSON and strictly follows this format. Begin your response with ```json."
     )
 
     prompt = ChatPromptTemplate.from_messages([
-        SystemMessage(content=system_prompt),
+        SystemMessage(content=system_prompt),  # SystemMessage is imported from langchain_core.messages
         *state["history"]
     ])
 
@@ -406,58 +428,49 @@ def reasoning_node(state: AgentState) -> AgentState:
         current_thoughts=state["current_thoughts"]
     )
 
-    # Use tokenizer's chat template for optimal formatting with chat models
     try:
         full_input_string = tokenizer.apply_chat_template(
             formatted_messages,
             tokenize=False,
-            add_generation_prompt=True  # Adds the assistant's turn start token
+            add_generation_prompt=True
         )
     except Exception as e:
         print(f"WARNING: Failed to apply chat template: {e}. Falling back to simple string join. Model performance may be affected.")
         full_input_string = "\n".join([msg.content for msg in formatted_messages])
 
-    def call_with_retry_local(inputs, retries=3):  # Reduced retries for local models as network isn't primary issue
+    def call_with_retry_local(inputs, retries=3):
         for attempt in range(retries):
             try:
                 response_text = llm.invoke(inputs)
-
-                # Strip the prompt from the generated text
-                if response_text.startswith(inputs):
-                    content = response_text[len(inputs):].strip()
-                else:
-                    content = response_text.strip()
+                # Ensure the LLM response is processed correctly, removing the input prompt
+                content = response_text.replace(inputs, "").strip()  # More robust stripping
 
                 print(f"DEBUG: RAW LOCAL LLM Response (Attempt {attempt+1}):\n---\n{content}\n---")
 
                 # Attempt to parse to validate structure
-                json.loads(content)
-
+                # parse_agent_response handles JSONDecodeError internally, so just call it
+                reasoning, action, action_input = parse_agent_response(content)
+                # If parsing succeeded, return the AIMessage
                 return AIMessage(content=content)
-            except json.JSONDecodeError as e:
-                print(f"[Retry {attempt+1}/{retries}] Local LLM returned invalid JSON. Error: {e}. Retrying...")
-                print(f"Invalid JSON content (partial): {content[:200]}...")
-                state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid JSON. Expected format: ```json{{\"Reasoning\": \"...\", \"Action\": \"...\", \"Action Input\": \"...\"}}```. Please ensure your response is ONLY valid JSON and strictly follows the format. Error: {e}"))
+            except Exception as e:  # Catch any exception, including json.JSONDecodeError from parse_agent_response
+                print(f"[Retry {attempt+1}/{retries}] Local LLM returned invalid content or an error. Error: {e}. Retrying...")
+                print(f"Invalid content (partial): {content[:200]}...")
+                state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid. Expected format: ```json{{\"Reasoning\": \"...\", \"Action\": \"...\", \"Action Input\": \"...\"}}```. Please ensure your response is ONLY valid JSON and strictly follows the format. Error: {e}"))
                 time.sleep(5)
-            except Exception as e:
-                print(f"[Retry {attempt+1}/{retries}] An unexpected error occurred during local LLM call: {e}.")
-                state["history"].append(AIMessage(content=f"[Local LLM Error] Failed to get a response from the local LLM: {e}. Trying again."))
-                time.sleep(10)
-
        raise RuntimeError("Failed after multiple retries due to local Hugging Face model issues or invalid JSON.")
 
    response = call_with_retry_local(full_input_string)
 
    content = response.content
-    reasoning, action, action_input = parse_agent_response(content)
+    reasoning, action, action_input = parse_agent_response(content)  # Use the improved parser
 
    print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
 
-    if isinstance(response, AIMessage) and content == response.content:
+    # Only append the LLM's raw response if it's not a retry message
+    if not content.startswith("[Parsing Error]") and not content.startswith("[Local LLM Error]"):
         state["history"].append(AIMessage(content=content))
 
-    state["reasoning"] += f"\nStep {state['iterations'] + 1}: {reasoning}"
-    state["iterations"] += 1
+    state["reasoning"] += f"\nStep {state['iterations']}: {reasoning}"  # Use the iteration number for clarity
     state["current_thoughts"] = reasoning
 
    # --- FIX: Set final_answer directly if the action is "final answer" ---
@@ -469,7 +482,18 @@ def reasoning_node(state: AgentState) -> AgentState:
             "tool": action,
             "input": action_input
         }
-        state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
+        # Only append a tool-decision message if it's a valid action, not if the LLM failed to decide
+        if action and action != "No Action":
+            state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
+        elif action == "No Action":
+            state["history"].append(AIMessage(content="Agent decided to take 'No Action' but needs to proceed."))  # Indicate no action taken for visibility
+            # If "No Action" is taken but there is no final answer, it indicates a potential stuck state;
+            # force a re-reason or provide a default answer based on current context.
+            if not state.get("final_answer"):
+                state["current_task"] = "Re-evaluate the situation and attempt to find a final answer or a new tool."
+                state["current_thoughts"] = "The previous step resulted in 'No Action'. I need to find a way forward."
+                # This may lead to another reasoning cycle, which is bounded by MAX_ITERATIONS.
+            state["context"].pop("pending_action", None)  # Clear pending action if it was "No Action"
 
     print(f"DEBUG: Exiting reasoning_node. New history length: {len(state['history'])}")
     return state
@@ -483,9 +507,14 @@ def tool_node(state: AgentState) -> AgentState:
     tool_call_dict = state["context"].pop("pending_action", None)
 
     if not tool_call_dict:
-        error_message = "[Tool Error] No pending_action found in context. This indicates an issue with graph flow."
+        error_message = "[Tool Error] No pending_action found in context. This indicates an issue with graph flow or a previous error."
         print(f"ERROR: {error_message}")
         state["history"].append(AIMessage(content=error_message))
+        # If there is no pending action right after reasoning, the LLM failed to set one.
+        # Force it back to reasoning, but prevent infinite loops:
+        # this will be caught by MAX_ITERATIONS in should_continue.
+        state["current_task"] = "Re-evaluate the situation; previous tool selection failed or was missing."
+        state["current_thoughts"] = "No tool action was found. I need to re-think my next step."
         return state
 
     tool_name = tool_call_dict.get("tool")
@@ -501,20 +530,25 @@ def tool_node(state: AgentState) -> AgentState:
     available_tools = state.get("tools", [])
     tool_fn = next((t for t in available_tools if t.name == tool_name), None)
 
+    tool_output = ""  # Initialize tool_output
+
     if tool_fn is None:
         tool_output = f"[Tool Error] Tool '{tool_name}' not found or not available. Please choose from: {', '.join([t.name for t in available_tools])}"
         print(f"ERROR: {tool_output}")
     else:
         try:
             print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
-            tool_output = tool_fn.run(tool_input)
-            if not tool_output and tool_output is not False:
+            raw_tool_output = tool_fn.run(tool_input)
+            if raw_tool_output is None or raw_tool_output is False or raw_tool_output == "":
                 tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
+            else:
+                tool_output = f"[{tool_name} output]\n{raw_tool_output}"
         except Exception as e:
             tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
             print(f"ERROR: {tool_output}")
 
-    state["history"].append(AIMessage(content=f"[{tool_name} output]\n{tool_output}"))
+    # Append the tool output to history so the LLM sees it in the next reasoning step
+    state["history"].append(AIMessage(content=tool_output))
 
     print(f"DEBUG: Exiting tool_node. Tool output added to history. New history length: {len(state['history'])}")
     return state
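
create_agent_workflow itself is untouched by this commit, so the graph wiring never appears in the diff. For orientation, a minimal LangGraph sketch consistent with the nodes above; the node names "reason" and "action" are assumptions inferred from should_continue's return values, and AgentState, reasoning_node, tool_node, and should_continue are the definitions earlier in this file:

```python
from langgraph.graph import END, StateGraph

def create_agent_workflow_sketch(tools):
    graph = StateGraph(AgentState)
    graph.add_node("reason", reasoning_node)
    graph.add_node("action", tool_node)
    graph.set_entry_point("reason")
    # Route on should_continue's return value: "reason", "action", or "end".
    graph.add_conditional_edges(
        "reason",
        should_continue,
        {"reason": "reason", "action": "action", "end": END},
    )
    graph.add_edge("action", "reason")  # after a tool runs, reason about its output
    return graph.compile()
```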
@@ -546,13 +580,13 @@ def create_agent_workflow(tools: List[BaseTool]):  # Use BaseTool for consistency
 # ====== Agent Interface ======
 class BasicAgent:
     def __init__(self):
-        # Instantiate tools
+        # Instantiate tools - using the specific BaseTool subclasses now
         self.tools = [
-            duckduckgo_search,
-            wikipedia_search,
-            arxiv_search,
-            document_qa,
-            python_execution,
+            DuckDuckGoSearchTool(),
+            WikipediaSearchTool(),
+            ArxivSearchTool(),
+            DocumentQATool(),
+            PythonExecutionTool(),
             VideoTranscriptionTool()
         ]
 
@@ -566,10 +600,10 @@ class BasicAgent:
         state = {
             "question": question,
             "context": {
-                "vector_store": self.vector_store  # Include vector store in context
+                "vector_store": self.vector_store
             },
             "reasoning": "",
-            "iterations": 0,
+            "iterations": 0,  # Initialize iterations to 0
             "history": [HumanMessage(content=question)],
             "final_answer": None,
             "current_task": "Understand the question and plan the next step.",
@@ -577,14 +611,8 @@ class BasicAgent:
             "tools": self.tools
         }
 
-        # The invoke method returns an iterator, so we need to consume it to get the final state
-        # LangGraph's invoke will run until the graph reaches an END node.
         try:
-            # Running the graph
-            # Note: For simple single-path graphs, `invoke` often gives the final state directly.
-            # For more complex graphs, streaming with `stream` and then getting the final state
-            # might be more appropriate if you need intermediate steps.
-            final_state = self.workflow.invoke(state)
+            final_state = self.workflow.invoke(state, {"recursion_limit": 20})  # Increased recursion limit for LangGraph
 
             if final_state.get("final_answer") is not None:
                 answer = final_state["final_answer"]
@@ -597,15 +625,13 @@ class BasicAgent:
                 print(f"Last message in history: {last_message}")
                 return f"Agent could not fully answer. Last message: {last_message}"
             else:
-                raise ValueError("Agent finished without providing a final answer and no history messages.")
+                return "Agent finished without providing a final answer and no history messages."
         except Exception as e:
             print(f"--- FATAL ERROR during agent execution: {e} ---")
-            # In case of an unexpected error, return a helpful message
             return f"An unexpected error occurred during agent execution: {str(e)}"
 
 
 
-
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
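
Finally, a hedged smoke test. The diff never shows BasicAgent's call entry point, so `agent(question)` below assumes a `__call__` wrapper around the state construction and `workflow.invoke` logic above:

```python
# Hypothetical smoke test, assuming BasicAgent exposes __call__(question) -> str.
if __name__ == "__main__":
    agent = BasicAgent()
    # Should be answerable from the newly seeded FAISS document
    # "Wellington is the capital city of New Zealand." via RAG.
    print(agent("What is the capital city of New Zealand?"))
```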
 