Update app.py

app.py CHANGED
@@ -3,17 +3,24 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-
-from
-
-
-import
-from
-from
-from langchain_community.
-from
-from
-from
+import time
+from typing import List, Union, Dict, Any, TypedDict  # Ensure all types are imported
+
+import torch
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage  # Corrected import for message types
+from langchain_core.tools import BaseTool
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_core.documents import Document
+# No longer needed: from langchain.chains.question_answering import load_qa_chain (as it's unused)
+from langchain_community.llms import HuggingFacePipeline
+from langchain.prompts import ChatPromptTemplate  # SystemMessage moved to langchain_core.messages
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from langgraph.graph import END, StateGraph
+
+# --- Import for actual YouTube transcription (if you make the tool functional) ---
+# from youtube_transcript_api import YouTubeTranscriptApi
 
 
 # (Keep Constants as is)
@@ -49,17 +56,7 @@ from youtube_transcript_api import YouTubeTranscriptApi
 
 from typing import List, Literal, TypedDict
 
-
-from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
-from langchain_core.tools import BaseTool
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_core.documents import Document
-from langchain_community.llms import HuggingFacePipeline
-from langchain.prompts import ChatPromptTemplate
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from langgraph.graph import END, StateGraph
+
 
 # --- Helper function for python_execution tool ---
 def indent_code(code: str, indent: str = "    ") -> str:
@@ -177,6 +174,7 @@ class VideoTranscriptionTool(BaseTool):
         raise NotImplementedError("Async not supported for this tool.")
 
 # --- Agent State Definition ---
+# --- Agent State ---
 class AgentState(TypedDict):
     question: str
     history: List[Union[HumanMessage, AIMessage]]
@@ -196,11 +194,19 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
     If JSON parsing fails, it attempts heuristic parsing.
     """
     try:
-
-
-
-
-
+        # Attempt to find the first valid JSON block.
+        # This is robust to surrounding text that some LLMs might generate.
+        json_start = response_content.find('{')
+        json_end = response_content.rfind('}')
+        if json_start != -1 and json_end != -1 and json_end > json_start:
+            json_str = response_content[json_start : json_end + 1]
+            response_json = json.loads(json_str)
+            reasoning = response_json.get("Reasoning", "").strip()
+            action = response_json.get("Action", "").strip()
+            action_input = response_json.get("Action Input", "").strip()
+            return reasoning, action, action_input
+        else:
+            raise json.JSONDecodeError("No valid JSON object found within the response.", response_content, 0)
     except json.JSONDecodeError:
         print(f"WARNING: JSONDecodeError: LLM response was not valid JSON. Attempting heuristic parse: {response_content[:200]}...")
         # Heuristic parsing for non-JSON or partial JSON responses
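
The brace-scan strategy above (first '{' to last '}') is what makes the parser tolerant of chatter around the JSON block. A minimal standalone sketch of the same idea, with an invented sample response:

import json

def extract_json_block(text: str) -> dict:
    # Locate the outermost brace pair; everything around it is ignored.
    start, end = text.find('{'), text.rfind('}')
    if start == -1 or end <= start:
        raise json.JSONDecodeError("No JSON object found.", text, 0)
    return json.loads(text[start:end + 1])

# Surrounding prose is discarded; only the JSON payload is parsed.
sample = 'Sure! Here is my answer:\n{"Reasoning": "...", "Action": "final answer", "Action Input": "Paris"}\nDone.'
print(extract_json_block(sample)["Action"])  # -> final answer

Note that rfind leaves nested braces inside a single object intact; if the model emits two separate JSON objects, the slice spans both and json.loads raises, which the heuristic fallback then handles.
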
@@ -245,13 +251,22 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
 def should_continue(state: AgentState) -> str:
     """
     Determines if the agent should continue reasoning, use a tool, or end.
+    Includes a maximum iteration limit to prevent infinite loops.
     """
-
+    MAX_ITERATIONS = 8  # Set a sensible limit to prevent infinite loops
+    print(f"DEBUG: Entering should_continue. Iteration: {state['iterations']}. Current context: {state.get('context', {})}")
 
     if state.get("final_answer") is not None:
         print("DEBUG: should_continue -> END (Final Answer set in state)")
         return "end"
 
+    if state["iterations"] >= MAX_ITERATIONS:
+        print(f"DEBUG: should_continue -> END (Max iterations {MAX_ITERATIONS} reached)")
+        # Optionally, set a final answer here indicating failure or current progress
+        if not state.get("final_answer"):
+            state["final_answer"] = "Agent terminated due to maximum iteration limit without finding a conclusive answer."
+        return "end"
+
     if state.get("context", {}).get("pending_action"):
         print("DEBUG: should_continue -> ACTION (Pending action in context)")
         return "action"
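
The router above can be exercised in isolation. A hypothetical mini-check (the state dicts below are invented and only carry the keys should_continue reads):

# Hypothetical sanity check for the routing branches (not part of app.py).
base = {"final_answer": None, "iterations": 0, "context": {}}
assert should_continue({**base, "final_answer": "42"}) == "end"
assert should_continue({**base, "iterations": 8}) == "end"  # MAX_ITERATIONS cutoff
assert should_continue({**base, "context": {"pending_action": {"tool": "duckduckgo_search", "input": "q"}}}) == "action"
assert should_continue(base) == "reason"
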
@@ -259,9 +274,6 @@ def should_continue(state: AgentState) -> str:
     print("DEBUG: should_continue -> REASON (Default to reasoning)")
     return "reason"
 
-# ====== NEW IMPORTS ======
-# Already included at the top.
-
 # ====== DOCUMENT PROCESSING SETUP ======
 def create_vector_store():
     """Create vector store with predefined documents using FAISS"""
@@ -270,6 +282,8 @@ def create_vector_store():
         Document(page_content="The capital of France is Paris.", metadata={"source": "geography"}),
         Document(page_content="Python is a popular programming language created by Guido van Rossum.", metadata={"source": "tech"}),
         Document(page_content="The Eiffel Tower is located in Paris, France.", metadata={"source": "landmarks"}),
+        Document(page_content="The highest mountain in New Zealand is Aoraki/Mount Cook.", metadata={"source": "geography"}),
+        Document(page_content="Wellington is the capital city of New Zealand.", metadata={"source": "geography"}),
     ]
 
     # Initialize embedding model
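
Only the document list changes in this hunk; the rest of create_vector_store is unchanged and not displayed. Presumably it finishes along these lines, using the imports already at the top of app.py (a sketch: the embedding model name and the documents variable name are assumptions, not visible in the diff):

# Hypothetical completion of create_vector_store (model name and variable name assumed).
def create_vector_store_sketch(documents):
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(documents, embeddings)

# Retrieval then works exactly as reasoning_node uses it below:
# relevant_docs = store.similarity_search("What is the capital of New Zealand?", k=3)
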
@@ -302,37 +316,37 @@ def reasoning_node(state: AgentState) -> AgentState:
     state.setdefault("current_task", "Understand the question and plan the next step.")
     state.setdefault("current_thoughts", "")
 
+    # Increment iterations here to track them for the current step
+    state["iterations"] += 1
+    if state["iterations"] > 8:  # Keep in sync with MAX_ITERATIONS in should_continue
+        print(f"DEBUG: Max iterations reached in reasoning_node. Exiting gracefully.")
+        state["final_answer"] = "Agent halted due to exceeding maximum allowed reasoning iterations."
+        return state
+
     state["context"].pop("pending_action", None)
 
     # --- Initialize local HuggingFacePipeline ---
-    # Using Mistral-7B-Instruct-v0.2 for better agent performance
     model_name = "mistralai/Mistral-7B-Instruct-v0.2"
 
     print(f"DEBUG: Loading local model: {model_name}...")
 
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-    # Load model with optimal settings for GPU if available, else CPU
-    # Use bfloat16 for GPUs that support it (NVIDIA Ampere architecture and newer)
-    # else float16 for older GPUs or float32 for CPU/fallback.
-    # device_map="auto" intelligently distributes the model across available devices.
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
         device_map="auto"
     )
 
-    # Create a transformers pipeline
     pipe = pipeline(
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        max_new_tokens=1024,
-        temperature=0.1,
-        do_sample=True,
-        top_p=0.9,
-        repetition_penalty=1.1,
-        # device_map handled by model loading
+        max_new_tokens=1024,
+        temperature=0.1,
+        do_sample=True,
+        top_p=0.9,
+        repetition_penalty=1.1,
     )
 
     llm = HuggingFacePipeline(pipeline=pipe)
@@ -343,24 +357,20 @@ def reasoning_node(state: AgentState) -> AgentState:
     ])
 
     # ====== RAG RETRIEVAL ======
-    # Initialize vector store if not present
     if "vector_store" not in state["context"]:
         state["context"]["vector_store"] = create_vector_store()
 
     vector_store = state["context"]["vector_store"]
 
-    # Perform retrieval
     relevant_docs = vector_store.similarity_search(
         state["question"],
-        k=3
+        k=3
     )
 
-    # Format context for LLM
    rag_context = "\n\n[Relevant Knowledge]\n"
     rag_context += "\n---\n".join([doc.page_content for doc in relevant_docs])
 
     # ====== MODIFIED PROMPT ======
-    # Add RAG context to system prompt
     system_prompt = (
         "You are an expert problem solver, designed to provide concise and accurate answers. "
         "Your process involves analyzing the question, intelligently selecting and using tools, "
@@ -368,15 +378,27 @@ def reasoning_node(state: AgentState) -> AgentState:
         "**Available Tools:**\n"
         f"{tool_descriptions}\n\n"
         "**Tool Usage Guidelines:**\n"
-        "- Use **duckduckgo_search** for current events, general facts, or quick lookups. Provide a concise search query
-        "- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics. Provide a concise search term
-        "- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information. Provide a concise search query
-        "- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'
-        "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named '_result_value'
-        "- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID
-        "**
+        "- Use **duckduckgo_search** for current events, general facts, or quick lookups. Provide a concise search query. Example: `What is the population of New York?`\n"
+        "- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics. Provide a concise search term. Example: `Eiffel Tower history`\n"
+        "- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information. Provide a concise search query. Example: `Large Language Models recent advances`\n"
+        "- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'. Example: `The capital of France is Paris.||What is the capital of France?`\n"
+        "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named '_result_value'. Example: `_result_value = 1 + 1`\n"
+        "- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID. Example: `youtube.com`\n\n"
+        "**Crucial Instructions:**\n"
+        "1. **Always aim to provide a definitive answer.** If you have enough information, use the 'final answer' action.\n"
+        "2. **To provide a final answer, use the Action 'final answer' with the complete answer in 'Action Input'.** This is how you tell me you're done. Example:\n"
+        "   ```json\n"
+        "   {\n"
+        "     \"Reasoning\": \"I have found the capital of France.\",\n"
+        "     \"Action\": \"final answer\",\n"
+        "     \"Action Input\": \"The capital of France is Paris.\"\n"
+        "   }\n"
+        "   ```\n"
+        "3. **If you need more information or cannot answer yet, select an appropriate tool and provide a clear, concise query.**\n"
+        "4. **Think step-by-step.** Reflect on previous tool outputs and the question.\n"
+        "5. **Do NOT repeat actions or search queries unless the previous attempt yielded an error.**\n\n"
         "**Retrieved Context:**\n{rag_context}\n\n"
-        "**Current Context:**\n{context}\n\n"
+        "**Current Context (Tool Outputs/Intermediate Info):**\n{context}\n\n"
         "**Previous Reasoning Steps:**\n{reasoning}\n\n"
         "**Current Task:** {current_task}\n"
         "**Current Thoughts:** {current_thoughts}\n\n"
@@ -385,15 +407,15 @@ def reasoning_node(state: AgentState) -> AgentState:
         "```json\n"
         "{\n"
         "  \"Reasoning\": \"Your reasoning process to decide the next step, including why a tool is chosen or how an answer is derived.\",\n"
-        "  \"Action\": \"The name of the tool to use (e.g., duckduckgo_search, final answer
+        "  \"Action\": \"The name of the tool to use (e.g., duckduckgo_search, final answer, No Action). If no tool is needed yet, use 'No Action'.\",\n"
         "  \"Action Input\": \"The input for the tool (e.g., 'What is the capital of France?', 'The final answer is Paris.').\"\n"
         "}\n"
         "```\n"
-        "Ensure your response is ONLY valid JSON and strictly follows this format."
+        "Ensure your response is ONLY valid JSON and strictly follows this format. Begin your response with ```json."
     )
 
     prompt = ChatPromptTemplate.from_messages([
-        SystemMessage(content=system_prompt),
+        SystemMessage(content=system_prompt),  # SystemMessage is imported from langchain_core.messages
         *state["history"]
     ])
 
@@ -406,58 +428,49 @@ def reasoning_node(state: AgentState) -> AgentState:
         current_thoughts=state["current_thoughts"]
     )
 
-    # Use tokenizer's chat template for optimal formatting with chat models
     try:
         full_input_string = tokenizer.apply_chat_template(
             formatted_messages,
             tokenize=False,
-            add_generation_prompt=True
+            add_generation_prompt=True
         )
     except Exception as e:
         print(f"WARNING: Failed to apply chat template: {e}. Falling back to simple string join. Model performance may be affected.")
         full_input_string = "\n".join([msg.content for msg in formatted_messages])
 
-    def call_with_retry_local(inputs, retries=3):
+    def call_with_retry_local(inputs, retries=3):
         for attempt in range(retries):
             try:
                 response_text = llm.invoke(inputs)
-
-
-                if response_text.startswith(inputs):
-                    content = response_text[len(inputs):].strip()
-                else:
-                    content = response_text.strip()
+                # Ensure the LLM response is processed correctly, removing the input prompt
+                content = response_text.replace(inputs, "").strip()  # More robust stripping
 
                 print(f"DEBUG: RAW LOCAL LLM Response (Attempt {attempt+1}):\n---\n{content}\n---")
 
                 # Attempt to parse to validate structure
-
-
+                # The parse_agent_response handles JSONDecodeError, so just call it
+                reasoning, action, action_input = parse_agent_response(content)
+                # If parsing succeeded, return AIMessage
                 return AIMessage(content=content)
-            except json.JSONDecodeError
-                print(f"[Retry {attempt+1}/{retries}] Local LLM returned invalid
-                print(f"Invalid
-                state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid
+            except Exception as e:  # Catch any exception, including json.JSONDecodeError from parse_agent_response
+                print(f"[Retry {attempt+1}/{retries}] Local LLM returned invalid content or an error. Error: {e}. Retrying...")
+                print(f"Invalid content (partial): {content[:200]}...")
+                state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid. Expected format: ```json{{\"Reasoning\": \"...\", \"Action\": \"...\", \"Action Input\": \"...\"}}```. Please ensure your response is ONLY valid JSON and strictly follows the format. Error: {e}"))
                 time.sleep(5)
-            except Exception as e:
-                print(f"[Retry {attempt+1}/{retries}] An unexpected error occurred during local LLM call: {e}.")
-                state["history"].append(AIMessage(content=f"[Local LLM Error] Failed to get a response from the local LLM: {e}. Trying again."))
-                time.sleep(10)
-
         raise RuntimeError("Failed after multiple retries due to local Hugging Face model issues or invalid JSON.")
 
     response = call_with_retry_local(full_input_string)
 
     content = response.content
-    reasoning, action, action_input = parse_agent_response(content)
+    reasoning, action, action_input = parse_agent_response(content)  # Use the improved parser
 
     print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
 
-
-    state["history"].append(AIMessage(content=content))
+    # Only append the LLM's raw response if it's not a retry message
+    if not content.startswith("[Parsing Error]") and not content.startswith("[Local LLM Error]"):
+        state["history"].append(AIMessage(content=content))
 
-    state["reasoning"] += f"\nStep {state['iterations']
-    state["iterations"] += 1
+    state["reasoning"] += f"\nStep {state['iterations']}: {reasoning}"  # Use iteration number for clarity
     state["current_thoughts"] = reasoning
 
     # --- FIX: Set final_answer directly if the action is "final answer" ---
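
Stripped of the LangChain specifics, call_with_retry_local follows a generic invoke-validate-backoff shape. A self-contained sketch of that pattern (all names invented):

import time

def retry_with_validation(call, validate, retries=3, delay=5):
    # Invoke, validate the output, back off and retry on any failure.
    for attempt in range(retries):
        try:
            result = call()
            validate(result)  # should raise on malformed output
            return result
        except Exception as e:
            print(f"[Retry {attempt + 1}/{retries}] {e}")
            time.sleep(delay)
    raise RuntimeError("Failed after multiple retries.")

One design note on the code above: the retry re-sends the same full_input_string, so the [Parsing Error] message appended to history cannot influence the in-flight retries; it only reaches the model on the next reasoning cycle, when the prompt is rebuilt from history.
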
@@ -469,7 +482,18 @@ def reasoning_node(state: AgentState) -> AgentState:
             "tool": action,
             "input": action_input
         }
-
+        # Only append tool decision message if it's a valid action, not if LLM failed to decide
+        if action and action != "No Action":
+            state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
+        elif action == "No Action":
+            state["history"].append(AIMessage(content=f"Agent decided to take 'No Action' but needs to proceed."))  # Indicate no action taken for visibility
+            # If "No Action" is taken, but no final answer, it indicates a potential stuck state
+            # We might want to force a re-reason or provide a default answer based on current context
+            if not state.get("final_answer"):
+                state["current_task"] = "Re-evaluate the situation and attempt to find a final answer or a new tool."
+                state["current_thoughts"] = "The previous step resulted in 'No Action'. I need to find a way forward."
+                # This might lead to another reasoning cycle, which is covered by MAX_ITERATIONS
+            state["context"].pop("pending_action", None)  # Clear pending action if it was "No Action"
 
     print(f"DEBUG: Exiting reasoning_node. New history length: {len(state['history'])}")
     return state
@@ -483,9 +507,14 @@ def tool_node(state: AgentState) -> AgentState:
     tool_call_dict = state["context"].pop("pending_action", None)
 
     if not tool_call_dict:
-        error_message = "[Tool Error] No pending_action found in context. This indicates an issue with graph flow."
+        error_message = "[Tool Error] No pending_action found in context. This indicates an issue with graph flow or a previous error."
         print(f"ERROR: {error_message}")
         state["history"].append(AIMessage(content=error_message))
+        # If no pending action, and we just came from reasoning, it means the LLM failed to set one.
+        # Force it back to reasoning, but prevent infinite loops.
+        # This will be caught by MAX_ITERATIONS in should_continue.
+        state["current_task"] = "Re-evaluate the situation; previous tool selection failed or was missing."
+        state["current_thoughts"] = "No tool action was found. I need to re-think my next step."
         return state
 
     tool_name = tool_call_dict.get("tool")
@@ -501,20 +530,25 @@ def tool_node(state: AgentState) -> AgentState:
     available_tools = state.get("tools", [])
     tool_fn = next((t for t in available_tools if t.name == tool_name), None)
 
+    tool_output = ""  # Initialize tool_output
+
     if tool_fn is None:
         tool_output = f"[Tool Error] Tool '{tool_name}' not found or not available. Please choose from: {', '.join([t.name for t in available_tools])}"
         print(f"ERROR: {tool_output}")
     else:
         try:
             print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
-
-            if
+            raw_tool_output = tool_fn.run(tool_input)
+            if raw_tool_output is None or raw_tool_output is False or raw_tool_output == "":
                 tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
+            else:
+                tool_output = f"[{tool_name} output]\n{raw_tool_output}"
         except Exception as e:
             tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
             print(f"ERROR: {tool_output}")
 
-
+    # Append tool output to history for LLM to see in next reasoning step
+    state["history"].append(AIMessage(content=tool_output))
 
     print(f"DEBUG: Exiting tool_node. Tool output added to history. New history length: {len(state['history'])}")
     return state
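
The lookup-then-run pattern above, reduced to a self-contained sketch (the stub tool is invented for illustration):

# Minimal name-based tool dispatch, mirroring tool_node (stub tool hypothetical).
class EchoTool:
    name = "echo"
    def run(self, tool_input: str) -> str:
        return f"echo: {tool_input}"

available_tools = [EchoTool()]
tool_fn = next((t for t in available_tools if t.name == "echo"), None)
output = tool_fn.run("hello") if tool_fn else "[Tool Error] Tool 'echo' not found."
print(output)  # -> echo: hello
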
@@ -546,13 +580,13 @@ def create_agent_workflow(tools: List[BaseTool]): # Use BaseTool for consistency
 # ====== Agent Interface ======
 class BasicAgent:
     def __init__(self):
-        # Instantiate tools
+        # Instantiate tools - using the specific BaseTool subclasses now
         self.tools = [
-
-
-
-
-
+            DuckDuckGoSearchTool(),
+            WikipediaSearchTool(),
+            ArxivSearchTool(),
+            DocumentQATool(),
+            PythonExecutionTool(),
             VideoTranscriptionTool()
         ]
 
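
create_agent_workflow's body sits outside the changed hunks. Given the node functions and router used in this file, a plausible minimal wiring would look like the following sketch (an assumption inferred from the names above, not the Space's confirmed code):

from langgraph.graph import END, StateGraph  # already imported at the top of app.py

def create_agent_workflow_sketch(tools):
    # Tools travel inside the state dict in this app, so the graph itself ignores them.
    workflow = StateGraph(AgentState)
    workflow.add_node("reason", reasoning_node)
    workflow.add_node("action", tool_node)
    workflow.set_entry_point("reason")
    # should_continue returns "reason", "action", or "end"; map each to a target.
    workflow.add_conditional_edges(
        "reason",
        should_continue,
        {"reason": "reason", "action": "action", "end": END},
    )
    workflow.add_edge("action", "reason")
    return workflow.compile()
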
@@ -566,10 +600,10 @@ class BasicAgent:
         state = {
             "question": question,
             "context": {
-                "vector_store": self.vector_store
+                "vector_store": self.vector_store
             },
             "reasoning": "",
-            "iterations": 0,
+            "iterations": 0,  # Initialize iterations to 0
             "history": [HumanMessage(content=question)],
             "final_answer": None,
             "current_task": "Understand the question and plan the next step.",
@@ -577,14 +611,8 @@ class BasicAgent:
             "tools": self.tools
         }
 
-        # The invoke method returns an iterator, so we need to consume it to get the final state
-        # LangGraph's invoke will run until the graph reaches an END node.
         try:
-            #
-            # Note: For simple single-path graphs, `invoke` often gives the final state directly.
-            # For more complex graphs, streaming with `stream` and then getting the final state
-            # might be more appropriate if you need intermediate steps.
-            final_state = self.workflow.invoke(state)
+            final_state = self.workflow.invoke(state, {"recursion_limit": 20})  # Increased recursion limit for LangGraph
 
             if final_state.get("final_answer") is not None:
                 answer = final_state["final_answer"]
@@ -597,15 +625,13 @@ class BasicAgent:
                 print(f"Last message in history: {last_message}")
                 return f"Agent could not fully answer. Last message: {last_message}"
             else:
-
+                return "Agent finished without providing a final answer and no history messages."
         except Exception as e:
             print(f"--- FATAL ERROR during agent execution: {e} ---")
-            # In case of an unexpected error, return a helpful message
             return f"An unexpected error occurred during agent execution: {str(e)}"
 
 
 
-
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,