Update app.py

app.py CHANGED
@@ -3,17 +3,24 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-
-from
-
-
-import
-from
-from
-from langchain_community.
-from
-from
-from
+import time
+from typing import List, Union, Dict, Any, TypedDict  # Ensure all types are imported
+
+import torch
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage  # Corrected import for message types
+from langchain_core.tools import BaseTool
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_core.documents import Document
+# No longer needed: from langchain.chains.question_answering import load_qa_chain (as it's unused)
+from langchain_community.llms import HuggingFacePipeline
+from langchain.prompts import ChatPromptTemplate  # SystemMessage moved to langchain_core.messages
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from langgraph.graph import END, StateGraph
+
+# --- Import for actual YouTube transcription (if you make the tool functional) ---
+# from youtube_transcript_api import YouTubeTranscriptApi
 
 
 # (Keep Constants as is)
@@ -49,17 +56,7 @@ from youtube_transcript_api import YouTubeTranscriptApi
 
 from typing import List, Literal, TypedDict
 
-
-from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
-from langchain_core.tools import BaseTool
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_core.documents import Document
-from langchain_community.llms import HuggingFacePipeline
-from langchain.prompts import ChatPromptTemplate
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from langgraph.graph import END, StateGraph
+
 
 # --- Helper function for python_execution tool ---
 def indent_code(code: str, indent: str = "    ") -> str:
@@ -177,6 +174,7 @@ class VideoTranscriptionTool(BaseTool):
         raise NotImplementedError("Async not supported for this tool.")
 
 # --- Agent State Definition ---
+# --- Agent State ---
 class AgentState(TypedDict):
     question: str
     history: List[Union[HumanMessage, AIMessage]]
@@ -196,11 +194,19 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
     If JSON parsing fails, it attempts heuristic parsing.
     """
     try:
-
-
-
-
-
+        # Attempt to find the first valid JSON block.
+        # This is robust to surrounding text that some LLMs might generate.
+        json_start = response_content.find('{')
+        json_end = response_content.rfind('}')
+        if json_start != -1 and json_end != -1 and json_end > json_start:
+            json_str = response_content[json_start : json_end + 1]
+            response_json = json.loads(json_str)
+            reasoning = response_json.get("Reasoning", "").strip()
+            action = response_json.get("Action", "").strip()
+            action_input = response_json.get("Action Input", "").strip()
+            return reasoning, action, action_input
+        else:
+            raise json.JSONDecodeError("No valid JSON object found within the response.", response_content, 0)
     except json.JSONDecodeError:
         print(f"WARNING: JSONDecodeError: LLM response was not valid JSON. Attempting heuristic parse: {response_content[:200]}...")
         # Heuristic parsing for non-JSON or partial JSON responses
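
The brace-scan strategy above (first '{' to last '}') is what makes the parser tolerant of chatter around the JSON block. A minimal standalone sketch of the same idea, with an invented sample response:

import json

def extract_json_block(text: str) -> dict:
    # Locate the outermost brace pair; everything around it is ignored.
    start, end = text.find('{'), text.rfind('}')
    if start == -1 or end <= start:
        raise json.JSONDecodeError("No JSON object found.", text, 0)
    return json.loads(text[start:end + 1])

# Surrounding prose is discarded; only the JSON payload is parsed.
sample = 'Sure! Here is my answer:\n{"Reasoning": "...", "Action": "final answer", "Action Input": "Paris"}\nDone.'
print(extract_json_block(sample)["Action"])  # -> final answer

Note that rfind leaves nested braces inside a single object intact; if the model emits two separate JSON objects, the slice spans both and json.loads raises, which the heuristic fallback then handles.
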
@@ -245,13 +251,22 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
 def should_continue(state: AgentState) -> str:
     """
     Determines if the agent should continue reasoning, use a tool, or end.
+    Includes a maximum iteration limit to prevent infinite loops.
     """
-
+    MAX_ITERATIONS = 8  # Set a sensible limit to prevent infinite loops
+    print(f"DEBUG: Entering should_continue. Iteration: {state['iterations']}. Current context: {state.get('context', {})}")
 
     if state.get("final_answer") is not None:
         print("DEBUG: should_continue -> END (Final Answer set in state)")
         return "end"
 
+    if state["iterations"] >= MAX_ITERATIONS:
+        print(f"DEBUG: should_continue -> END (Max iterations {MAX_ITERATIONS} reached)")
+        # Optionally, set a final answer here indicating failure or current progress
+        if not state.get("final_answer"):
+            state["final_answer"] = "Agent terminated due to maximum iteration limit without finding a conclusive answer."
+        return "end"
+
     if state.get("context", {}).get("pending_action"):
         print("DEBUG: should_continue -> ACTION (Pending action in context)")
         return "action"
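
The router above can be exercised in isolation. A hypothetical mini-check (the state dicts below are invented and only carry the keys should_continue reads):

# Hypothetical sanity check for the routing branches (not part of app.py).
base = {"final_answer": None, "iterations": 0, "context": {}}
assert should_continue({**base, "final_answer": "42"}) == "end"
assert should_continue({**base, "iterations": 8}) == "end"  # MAX_ITERATIONS cutoff
assert should_continue({**base, "context": {"pending_action": {"tool": "duckduckgo_search", "input": "q"}}}) == "action"
assert should_continue(base) == "reason"
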
@@ -259,9 +274,6 @@ def should_continue(state: AgentState) -> str:
     print("DEBUG: should_continue -> REASON (Default to reasoning)")
     return "reason"
 
-# ====== NEW IMPORTS ======
-# Already included at the top.
-
 # ====== DOCUMENT PROCESSING SETUP ======
 def create_vector_store():
     """Create vector store with predefined documents using FAISS"""
@@ -270,6 +282,8 @@ def create_vector_store():
         Document(page_content="The capital of France is Paris.", metadata={"source": "geography"}),
         Document(page_content="Python is a popular programming language created by Guido van Rossum.", metadata={"source": "tech"}),
         Document(page_content="The Eiffel Tower is located in Paris, France.", metadata={"source": "landmarks"}),
+        Document(page_content="The highest mountain in New Zealand is Aoraki/Mount Cook.", metadata={"source": "geography"}),
+        Document(page_content="Wellington is the capital city of New Zealand.", metadata={"source": "geography"}),
     ]
 
     # Initialize embedding model
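
Only the document list changes in this hunk; the rest of create_vector_store is unchanged and not displayed. Presumably it finishes along these lines, using the imports already at the top of app.py (a sketch: the embedding model name and the documents variable name are assumptions, not visible in the diff):

# Hypothetical completion of create_vector_store (model name and variable name assumed).
def create_vector_store_sketch(documents):
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(documents, embeddings)

# Retrieval then works exactly as reasoning_node uses it below:
# relevant_docs = store.similarity_search("What is the capital of New Zealand?", k=3)
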
@@ -302,37 +316,37 @@ def reasoning_node(state: AgentState) -> AgentState:
     state.setdefault("current_task", "Understand the question and plan the next step.")
     state.setdefault("current_thoughts", "")
 
+    # Increment iterations here to track them for the current step
+    state["iterations"] += 1
+    if state["iterations"] > 8:  # Keep in sync with MAX_ITERATIONS in should_continue
+        print(f"DEBUG: Max iterations reached in reasoning_node. Exiting gracefully.")
+        state["final_answer"] = "Agent halted due to exceeding maximum allowed reasoning iterations."
+        return state
+
     state["context"].pop("pending_action", None)
 
     # --- Initialize local HuggingFacePipeline ---
-    # Using Mistral-7B-Instruct-v0.2 for better agent performance
     model_name = "mistralai/Mistral-7B-Instruct-v0.2"
 
     print(f"DEBUG: Loading local model: {model_name}...")
 
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-    # Load model with optimal settings for GPU if available, else CPU
-    # Use bfloat16 for GPUs that support it (NVIDIA Ampere architecture and newer)
-    # else float16 for older GPUs or float32 for CPU/fallback.
-    # device_map="auto" intelligently distributes the model across available devices.
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
         device_map="auto"
     )
 
-    # Create a transformers pipeline
     pipe = pipeline(
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        max_new_tokens=1024,
-        temperature=0.1,
-        do_sample=True,
-        top_p=0.9,
-        repetition_penalty=1.1,
-        # device_map handled by model loading
+        max_new_tokens=1024,
+        temperature=0.1,
+        do_sample=True,
+        top_p=0.9,
+        repetition_penalty=1.1,
     )
 
     llm = HuggingFacePipeline(pipeline=pipe)
@@ -343,24 +357,20 @@ def reasoning_node(state: AgentState) -> AgentState:
     ])
 
     # ====== RAG RETRIEVAL ======
-    # Initialize vector store if not present
     if "vector_store" not in state["context"]:
         state["context"]["vector_store"] = create_vector_store()
 
     vector_store = state["context"]["vector_store"]
 
-    # Perform retrieval
     relevant_docs = vector_store.similarity_search(
         state["question"],
-        k=3
+        k=3
     )
 
-    # Format context for LLM
    rag_context = "\n\n[Relevant Knowledge]\n"
     rag_context += "\n---\n".join([doc.page_content for doc in relevant_docs])
 
     # ====== MODIFIED PROMPT ======
-    # Add RAG context to system prompt
     system_prompt = (
         "You are an expert problem solver, designed to provide concise and accurate answers. "
         "Your process involves analyzing the question, intelligently selecting and using tools, "
@@ -368,15 +378,27 @@ def reasoning_node(state: AgentState) -> AgentState:
         "**Available Tools:**\n"
         f"{tool_descriptions}\n\n"
         "**Tool Usage Guidelines:**\n"
-        "- Use **duckduckgo_search** for current events, general facts, or quick lookups. Provide a concise search query
-        "- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics. Provide a concise search term
-        "- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information. Provide a concise search query
-        "- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'
-        "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named '_result_value'
-        "- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID
-        "**
+        "- Use **duckduckgo_search** for current events, general facts, or quick lookups. Provide a concise search query. Example: `What is the population of New York?`\n"
+        "- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics. Provide a concise search term. Example: `Eiffel Tower history`\n"
+        "- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information. Provide a concise search query. Example: `Large Language Models recent advances`\n"
+        "- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'. Example: `The capital of France is Paris.||What is the capital of France?`\n"
+        "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named '_result_value'. Example: `_result_value = 1 + 1`\n"
+        "- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID. Example: `youtube.com`\n\n"
+        "**Crucial Instructions:**\n"
+        "1. **Always aim to provide a definitive answer.** If you have enough information, use the 'final answer' action.\n"
+        "2. **To provide a final answer, use the Action 'final answer' with the complete answer in 'Action Input'.** This is how you tell me you're done. Example:\n"
+        "   ```json\n"
+        "   {\n"
+        "     \"Reasoning\": \"I have found the capital of France.\",\n"
+        "     \"Action\": \"final answer\",\n"
+        "     \"Action Input\": \"The capital of France is Paris.\"\n"
+        "   }\n"
+        "   ```\n"
+        "3. **If you need more information or cannot answer yet, select an appropriate tool and provide a clear, concise query.**\n"
+        "4. **Think step-by-step.** Reflect on previous tool outputs and the question.\n"
+        "5. **Do NOT repeat actions or search queries unless the previous attempt yielded an error.**\n\n"
         "**Retrieved Context:**\n{rag_context}\n\n"
-        "**Current Context:**\n{context}\n\n"
+        "**Current Context (Tool Outputs/Intermediate Info):**\n{context}\n\n"
         "**Previous Reasoning Steps:**\n{reasoning}\n\n"
         "**Current Task:** {current_task}\n"
         "**Current Thoughts:** {current_thoughts}\n\n"
@@ -385,15 +407,15 @@ def reasoning_node(state: AgentState) -> AgentState:
         "```json\n"
         "{\n"
         "  \"Reasoning\": \"Your reasoning process to decide the next step, including why a tool is chosen or how an answer is derived.\",\n"
-        "  \"Action\": \"The name of the tool to use (e.g., duckduckgo_search, final answer
+        "  \"Action\": \"The name of the tool to use (e.g., duckduckgo_search, final answer, No Action). If no tool is needed yet, use 'No Action'.\",\n"
         "  \"Action Input\": \"The input for the tool (e.g., 'What is the capital of France?', 'The final answer is Paris.').\"\n"
         "}\n"
         "```\n"
-        "Ensure your response is ONLY valid JSON and strictly follows this format."
+        "Ensure your response is ONLY valid JSON and strictly follows this format. Begin your response with ```json."
     )
 
     prompt = ChatPromptTemplate.from_messages([
-        SystemMessage(content=system_prompt),
+        SystemMessage(content=system_prompt),  # SystemMessage is imported from langchain_core.messages
         *state["history"]
     ])
 
@@ -406,58 +428,49 @@ def reasoning_node(state: AgentState) -> AgentState:
         current_thoughts=state["current_thoughts"]
     )
 
-    # Use tokenizer's chat template for optimal formatting with chat models
     try:
         full_input_string = tokenizer.apply_chat_template(
             formatted_messages,
             tokenize=False,
-            add_generation_prompt=True
+            add_generation_prompt=True
         )
     except Exception as e:
         print(f"WARNING: Failed to apply chat template: {e}. Falling back to simple string join. Model performance may be affected.")
         full_input_string = "\n".join([msg.content for msg in formatted_messages])
 
-    def call_with_retry_local(inputs, retries=3):
+    def call_with_retry_local(inputs, retries=3):
         for attempt in range(retries):
             try:
                 response_text = llm.invoke(inputs)
-
-
-                if response_text.startswith(inputs):
-                    content = response_text[len(inputs):].strip()
-                else:
-                    content = response_text.strip()
+                # Ensure the LLM response is processed correctly, removing the input prompt
+                content = response_text.replace(inputs, "").strip()  # More robust stripping
 
                 print(f"DEBUG: RAW LOCAL LLM Response (Attempt {attempt+1}):\n---\n{content}\n---")
 
                 # Attempt to parse to validate structure
-
-
+                # The parse_agent_response handles JSONDecodeError, so just call it
+                reasoning, action, action_input = parse_agent_response(content)
+                # If parsing succeeded, return AIMessage
                 return AIMessage(content=content)
-            except json.JSONDecodeError
-                print(f"[Retry {attempt+1}/{retries}] Local LLM returned invalid
-                print(f"Invalid
-                state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid
+            except Exception as e:  # Catch any exception, including json.JSONDecodeError from parse_agent_response
+                print(f"[Retry {attempt+1}/{retries}] Local LLM returned invalid content or an error. Error: {e}. Retrying...")
+                print(f"Invalid content (partial): {content[:200]}...")
+                state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid. Expected format: ```json{{\"Reasoning\": \"...\", \"Action\": \"...\", \"Action Input\": \"...\"}}```. Please ensure your response is ONLY valid JSON and strictly follows the format. Error: {e}"))
                 time.sleep(5)
-            except Exception as e:
-                print(f"[Retry {attempt+1}/{retries}] An unexpected error occurred during local LLM call: {e}.")
-                state["history"].append(AIMessage(content=f"[Local LLM Error] Failed to get a response from the local LLM: {e}. Trying again."))
-                time.sleep(10)
-
         raise RuntimeError("Failed after multiple retries due to local Hugging Face model issues or invalid JSON.")
 
     response = call_with_retry_local(full_input_string)
 
     content = response.content
-    reasoning, action, action_input = parse_agent_response(content)
+    reasoning, action, action_input = parse_agent_response(content)  # Use the improved parser
 
     print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
 
-
-    state["history"].append(AIMessage(content=content))
+    # Only append the LLM's raw response if it's not a retry message
+    if not content.startswith("[Parsing Error]") and not content.startswith("[Local LLM Error]"):
+        state["history"].append(AIMessage(content=content))
 
-    state["reasoning"] += f"\nStep {state['iterations']
-    state["iterations"] += 1
+    state["reasoning"] += f"\nStep {state['iterations']}: {reasoning}"  # Use iteration number for clarity
     state["current_thoughts"] = reasoning
 
     # --- FIX: Set final_answer directly if the action is "final answer" ---
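
Stripped of the LangChain specifics, call_with_retry_local follows a generic invoke-validate-backoff shape. A self-contained sketch of that pattern (all names invented):

import time

def retry_with_validation(call, validate, retries=3, delay=5):
    # Invoke, validate the output, back off and retry on any failure.
    for attempt in range(retries):
        try:
            result = call()
            validate(result)  # should raise on malformed output
            return result
        except Exception as e:
            print(f"[Retry {attempt + 1}/{retries}] {e}")
            time.sleep(delay)
    raise RuntimeError("Failed after multiple retries.")

One design note on the code above: the retry re-sends the same full_input_string, so the [Parsing Error] message appended to history cannot influence the in-flight retries; it only reaches the model on the next reasoning cycle, when the prompt is rebuilt from history.
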
@@ -469,7 +482,18 @@ def reasoning_node(state: AgentState) -> AgentState:
             "tool": action,
             "input": action_input
         }
-
+        # Only append tool decision message if it's a valid action, not if LLM failed to decide
+        if action and action != "No Action":
+            state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
+        elif action == "No Action":
+            state["history"].append(AIMessage(content=f"Agent decided to take 'No Action' but needs to proceed."))  # Indicate no action taken for visibility
+            # If "No Action" is taken, but no final answer, it indicates a potential stuck state
+            # We might want to force a re-reason or provide a default answer based on current context
+            if not state.get("final_answer"):
+                state["current_task"] = "Re-evaluate the situation and attempt to find a final answer or a new tool."
+                state["current_thoughts"] = "The previous step resulted in 'No Action'. I need to find a way forward."
+                # This might lead to another reasoning cycle, which is covered by MAX_ITERATIONS
+            state["context"].pop("pending_action", None)  # Clear pending action if it was "No Action"
 
     print(f"DEBUG: Exiting reasoning_node. New history length: {len(state['history'])}")
     return state
@@ -483,9 +507,14 @@ def tool_node(state: AgentState) -> AgentState:
     tool_call_dict = state["context"].pop("pending_action", None)
 
     if not tool_call_dict:
-        error_message = "[Tool Error] No pending_action found in context. This indicates an issue with graph flow."
+        error_message = "[Tool Error] No pending_action found in context. This indicates an issue with graph flow or a previous error."
         print(f"ERROR: {error_message}")
         state["history"].append(AIMessage(content=error_message))
+        # If no pending action, and we just came from reasoning, it means the LLM failed to set one.
+        # Force it back to reasoning, but prevent infinite loops.
+        # This will be caught by MAX_ITERATIONS in should_continue.
+        state["current_task"] = "Re-evaluate the situation; previous tool selection failed or was missing."
+        state["current_thoughts"] = "No tool action was found. I need to re-think my next step."
         return state
 
     tool_name = tool_call_dict.get("tool")
@@ -501,20 +530,25 @@ def tool_node(state: AgentState) -> AgentState:
     available_tools = state.get("tools", [])
     tool_fn = next((t for t in available_tools if t.name == tool_name), None)
 
+    tool_output = ""  # Initialize tool_output
+
     if tool_fn is None:
         tool_output = f"[Tool Error] Tool '{tool_name}' not found or not available. Please choose from: {', '.join([t.name for t in available_tools])}"
         print(f"ERROR: {tool_output}")
     else:
         try:
             print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
-
-            if
+            raw_tool_output = tool_fn.run(tool_input)
+            if raw_tool_output is None or raw_tool_output is False or raw_tool_output == "":
                 tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
+            else:
+                tool_output = f"[{tool_name} output]\n{raw_tool_output}"
         except Exception as e:
             tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
             print(f"ERROR: {tool_output}")
 
-
+    # Append tool output to history for LLM to see in next reasoning step
+    state["history"].append(AIMessage(content=tool_output))
 
     print(f"DEBUG: Exiting tool_node. Tool output added to history. New history length: {len(state['history'])}")
     return state
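
The lookup-then-run pattern above, reduced to a self-contained sketch (the stub tool is invented for illustration):

# Minimal name-based tool dispatch, mirroring tool_node (stub tool hypothetical).
class EchoTool:
    name = "echo"
    def run(self, tool_input: str) -> str:
        return f"echo: {tool_input}"

available_tools = [EchoTool()]
tool_fn = next((t for t in available_tools if t.name == "echo"), None)
output = tool_fn.run("hello") if tool_fn else "[Tool Error] Tool 'echo' not found."
print(output)  # -> echo: hello
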
@@ -546,13 +580,13 @@ def create_agent_workflow(tools: List[BaseTool]): # Use BaseTool for consistency
 # ====== Agent Interface ======
 class BasicAgent:
     def __init__(self):
-        # Instantiate tools
+        # Instantiate tools - using the specific BaseTool subclasses now
         self.tools = [
-
-
-
-
-
+            DuckDuckGoSearchTool(),
+            WikipediaSearchTool(),
+            ArxivSearchTool(),
+            DocumentQATool(),
+            PythonExecutionTool(),
             VideoTranscriptionTool()
         ]
 
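
create_agent_workflow's body sits outside the changed hunks. Given the node functions and router used in this file, a plausible minimal wiring would look like the following sketch (an assumption inferred from the names above, not the Space's confirmed code):

from langgraph.graph import END, StateGraph  # already imported at the top of app.py

def create_agent_workflow_sketch(tools):
    # Tools travel inside the state dict in this app, so the graph itself ignores them.
    workflow = StateGraph(AgentState)
    workflow.add_node("reason", reasoning_node)
    workflow.add_node("action", tool_node)
    workflow.set_entry_point("reason")
    # should_continue returns "reason", "action", or "end"; map each to a target.
    workflow.add_conditional_edges(
        "reason",
        should_continue,
        {"reason": "reason", "action": "action", "end": END},
    )
    workflow.add_edge("action", "reason")
    return workflow.compile()
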
@@ -566,10 +600,10 @@ class BasicAgent:
         state = {
             "question": question,
             "context": {
-                "vector_store": self.vector_store
+                "vector_store": self.vector_store
             },
             "reasoning": "",
-            "iterations": 0,
+            "iterations": 0,  # Initialize iterations to 0
             "history": [HumanMessage(content=question)],
             "final_answer": None,
             "current_task": "Understand the question and plan the next step.",
@@ -577,14 +611,8 @@ class BasicAgent:
             "tools": self.tools
         }
 
-        # The invoke method returns an iterator, so we need to consume it to get the final state
-        # LangGraph's invoke will run until the graph reaches an END node.
         try:
-            #
-            # Note: For simple single-path graphs, `invoke` often gives the final state directly.
-            # For more complex graphs, streaming with `stream` and then getting the final state
-            # might be more appropriate if you need intermediate steps.
-            final_state = self.workflow.invoke(state)
+            final_state = self.workflow.invoke(state, {"recursion_limit": 20})  # Increased recursion limit for LangGraph
 
             if final_state.get("final_answer") is not None:
                 answer = final_state["final_answer"]
@@ -597,15 +625,13 @@ class BasicAgent:
                 print(f"Last message in history: {last_message}")
                 return f"Agent could not fully answer. Last message: {last_message}"
             else:
-
+                return "Agent finished without providing a final answer and no history messages."
         except Exception as e:
             print(f"--- FATAL ERROR during agent execution: {e} ---")
-            # In case of an unexpected error, return a helpful message
             return f"An unexpected error occurred during agent execution: {str(e)}"
 
 
 
-
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,