Update app.py

app.py CHANGED
@@ -24,184 +24,144 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 load_dotenv()
 
 
-
-
-
- from
- from typing import Dict, List, TypedDict, Annotated
- import operator
- from langchain_community.llms import HuggingFaceHub
- from langchain_community.chat_models import ChatHuggingFace
-
-
- from langchain.schema import HumanMessage  # Or your framework's equivalent
-
- def init_state(question: str):
-     return {
-         "question": question,
-         "history": [HumanMessage(content=question)],
-         "context": {}  # <- Add this line
-     }
 
 
 
- #
- @
 def duckduckgo_search(query: str) -> str:
     """Search web using DuckDuckGo. Returns top 3 results."""
-
-
-
-
-
-
 
- @
 def wikipedia_search(query: str) -> str:
     """Get Wikipedia summaries. Returns first 3 sentences."""
-
     try:
         return wikipedia.summary(query, sentences=3)
     except wikipedia.DisambiguationError as e:
         return f"Disambiguation options: {', '.join(e.options[:3])}"
     except wikipedia.PageError:
-         return "
 
- @
 def arxiv_search(query: str) -> str:
     """Search academic papers on arXiv. Returns top 3 results."""
-
-
-
-
-
-
-
-
-
-
-
-
 
- @
 def document_qa(input_str: str) -> str:
     """Answer questions from documents. Input format: 'document_text||question'"""
-
-
-
-
-
-
-
 
- @
 def python_execution(code: str) -> str:
-     """Execute Python code and return output.
     try:
         # Create isolated environment
         env = {}
-
-
     except Exception as e:
-         return f"
-
- from typing import Optional
- from langchain_core.tools import BaseTool
- from youtube_transcript_api import YouTubeTranscriptApi
 
 class VideoTranscriptionTool(BaseTool):
     name: str = "transcript_video"
-     description:
 
-     def _run(self,
-
         video_id = None
-
-
-
-
-
-
 
         if not video_id:
-             return f"Invalid or unsupported YouTube URL/ID: {
 
         try:
             transcription = YouTubeTranscriptApi.get_transcript(video_id)
-
-
-
-             for part in transcription:
-                 timestamp = f"{int(part['start']//60)}:{int(part['start']%60):02d}"
-                 formatted.append(f"[{timestamp}] {part['text']}")
-             return "\n".join(formatted)
-         else:
-             return " ".join([part['text'] for part in transcription])
 
         except Exception as e:
-
 
     def _arun(self, *args, **kwargs):
         raise NotImplementedError("Async not supported for this tool.")
 
-
-
-
-
-
-
- import os
- import time
- import json
- from typing import TypedDict, List, Union, Any, Dict
- from langchain_huggingface import ChatHuggingFace
- from langchain_huggingface.llms import HuggingFaceEndpoint
- from langchain.schema import HumanMessage, AIMessage, SystemMessage
- from langchain.prompts import ChatPromptTemplate
- from langgraph.graph import StateGraph, END
- from langchain.tools import Tool
-
- # Assume these tools are defined elsewhere and imported
- # Placeholder for your actual tool implementations
- def duckduckgo_search(query: str) -> str:
-     """Performs a DuckDuckGo search for current events or general facts."""
-     print(f"DEBUG: duckduckgo_search called with: {query}")
-     return f"Search result for '{query}': Example relevant information from web."
-
- def wikipedia_search(query: str) -> str:
-     """Searches Wikipedia for encyclopedic information."""
-     print(f"DEBUG: wikipedia_search called with: {query}")
-     return f"Wikipedia result for '{query}': Found detailed article."
-
- def arxiv_search(query: str) -> str:
-     """Searches ArXiv for scientific preprints and papers."""
-     print(f"DEBUG: arxiv_search called with: {query}")
-     return f"ArXiv result for '{query}': Found relevant research paper."
-
- def document_qa(document_path: str, question: str) -> str:
-     """Answers questions based on the content of a given document file (PDF, DOCX, TXT)."""
-     print(f"DEBUG: document_qa called with: {document_path}, question: {question}")
-     return f"Document QA result for '{question}': Answer extracted from document."
-
- def python_execution(code: str) -> str:
-     """Executes Python code in a sandboxed environment for calculations or data manipulation."""
-     try:
-         exec_globals = {}
-         exec_locals = {}
-         # WARNING: This is a highly insecure way to execute arbitrary Python code.
-         # For production, use a secure, sandboxed environment (e.g., Docker container, dedicated service).
-         exec(code, exec_globals, exec_locals)
-         return str(exec_locals.get('result', 'Code executed, no explicit result assigned to "result" variable.'))
-     except Exception as e:
-         return f"Python execution error: {str(e)}"
-
- class VideoTranscriptionTool:
-     """Transcribes and analyzes video content from a URL or ID."""
-     def __call__(self, video_id_or_url: str) -> str:
-         print(f"DEBUG: VideoTranscriptionTool called with: {video_id_or_url}")
-         return f"Video transcription/analysis result for '{video_id_or_url}': Summary of video content."
-
-
 # --- Agent State Definition ---
 class AgentState(TypedDict):
     question: str
@@ -212,8 +172,7 @@ class AgentState(TypedDict):
     final_answer: Union[str, float, int, None]
     current_task: str
     current_thoughts: str
-     tools: List[Tool]
-
 
 # --- Utility Functions ---
 def parse_agent_response(response_content: str) -> tuple[str, str, str]:
@@ -240,15 +199,13 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
     action_idx = response_content.find("Action:")
     if reasoning_idx != -1 and action_idx != -1 and reasoning_idx < action_idx:
         reasoning = response_content[reasoning_idx + len("Reasoning:"):action_idx].strip()
-         # Clean up leading/trailing quotes if present
         if reasoning.startswith('"') and reasoning.endswith('"'):
             reasoning = reasoning[1:-1]
-     elif reasoning_idx != -1:
         reasoning = response_content[reasoning_idx + len("Reasoning:"):].strip()
         if reasoning.startswith('"') and reasoning.endswith('"'):
             reasoning = reasoning[1:-1]
 
-
     # Attempt to find Action and Action Input
     if action_idx != -1:
         action_input_idx = response_content.find("Action Input:", action_idx)
@@ -259,15 +216,14 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
         else:
             action = response_content[action_idx + len("Action:"):].strip()
 
-         # Clean up action and action_input
         if action.startswith('"') and action.endswith('"'):
             action = action[1:-1]
         if action_input.startswith('"') and action_input.endswith('"'):
             action_input = action_input[1:-1]
 
         # Final cleanup for any trailing JSON artifacts if heuristic grabs too much
-         action = action.split('"', 1)[0].strip()
-         action_input = action_input.split('"', 1)[0].strip()
 
     return reasoning, action, action_input
 
@@ -300,10 +256,6 @@ def reasoning_node(state: AgentState) -> AgentState:
     print(f"DEBUG: Entering reasoning_node. Iteration: {state['iterations']}")
     print(f"DEBUG: Current history length: {len(state.get('history', []))}")
 
-     HF_TOKEN = os.getenv("HF_TOKEN")
-     if not HF_TOKEN:
-         raise ValueError("HF_TOKEN not set in environment variables.")
-
     state.setdefault("context", {})
     state.setdefault("reasoning", "")
     state.setdefault("iterations", 0)
@@ -312,21 +264,45 @@ def reasoning_node(state: AgentState) -> AgentState:
 
     state["context"].pop("pending_action", None)
 
-
 
-
-
-
-
-
-
-
     )
 
     tool_descriptions = "\n".join([
         f"- **{t.name}**: {t.description}" for t in state.get("tools", [])
     ])
 
     system_prompt = (
         "You are an expert problem solver, designed to provide concise and accurate answers. "
         "Your process involves analyzing the question, intelligently selecting and using tools, "
@@ -334,12 +310,12 @@ def reasoning_node(state: AgentState) -> AgentState:
         "**Available Tools:**\n"
         f"{tool_descriptions}\n\n"
         "**Tool Usage Guidelines:**\n"
-         "- Use **duckduckgo_search** for current events, general facts, or quick lookups.\n"
-         "- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics.\n"
-         "- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information.\n"
-         "- Use **document_qa** when the question explicitly refers to a specific document
         "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named 'result' if applicable.\n"
-         "- Use **
         "**Current Context:**\n{context}\n\n"
         "**Previous Reasoning Steps:**\n{reasoning}\n\n"
         "**Current Task:** {current_task}\n"
@@ -347,14 +323,14 @@ def reasoning_node(state: AgentState) -> AgentState:
         "**Your Response MUST be a valid JSON object with the following keys:**\n"
         "```json\n"
         "{\n"
-         "  \"Reasoning\": \"Your detailed analysis of the question and why you chose a specific action.\",\n"
         "  \"Action\": \"[Tool name OR 'Final Answer']\",\n"
-         "  \"Action Input\": \"[Input for the selected tool OR the final
         "}\n"
         "```\n"
-         "**CRITICAL RULE: 'Action' and 'Action Input' MUST NOT be empty, unless 'Action' is 'Final Answer' and 'Action Input' is the conclusive response.**\n"
-         "If you cannot determine a suitable tool or a final answer, return Action: 'Final Answer' with a message like 'I cannot answer this question with the available tools.' or 'More information is needed.'\n"
-         "Ensure 'Action Input' is
     )
 
     prompt = ChatPromptTemplate.from_messages([
@@ -362,45 +338,61 @@ def reasoning_node(state: AgentState) -> AgentState:
         *state["history"]
     ])
 
-
 
-     def
         for attempt in range(retries):
             try:
-
-
-
-
-
-
             except json.JSONDecodeError as e:
-                 print(f"[Retry {attempt+1}/{retries}] LLM returned invalid JSON. Error: {e}. Retrying...")
-                 print(f"Invalid JSON content (partial): {
-                 # Add specific error message to history to guide LLM
                 state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid JSON. Expected format: ```json{{\"Reasoning\": \"...\", \"Action\": \"...\", \"Action Input\": \"...\"}}```. Please ensure your response is ONLY valid JSON and strictly follows the format. Error: {e}"))
                 time.sleep(5)
             except Exception as e:
-                 print(f"[Retry {attempt+1}/{retries}] An unexpected error occurred during LLM call: {e}.
-                 state["history"].append(AIMessage(content=f"[
-                 time.sleep(
-         raise RuntimeError("Failed after multiple retries due to Hugging Face
-
-     response =
-         "context": state["context"],
-         "reasoning": state["reasoning"],
-         "question": state["question"],
-         "current_task": state["current_task"],
-         "current_thoughts": state["current_thoughts"]
-     })
 
     content = response.content
     reasoning, action, action_input = parse_agent_response(content)
 
     print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
 
-
-     # Otherwise, the specific error message from the retry loop will already be in history.
-     if isinstance(response, AIMessage) and content == response.content:  # Check if it's the original response, not an error message
         state["history"].append(AIMessage(content=content))
 
     state["reasoning"] += f"\nStep {state['iterations'] + 1}: {reasoning}"
@@ -414,8 +406,6 @@ def reasoning_node(state: AgentState) -> AgentState:
         "tool": action,
         "input": action_input
     }
-     # Add a message to history to indicate the agent's intent for the LLM
-     # This will be shown to the LLM in the next turn.
     state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
 
 
@@ -440,14 +430,15 @@ def tool_node(state: AgentState) -> AgentState:
     tool_name = tool_call_dict.get("tool")
     tool_input = tool_call_dict.get("input")
 
-     if not tool_name or tool_input is None:
-         error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty. LLM needs to provide valid 'Action' and 'Action Input'."
-         print(f"ERROR: {
         state["history"].append(AIMessage(content=error_message))
         state["context"].pop("pending_action", None)
         return state
 
     available_tools = state.get("tools", [])
     tool_fn = next((t for t in available_tools if t.name == tool_name), None)
 
     if tool_fn is None:
@@ -457,7 +448,7 @@ def tool_node(state: AgentState) -> AgentState:
     try:
         print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
         tool_output = tool_fn.run(tool_input)
-         if not tool_output and tool_output is not False:
             tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
     except Exception as e:
         tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
@@ -497,13 +488,14 @@ def create_agent_workflow(tools: List[Tool]):
 # ====== Agent Interface ======
 class BasicAgent:
     def __init__(self):
         self.tools = [
-
-
-
-
-
-
         ]
         self.workflow = create_agent_workflow(self.tools)
 
@@ -519,9 +511,10 @@ class BasicAgent:
             "final_answer": None,
             "current_task": "Understand the question and plan the next step.",
             "current_thoughts": "",
-             "tools": self.tools
         }
 
         final_state = self.workflow.invoke(state)
 
         if final_state.get("final_answer") is not None:
@@ -530,7 +523,6 @@ class BasicAgent:
             return answer
         else:
             print(f"--- ERROR: Agent finished without setting 'final_answer' for question: {question} ---")
-             # You might want to return the last message from history if no final answer was set
             if final_state["history"]:
                 last_message = final_state["history"][-1].content
                 print(f"Last message in history: {last_message}")
app.py (after the change; added lines marked with +):

 load_dotenv()
 
 
+ import os
+ import time
+ import json
+ from typing import TypedDict, List, Union, Any, Dict, Optional
 
+ # LangChain and LangGraph imports
+ from langchain.schema import HumanMessage, AIMessage, SystemMessage
+ from langchain.prompts import ChatPromptTemplate
+ from langgraph.graph import StateGraph, END
+ from langchain_core.tools import BaseTool, Tool, tool
+
+ # Hugging Face local model imports
+ from langchain_community.llms import HuggingFacePipeline
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ import torch
+
+ # Tool-specific imports
+ from duckduckgo_search import DDGS
+ import wikipedia
+ import arxiv
+ from transformers import pipeline as hf_pipeline  # Renamed to avoid clash with main pipeline
+ from youtube_transcript_api import YouTubeTranscriptApi
 
+ # --- Helper function for python_execution tool ---
+ def indent_code(code: str, indent: str = "    ") -> str:
+     """Indents multi-line code for execution within a function."""
+     return "\n".join(indent + line for line in code.splitlines())
 
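For reference, a quick sketch of what this helper produces (illustrative snippet, not part of the commit):

# indent_code("x = 1\nresult = x + 1") returns "    x = 1\n    result = x + 1",
# i.e. every line indented by four spaces so python_execution can splice it into a generated function body.
print(indent_code("x = 1\nresult = x + 1"))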
+ # --- Tool Definitions ---
+ @tool
 def duckduckgo_search(query: str) -> str:
     """Search web using DuckDuckGo. Returns top 3 results."""
+     print(f"DEBUG: duckduckgo_search called with: {query}")
+     try:
+         with DDGS() as ddgs:
+             return "\n\n".join(
+                 f"Title: {res['title']}\nURL: {res['href']}\nSnippet: {res['body']}"
+                 for res in ddgs.text(query, max_results=3)
+             )
+     except Exception as e:
+         return f"Error performing DuckDuckGo search: {str(e)}"
 
+ @tool
 def wikipedia_search(query: str) -> str:
     """Get Wikipedia summaries. Returns first 3 sentences."""
+     print(f"DEBUG: wikipedia_search called with: {query}")
     try:
         return wikipedia.summary(query, sentences=3)
     except wikipedia.DisambiguationError as e:
         return f"Disambiguation options: {', '.join(e.options[:3])}"
     except wikipedia.PageError:
+         return "Wikipedia page not found."
+     except Exception as e:
+         return f"Error performing Wikipedia search: {str(e)}"
 
+ @tool
 def arxiv_search(query: str) -> str:
     """Search academic papers on arXiv. Returns top 3 results."""
+     print(f"DEBUG: arxiv_search called with: {query}")
+     try:
+         results = arxiv.Search(
+             query=query,
+             max_results=3,
+             sort_by=arxiv.SortCriterion.Relevance
+         ).results()
+
+         return "\n\n".join(
+             f"Title: {r.title}\nAuthors: {', '.join(a.name for a in r.authors)}\n"
+             f"Published: {r.published.strftime('%Y-%m-%d')}\nSummary: {r.summary[:250]}..."
+             for r in results
+         )
+     except Exception as e:
+         return f"Error performing ArXiv search: {str(e)}"
 
+ @tool
 def document_qa(input_str: str) -> str:
     """Answer questions from documents. Input format: 'document_text||question'"""
+     print(f"DEBUG: document_qa called with: {input_str}")
+     try:
+         if '||' not in input_str:
+             return "Invalid format. Input must be: 'document_text||question'"
+
+         context, question = input_str.split('||', 1)
+         # Load QA model on first call or ensure it's loaded once globally.
+         # This makes it a bit slow on the first call, but avoids re-loading.
+         # For production, consider loading this outside the tool function.
+         qa_model = hf_pipeline('question-answering', model='deepset/roberta-base-squad2')
+         return qa_model(question=question, context=context)['answer']
+     except Exception as e:
+         return f"Error answering question from document: {str(e)}"
 
+ @tool
 def python_execution(code: str) -> str:
+     """Execute Python code and return output.
+     The code should assign its final result to a variable named 'result'.
+     Example: 'result = 1 + 1'
+     """
+     print(f"DEBUG: python_execution called with: {code}")
     try:
         # Create isolated environment
         env = {}
+         # Wrap code in a function to isolate scope, then capture the local 'result'
+         exec(f"def __exec_fn__():\n{indent_code(code)}\n    return locals().get('result')\nresult = __exec_fn__()", globals(), env)
+         return str(env.get('result', 'No explicit result assigned to "result" variable.'))
     except Exception as e:
+         return f"Python execution error: {str(e)}"
 
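A minimal usage sketch for the decorated tools above (assumes the imports resolve and, for wikipedia_search, network access; not part of the commit):

if __name__ == "__main__":
    # Tools created with @tool expose .name, .description and .run()
    print(python_execution.run("result = 2 ** 10"))   # expected: "1024"
    print(wikipedia_search.run("Alan Turing"))        # first three sentences of the article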
 class VideoTranscriptionTool(BaseTool):
     name: str = "transcript_video"
+     description: str = "Fetch text transcript from YouTube videos using URL or ID. Use for any question involving video or audio. Input is the YouTube URL or ID."
 
+     def _run(self, url_or_id: str) -> str:
+         print(f"DEBUG: transcript_video called with: {url_or_id}")
         video_id = None
+         # Basic parsing for common YouTube URL formats
+         if "youtu.be/" in url_or_id:
+             video_id = url_or_id.split("youtu.be/")[1].split("?")[0]
+         elif "youtube.com/watch?v=" in url_or_id:
+             video_id = url_or_id.split("v=")[1].split("&")[0]
+         elif len(url_or_id.strip()) == 11 and not ("http://" in url_or_id or "https://" in url_or_id):
+             video_id = url_or_id.strip()  # Assume it's just the ID
 
         if not video_id:
+             return f"Invalid or unsupported YouTube URL/ID: {url_or_id}. Please provide a valid YouTube URL or 11-character ID."
 
         try:
             transcription = YouTubeTranscriptApi.get_transcript(video_id)
+             # You can add include_timestamps logic here if needed,
+             # but for simplicity, let's just return the text.
+             return " ".join([part['text'] for part in transcription])
 
         except Exception as e:
+             # Catch specific errors for better messages, e.g., NoTranscriptFound
+             return f"Error fetching transcript for video ID '{video_id}': {str(e)}. It might not have an English transcript, or the video is unavailable."
 
     def _arun(self, *args, **kwargs):
         raise NotImplementedError("Async not supported for this tool.")
 
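Likewise, a small illustrative sketch for the class-based tool (placeholder video ID; the video must have a transcript available):

video_tool = VideoTranscriptionTool()
print(video_tool.run("https://www.youtube.com/watch?v=<video_id>"))  # <video_id> is a placeholder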
 # --- Agent State Definition ---
 class AgentState(TypedDict):
     question: str
 
     final_answer: Union[str, float, int, None]
     current_task: str
     current_thoughts: str
+     tools: List[Tool]  # Make sure tools are passed via state
 
 # --- Utility Functions ---
 def parse_agent_response(response_content: str) -> tuple[str, str, str]:
 
     action_idx = response_content.find("Action:")
     if reasoning_idx != -1 and action_idx != -1 and reasoning_idx < action_idx:
         reasoning = response_content[reasoning_idx + len("Reasoning:"):action_idx].strip()
         if reasoning.startswith('"') and reasoning.endswith('"'):
             reasoning = reasoning[1:-1]
+     elif reasoning_idx != -1:
         reasoning = response_content[reasoning_idx + len("Reasoning:"):].strip()
         if reasoning.startswith('"') and reasoning.endswith('"'):
             reasoning = reasoning[1:-1]
 
     # Attempt to find Action and Action Input
     if action_idx != -1:
         action_input_idx = response_content.find("Action Input:", action_idx)
 
         else:
             action = response_content[action_idx + len("Action:"):].strip()
 
         if action.startswith('"') and action.endswith('"'):
             action = action[1:-1]
         if action_input.startswith('"') and action_input.endswith('"'):
             action_input = action_input[1:-1]
 
         # Final cleanup for any trailing JSON artifacts if heuristic grabs too much
+         action = action.split('"', 1)[0].strip()
+         action_input = action_input.split('"', 1)[0].strip()
 
     return reasoning, action, action_input
 
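As an illustration of the heuristic parser (assuming the elided parts extract 'Action' between the 'Action:' and 'Action Input:' markers, as the visible code suggests):

sample = 'Reasoning: "Need current info." Action: "duckduckgo_search" Action Input: "latest Mars rover news"'
reasoning, action, action_input = parse_agent_response(sample)
# -> ('Need current info.', 'duckduckgo_search', 'latest Mars rover news')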
     print(f"DEBUG: Entering reasoning_node. Iteration: {state['iterations']}")
     print(f"DEBUG: Current history length: {len(state.get('history', []))}")
 
     state.setdefault("context", {})
     state.setdefault("reasoning", "")
     state.setdefault("iterations", 0)
 
     state["context"].pop("pending_action", None)
 
+     # --- Initialize local HuggingFacePipeline ---
+     # Using Mistral-7B-Instruct-v0.2 for better agent performance
+     model_name = "mistralai/Mistral-7B-Instruct-v0.2"
 
+     print(f"DEBUG: Loading local model: {model_name}...")
+
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+     # Load model with optimal settings for GPU if available, else CPU.
+     # Use bfloat16 for GPUs that support it (NVIDIA Ampere architecture and newer),
+     # else float16 for older GPUs or float32 for CPU/fallback.
+     # device_map="auto" intelligently distributes the model across available devices.
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+         device_map="auto"
+     )
+
+     # Create a transformers pipeline
+     pipe = pipeline(
+         "text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         max_new_tokens=1024,     # Increased max_new_tokens for potentially longer JSON
+         temperature=0.1,         # Keep low for factual, tool-use tasks
+         do_sample=True,          # Allow some sampling
+         top_p=0.9,
+         repetition_penalty=1.1,  # Help avoid repetitive output
+         # device_map handled by model loading
     )
 
+     llm = HuggingFacePipeline(pipeline=pipe)
+     # --- END LOCAL LLM INITIALIZATION ---
+
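For a quick sanity check outside the graph, the same initialization can be exercised standalone (a sketch; assumes enough memory for the 7B model and that the download succeeds):

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=64)
llm = HuggingFacePipeline(pipeline=pipe)
print(llm.invoke("Reply with the single word OK."))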
     tool_descriptions = "\n".join([
         f"- **{t.name}**: {t.description}" for t in state.get("tools", [])
     ])
 
+     # Enhanced system prompt for better JSON adherence
     system_prompt = (
         "You are an expert problem solver, designed to provide concise and accurate answers. "
         "Your process involves analyzing the question, intelligently selecting and using tools, "
 
         "**Available Tools:**\n"
         f"{tool_descriptions}\n\n"
         "**Tool Usage Guidelines:**\n"
+         "- Use **duckduckgo_search** for current events, general facts, or quick lookups. Provide a concise search query.\n"
+         "- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics. Provide a concise search term.\n"
+         "- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information. Provide a concise search query.\n"
+         "- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'.\n"
         "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named 'result' if applicable.\n"
+         "- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID.\n\n"
         "**Current Context:**\n{context}\n\n"
         "**Previous Reasoning Steps:**\n{reasoning}\n\n"
         "**Current Task:** {current_task}\n"
 
         "**Your Response MUST be a valid JSON object with the following keys:**\n"
         "```json\n"
         "{\n"
+         "  \"Reasoning\": \"Your detailed analysis of the question and why you chose a specific action. Focus on the logical steps.\",\n"
         "  \"Action\": \"[Tool name OR 'Final Answer']\",\n"
+         "  \"Action Input\": \"[Input for the selected tool OR the complete final answer]\"\n"
         "}\n"
         "```\n"
+         "**CRITICAL RULE: 'Action' and 'Action Input' MUST NOT be empty strings, unless 'Action' is 'Final Answer' and 'Action Input' is the conclusive response.**\n"
+         "If you cannot determine a suitable tool or a conclusive final answer after exhausting options, return Action: 'Final Answer' with a message like 'I cannot answer this question with the available tools.' or 'More information is needed.'\n"
+         "Ensure 'Action Input' is always the complete, valid input for the chosen 'Action'. If 'Action' is 'Final Answer', provide the complete, concise answer."
     )
 
     prompt = ChatPromptTemplate.from_messages([
 
         *state["history"]
     ])
 
+     formatted_messages = prompt.format_messages(
+         context=state["context"],
+         reasoning=state["reasoning"],
+         question=state["question"],
+         current_task=state["current_task"],
+         current_thoughts=state["current_thoughts"]
+     )
+
+     # Use tokenizer's chat template for optimal formatting with chat models.
+     # apply_chat_template expects role/content dicts, so convert the LangChain messages first.
+     try:
+         chat_messages = [
+             {
+                 "role": "system" if isinstance(m, SystemMessage)
+                 else "assistant" if isinstance(m, AIMessage)
+                 else "user",
+                 "content": m.content,
+             }
+             for m in formatted_messages
+         ]
+         full_input_string = tokenizer.apply_chat_template(
+             chat_messages,
+             tokenize=False,
+             add_generation_prompt=True  # Adds the assistant's turn start token
+         )
+     except Exception as e:
+         print(f"WARNING: Failed to apply chat template: {e}. Falling back to simple string join. Model performance may be affected.")
+         full_input_string = "\n".join([msg.content for msg in formatted_messages])
 
+     def call_with_retry_local(inputs, retries=3):  # Reduced retries for local models as network isn't the primary issue
         for attempt in range(retries):
             try:
+                 response_text = llm.invoke(inputs)
+
+                 # Strip the prompt from the generated text
+                 if response_text.startswith(inputs):
+                     content = response_text[len(inputs):].strip()
+                 else:
+                     content = response_text.strip()
+
+                 print(f"DEBUG: RAW LOCAL LLM Response (Attempt {attempt+1}):\n---\n{content}\n---")
+
+                 # Attempt to parse to validate structure
+                 json.loads(content)
+
+                 return AIMessage(content=content)
             except json.JSONDecodeError as e:
+                 print(f"[Retry {attempt+1}/{retries}] Local LLM returned invalid JSON. Error: {e}. Retrying...")
+                 print(f"Invalid JSON content (partial): {content[:200]}...")
                 state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid JSON. Expected format: ```json{{\"Reasoning\": \"...\", \"Action\": \"...\", \"Action Input\": \"...\"}}```. Please ensure your response is ONLY valid JSON and strictly follows the format. Error: {e}"))
                 time.sleep(5)
             except Exception as e:
+                 print(f"[Retry {attempt+1}/{retries}] An unexpected error occurred during local LLM call: {e}.")
+                 state["history"].append(AIMessage(content=f"[Local LLM Error] Failed to get a response from the local LLM: {e}. Trying again."))
+                 time.sleep(10)
+         raise RuntimeError("Failed after multiple retries due to local Hugging Face model issues or invalid JSON.")
+
+     response = call_with_retry_local(full_input_string)
 
     content = response.content
     reasoning, action, action_input = parse_agent_response(content)
 
     print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
 
+     if isinstance(response, AIMessage) and content == response.content:
         state["history"].append(AIMessage(content=content))
 
     state["reasoning"] += f"\nStep {state['iterations'] + 1}: {reasoning}"
 
             "tool": action,
             "input": action_input
         }
         state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
 
 
     tool_name = tool_call_dict.get("tool")
     tool_input = tool_call_dict.get("input")
 
+     if not tool_name or tool_input is None:  # Check for None, as empty string "" might be valid input for some tools
+         error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty or None. LLM needs to provide valid 'Action' and 'Action Input'."
+         print(f"ERROR: {error_message}")
         state["history"].append(AIMessage(content=error_message))
         state["context"].pop("pending_action", None)
         return state
 
     available_tools = state.get("tools", [])
+     # Match on t.name, which is what the @tool decorator sets
     tool_fn = next((t for t in available_tools if t.name == tool_name), None)
 
     if tool_fn is None:
 
     try:
         print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
         tool_output = tool_fn.run(tool_input)
+         if not tool_output and tool_output is not False:  # Ensure 'False' or 0 are not treated as empty
             tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
     except Exception as e:
         tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
 
 # ====== Agent Interface ======
 class BasicAgent:
     def __init__(self):
+         # Instantiate tools
         self.tools = [
+             duckduckgo_search,
+             wikipedia_search,
+             arxiv_search,
+             document_qa,
+             python_execution,
+             VideoTranscriptionTool()  # Instantiate the class-based tool
         ]
         self.workflow = create_agent_workflow(self.tools)
 
             "final_answer": None,
             "current_task": "Understand the question and plan the next step.",
             "current_thoughts": "",
+             "tools": self.tools  # Pass tools through state
         }
 
+         # The invoke method returns the final state after execution
         final_state = self.workflow.invoke(state)
 
         if final_state.get("final_answer") is not None:
 
             return answer
         else:
             print(f"--- ERROR: Agent finished without setting 'final_answer' for question: {question} ---")
             if final_state["history"]:
                 last_message = final_state["history"][-1].content
                 print(f"Last message in history: {last_message}")
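Finally, a usage sketch for the agent as wired above (the public entry point is not visible in this diff; a __call__(self, question) method is assumed purely for illustration):

if __name__ == "__main__":
    agent = BasicAgent()
    # Hypothetical invocation; adjust to BasicAgent's actual entry-point method
    print(agent("Which tool would you use to summarise a YouTube video?"))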