Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -157,43 +157,37 @@ from langchain.schema import HumanMessage, AIMessage, SystemMessage
|
|
157 |
from langchain.prompts import ChatPromptTemplate
|
158 |
from langgraph.graph import StateGraph, END
|
159 |
from google.api_core.exceptions import ResourceExhausted
|
|
|
160 |
|
161 |
# Assume these tools are defined elsewhere and imported
|
162 |
# Placeholder for your actual tool implementations
|
163 |
-
# For example:
|
164 |
-
# from your_tools_module import duckduckgo_search, wikipedia_search, arxiv_search, document_qa, python_execution
|
165 |
-
# And ensure you have a proper VideoTranscriptionTool
|
166 |
def duckduckgo_search(query: str) -> str:
|
167 |
"""Performs a DuckDuckGo search for current events or general facts."""
|
168 |
-
# Placeholder for actual implementation
|
169 |
print(f"DEBUG: duckduckgo_search called with: {query}")
|
170 |
return f"Search result for '{query}': Example relevant information from web."
|
171 |
|
172 |
def wikipedia_search(query: str) -> str:
|
173 |
"""Searches Wikipedia for encyclopedic information."""
|
174 |
-
# Placeholder for actual implementation
|
175 |
print(f"DEBUG: wikipedia_search called with: {query}")
|
176 |
return f"Wikipedia result for '{query}': Found detailed article."
|
177 |
|
178 |
def arxiv_search(query: str) -> str:
|
179 |
"""Searches ArXiv for scientific preprints and papers."""
|
180 |
-
# Placeholder for actual implementation
|
181 |
print(f"DEBUG: arxiv_search called with: {query}")
|
182 |
return f"ArXiv result for '{query}': Found relevant research paper."
|
183 |
|
184 |
def document_qa(document_path: str, question: str) -> str:
|
185 |
"""Answers questions based on the content of a given document file (PDF, DOCX, TXT)."""
|
186 |
-
# Placeholder for actual implementation
|
187 |
print(f"DEBUG: document_qa called with: {document_path}, question: {question}")
|
188 |
return f"Document QA result for '{question}': Answer extracted from document."
|
189 |
|
190 |
def python_execution(code: str) -> str:
|
191 |
"""Executes Python code in a sandboxed environment for calculations or data manipulation."""
|
192 |
-
# Placeholder for actual implementation - IMPORTANT: Implement this securely!
|
193 |
-
# Example (UNSAFE for real use without proper sandboxing):
|
194 |
try:
|
195 |
exec_globals = {}
|
196 |
exec_locals = {}
|
|
|
|
|
197 |
exec(code, exec_globals, exec_locals)
|
198 |
return str(exec_locals.get('result', 'Code executed, no explicit result assigned to "result" variable.'))
|
199 |
except Exception as e:
|
@@ -202,7 +196,6 @@ def python_execution(code: str) -> str:
|
|
202 |
class VideoTranscriptionTool:
|
203 |
"""Transcribes and analyzes video content from a URL or ID."""
|
204 |
def __call__(self, video_id_or_url: str) -> str:
|
205 |
-
# Placeholder for actual implementation using youtube-transcript-api etc.
|
206 |
print(f"DEBUG: VideoTranscriptionTool called with: {video_id_or_url}")
|
207 |
return f"Video transcription/analysis result for '{video_id_or_url}': Summary of video content."
|
208 |
|
@@ -210,13 +203,15 @@ class VideoTranscriptionTool:
|
|
210 |
# --- Agent State Definition ---
|
211 |
class AgentState(TypedDict):
|
212 |
question: str
|
213 |
-
history: List[Union[HumanMessage, AIMessage
|
214 |
-
context: Dict[str, Any]
|
215 |
reasoning: str
|
216 |
iterations: int
|
217 |
final_answer: Union[str, float, int, None]
|
218 |
-
current_task: str
|
219 |
-
current_thoughts: str
|
|
|
|
|
220 |
|
221 |
# --- Utility Functions ---
|
222 |
def parse_agent_response(response_content: str) -> tuple[str, str, str]:
|
@@ -230,9 +225,8 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
|
|
230 |
action_input = response_json.get("Action Input", "").strip()
|
231 |
return reasoning, action, action_input
|
232 |
except json.JSONDecodeError:
|
233 |
-
# Fallback for when LLM doesn't return perfect JSON (less likely with good prompt)
|
234 |
print(f"WARNING: LLM response not perfectly JSON: {response_content[:200]}...")
|
235 |
-
#
|
236 |
reasoning_match = response_content.split("Reasoning:", 1)
|
237 |
reasoning = reasoning_match[1].split("Action:", 1)[0].strip() if len(reasoning_match) > 1 else ""
|
238 |
|
@@ -251,29 +245,19 @@ def should_continue(state: AgentState) -> str:
|
|
251 |
"""
|
252 |
Determines if the agent should continue reasoning, use a tool, or end.
|
253 |
"""
|
254 |
-
|
255 |
-
|
256 |
-
# Check for final answer in the last AIMessage
|
257 |
-
if history and isinstance(history[-1], AIMessage) and "FINAL ANSWER:" in history[-1].content:
|
258 |
-
print("DEBUG: should_continue -> END (Final Answer detected)")
|
259 |
-
return "end"
|
260 |
|
261 |
-
#
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
if isinstance(msg, dict) and msg.get("type") == "action_request":
|
273 |
-
print("DEBUG: should_continue -> ACTION (Action request pending)")
|
274 |
-
return "action"
|
275 |
-
|
276 |
-
# If nothing else, assume we need to reason
|
277 |
print("DEBUG: should_continue -> REASON (Default to reasoning)")
|
278 |
return "reason"
|
279 |
|
@@ -291,21 +275,20 @@ def reasoning_node(state: AgentState) -> AgentState:
|
|
291 |
if not GOOGLE_API_KEY:
|
292 |
raise ValueError("GOOGLE_API_KEY not set in environment variables.")
|
293 |
|
294 |
-
# Ensure history is well-formed for the LLM prompt
|
295 |
-
if "history" not in state or not isinstance(state["history"], list):
|
296 |
-
state["history"] = []
|
297 |
-
|
298 |
# Initialize/update state fields
|
299 |
state.setdefault("context", {})
|
300 |
state.setdefault("reasoning", "")
|
301 |
state.setdefault("iterations", 0)
|
302 |
state.setdefault("current_task", "Understand the question and plan the next step.")
|
303 |
state.setdefault("current_thoughts", "")
|
|
|
|
|
|
|
304 |
|
305 |
# Create Gemini model wrapper
|
306 |
llm = ChatGoogleGenerativeAI(
|
307 |
-
model="gemini-1.5-flash",
|
308 |
-
temperature=0.1,
|
309 |
google_api_key=GOOGLE_API_KEY
|
310 |
)
|
311 |
|
@@ -314,7 +297,6 @@ def reasoning_node(state: AgentState) -> AgentState:
|
|
314 |
f"- **{t.name}**: {t.description}" for t in state.get("tools", [])
|
315 |
])
|
316 |
|
317 |
-
# Craft a more robust and explicit system prompt
|
318 |
system_prompt = (
|
319 |
"You are an expert problem solver, designed to provide concise and accurate answers. "
|
320 |
"Your process involves analyzing the question, intelligently selecting and using tools, "
|
@@ -326,8 +308,8 @@ def reasoning_node(state: AgentState) -> AgentState:
|
|
326 |
"- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics.\n"
|
327 |
"- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information.\n"
|
328 |
"- Use **document_qa** when the question explicitly refers to a specific document file (e.g., 'Analyze this PDF').\n"
|
329 |
-
"- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code.\n"
|
330 |
-
"- Use **VideoTranscriptionTool** for any question involving video or audio content.\n\n"
|
331 |
"**Current Context:**\n{context}\n\n"
|
332 |
"**Previous Reasoning Steps:**\n{reasoning}\n\n"
|
333 |
"**Current Task:** {current_task}\n"
|
@@ -340,6 +322,8 @@ def reasoning_node(state: AgentState) -> AgentState:
|
|
340 |
" \"Action Input\": \"[Input for the selected tool OR the final response]\"\n"
|
341 |
"}\n"
|
342 |
"```\n"
|
|
|
|
|
343 |
"Ensure 'Action Input' is appropriate for the chosen 'Action'. If 'Action' is 'Final Answer', provide the complete, concise answer."
|
344 |
)
|
345 |
|
@@ -350,13 +334,12 @@ def reasoning_node(state: AgentState) -> AgentState:
|
|
350 |
|
351 |
chain = prompt | llm
|
352 |
|
353 |
-
# === Add Retry Logic ===
|
354 |
def call_with_retry(inputs, retries=3, delay=60):
|
355 |
for attempt in range(retries):
|
356 |
try:
|
357 |
response = chain.invoke(inputs)
|
358 |
# Attempt to parse immediately to catch bad JSON before returning
|
359 |
-
|
360 |
return response
|
361 |
except ResourceExhausted as e:
|
362 |
print(f"[Retry {attempt+1}/{retries}] Gemini rate limit hit. Waiting {delay}s...")
|
@@ -370,7 +353,6 @@ def reasoning_node(state: AgentState) -> AgentState:
|
|
370 |
time.sleep(delay)
|
371 |
raise RuntimeError("Failed after multiple retries due to Gemini quota limit or invalid JSON.")
|
372 |
|
373 |
-
# Call model with retry protection
|
374 |
response = call_with_retry({
|
375 |
"context": state["context"],
|
376 |
"reasoning": state["reasoning"],
|
@@ -379,12 +361,11 @@ def reasoning_node(state: AgentState) -> AgentState:
|
|
379 |
"current_thoughts": state["current_thoughts"]
|
380 |
})
|
381 |
|
382 |
-
# Parse output using the robust JSON parser
|
383 |
content = response.content
|
384 |
reasoning, action, action_input = parse_agent_response(content)
|
385 |
|
386 |
-
print(f"DEBUG: LLM Response Content: {content[:200]}...")
|
387 |
-
print(f"DEBUG: Parsed Action: {action}, Action Input: {action_input[:100]}...")
|
388 |
|
389 |
# Update state
|
390 |
state["history"].append(AIMessage(content=content)) # Store the raw LLM response
|
@@ -393,50 +374,54 @@ def reasoning_node(state: AgentState) -> AgentState:
|
|
393 |
state["current_thoughts"] = reasoning # Update current thoughts for next iteration
|
394 |
|
395 |
if "final answer" in action.lower():
|
396 |
-
state["history"].append(AIMessage(content=f"FINAL ANSWER: {action_input}"))
|
397 |
state["final_answer"] = action_input # Set final answer directly in state
|
|
|
398 |
else:
|
399 |
-
# Store the action request in
|
400 |
-
state["
|
401 |
-
"type": "action_request",
|
402 |
"tool": action,
|
403 |
"input": action_input
|
404 |
-
}
|
|
|
|
|
|
|
405 |
|
406 |
print(f"DEBUG: Exiting reasoning_node. New history length: {len(state['history'])}")
|
407 |
return state
|
408 |
|
409 |
|
410 |
def tool_node(state: AgentState) -> AgentState:
|
411 |
-
|
|
|
|
|
|
|
412 |
|
413 |
-
|
414 |
-
|
415 |
-
if isinstance(msg, dict) and msg.get("type") == "action_request":
|
416 |
-
tool_call_dict = msg
|
417 |
-
break
|
418 |
|
419 |
if not tool_call_dict:
|
420 |
-
|
421 |
-
|
|
|
|
|
|
|
|
|
422 |
|
423 |
tool_name = tool_call_dict.get("tool")
|
424 |
tool_input = tool_call_dict.get("input")
|
425 |
|
426 |
-
#
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
# Or provide a specific error message as tool output
|
434 |
-
state["history"].append(AIMessage(content=f"[Tool Error] Invalid tool call: Tool name '{tool_name}' or input was empty. LLM needs to provide valid action."))
|
435 |
return state
|
436 |
|
437 |
# Look up and invoke the tool from the state's tool list
|
438 |
available_tools = state.get("tools", [])
|
439 |
-
tool_fn = next((t for t in available_tools if t.name == tool_name), None)
|
440 |
|
441 |
if tool_fn is None:
|
442 |
# Fallback for unrecognized tool - feedback to LLM
|
@@ -445,17 +430,14 @@ def tool_node(state: AgentState) -> AgentState:
|
|
445 |
else:
|
446 |
try:
|
447 |
print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
|
448 |
-
tool_output = tool_fn.run(tool_input)
|
449 |
-
if not tool_output: #
|
450 |
tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
|
451 |
except Exception as e:
|
452 |
tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
|
453 |
print(f"ERROR: {tool_output}")
|
454 |
|
455 |
-
# Add output to history as an AIMessage
|
456 |
-
# Ensure the history only contains HumanMessage and AIMessage objects for LangGraph's internal processing.
|
457 |
-
# The action_request dict can be removed or transformed if it's no longer needed for internal state.
|
458 |
-
# For now, we'll just add the tool output.
|
459 |
state["history"].append(AIMessage(content=f"[{tool_name} output]\n{tool_output}"))
|
460 |
|
461 |
print(f"DEBUG: Exiting tool_node. Tool output added to history. New history length: {len(state['history'])}")
|
@@ -463,7 +445,7 @@ def tool_node(state: AgentState) -> AgentState:
|
|
463 |
|
464 |
|
465 |
# ====== Agent Graph ======
|
466 |
-
def create_agent_workflow(tools: List[Any]): # tools are passed in now
|
467 |
workflow = StateGraph(AgentState)
|
468 |
|
469 |
# Define nodes
|
@@ -478,43 +460,34 @@ def create_agent_workflow(tools: List[Any]): # tools are passed in now
|
|
478 |
"reason",
|
479 |
should_continue,
|
480 |
{
|
481 |
-
"action": "action",
|
482 |
-
"reason": "reason",
|
483 |
-
"end": END
|
484 |
}
|
485 |
)
|
486 |
|
487 |
-
workflow.add_edge("action", "reason")
|
488 |
|
489 |
-
# Compile the graph
|
490 |
app = workflow.compile()
|
491 |
-
|
492 |
-
# Pass tools into the state so nodes can access them.
|
493 |
-
# This is a bit of a hacky way to get them into the state, but works for now.
|
494 |
-
# A cleaner way might be to make `tool_node` receive tools as a closure or directly from agent init.
|
495 |
-
# For this example, we'll modify the initial state for each invocation.
|
496 |
return app
|
497 |
|
498 |
|
499 |
# ====== Agent Interface ======
|
500 |
class BasicAgent:
|
501 |
def __init__(self):
|
502 |
-
# Tools need to be LangChain Tool objects for name and description
|
503 |
-
from langchain.tools import Tool
|
504 |
self.tools = [
|
505 |
Tool(name="duckduckgo_search", func=duckduckgo_search, description="Performs a DuckDuckGo search for current events or general facts."),
|
506 |
Tool(name="wikipedia_search", func=wikipedia_search, description="Searches Wikipedia for encyclopedic information."),
|
507 |
Tool(name="arxiv_search", func=arxiv_search, description="Searches ArXiv for scientific preprints and papers."),
|
508 |
-
Tool(name="document_qa", func=document_qa, description="Answers questions based on the content of a given document file (PDF, DOCX, TXT). Requires '
|
509 |
Tool(name="python_execution", func=python_execution, description="Executes Python code in a sandboxed environment for complex calculations or data manipulation."),
|
510 |
Tool(name="VideoTranscriptionTool", func=VideoTranscriptionTool(), description="Transcribes and analyzes video content from a URL or ID. Use for any question involving video or audio.")
|
511 |
]
|
512 |
-
self.workflow = create_agent_workflow(self.tools)
|
513 |
|
514 |
def __call__(self, question: str) -> str:
|
515 |
-
print(f"\n--- Agent received question: {question[:
|
516 |
|
517 |
-
# Initialize state with proper structure and pass tools
|
518 |
state = {
|
519 |
"question": question,
|
520 |
"context": {},
|
@@ -524,28 +497,22 @@ class BasicAgent:
|
|
524 |
"final_answer": None,
|
525 |
"current_task": "Understand the question and plan the next step.",
|
526 |
"current_thoughts": "",
|
527 |
-
"tools": self.tools
|
528 |
}
|
529 |
|
530 |
-
#
|
|
|
531 |
final_state = self.workflow.invoke(state)
|
532 |
|
533 |
-
|
534 |
-
if final_state.get("final_answer"):
|
535 |
answer = final_state["final_answer"]
|
536 |
print(f"--- Agent returning FINAL ANSWER: {answer} ---")
|
537 |
return answer
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
print(f"--- Agent returning FINAL ANSWER (from history): {answer} ---")
|
544 |
-
return answer
|
545 |
-
|
546 |
-
print(f"--- ERROR: No FINAL ANSWER found in agent history for question: {question} ---")
|
547 |
-
raise ValueError("No FINAL ANSWER found in agent history.")
|
548 |
-
|
549 |
|
550 |
|
551 |
|
|
|
157 |
from langchain.prompts import ChatPromptTemplate
|
158 |
from langgraph.graph import StateGraph, END
|
159 |
from google.api_core.exceptions import ResourceExhausted
|
160 |
+
from langchain.tools import Tool # Import Tool for consistent tool definitions
|
161 |
|
162 |
# Assume these tools are defined elsewhere and imported
|
163 |
# Placeholder for your actual tool implementations
|
|
|
|
|
|
|
164 |
def duckduckgo_search(query: str) -> str:
|
165 |
"""Performs a DuckDuckGo search for current events or general facts."""
|
|
|
166 |
print(f"DEBUG: duckduckgo_search called with: {query}")
|
167 |
return f"Search result for '{query}': Example relevant information from web."
|
168 |
|
169 |
def wikipedia_search(query: str) -> str:
|
170 |
"""Searches Wikipedia for encyclopedic information."""
|
|
|
171 |
print(f"DEBUG: wikipedia_search called with: {query}")
|
172 |
return f"Wikipedia result for '{query}': Found detailed article."
|
173 |
|
174 |
def arxiv_search(query: str) -> str:
|
175 |
"""Searches ArXiv for scientific preprints and papers."""
|
|
|
176 |
print(f"DEBUG: arxiv_search called with: {query}")
|
177 |
return f"ArXiv result for '{query}': Found relevant research paper."
|
178 |
|
179 |
def document_qa(document_path: str, question: str) -> str:
|
180 |
"""Answers questions based on the content of a given document file (PDF, DOCX, TXT)."""
|
|
|
181 |
print(f"DEBUG: document_qa called with: {document_path}, question: {question}")
|
182 |
return f"Document QA result for '{question}': Answer extracted from document."
|
183 |
|
184 |
def python_execution(code: str) -> str:
|
185 |
"""Executes Python code in a sandboxed environment for calculations or data manipulation."""
|
|
|
|
|
186 |
try:
|
187 |
exec_globals = {}
|
188 |
exec_locals = {}
|
189 |
+
# WARNING: This is a highly insecure way to execute arbitrary Python code.
|
190 |
+
# For production, use a secure, sandboxed environment (e.g., Docker container, dedicated service).
|
191 |
exec(code, exec_globals, exec_locals)
|
192 |
return str(exec_locals.get('result', 'Code executed, no explicit result assigned to "result" variable.'))
|
193 |
except Exception as e:
|
|
|
196 |
class VideoTranscriptionTool:
|
197 |
"""Transcribes and analyzes video content from a URL or ID."""
|
198 |
def __call__(self, video_id_or_url: str) -> str:
|
|
|
199 |
print(f"DEBUG: VideoTranscriptionTool called with: {video_id_or_url}")
|
200 |
return f"Video transcription/analysis result for '{video_id_or_url}': Summary of video content."
|
201 |
|
|
|
203 |
# --- Agent State Definition ---
|
204 |
class AgentState(TypedDict):
|
205 |
question: str
|
206 |
+
history: List[Union[HumanMessage, AIMessage]] # History only contains proper messages
|
207 |
+
context: Dict[str, Any] # Use context for internal agent state
|
208 |
reasoning: str
|
209 |
iterations: int
|
210 |
final_answer: Union[str, float, int, None]
|
211 |
+
current_task: str
|
212 |
+
current_thoughts: str
|
213 |
+
tools: List[Tool] # Pass tools into state
|
214 |
+
|
215 |
|
216 |
# --- Utility Functions ---
|
217 |
def parse_agent_response(response_content: str) -> tuple[str, str, str]:
|
|
|
225 |
action_input = response_json.get("Action Input", "").strip()
|
226 |
return reasoning, action, action_input
|
227 |
except json.JSONDecodeError:
|
|
|
228 |
print(f"WARNING: LLM response not perfectly JSON: {response_content[:200]}...")
|
229 |
+
# Fallback heuristic parsing (less reliable but better than nothing)
|
230 |
reasoning_match = response_content.split("Reasoning:", 1)
|
231 |
reasoning = reasoning_match[1].split("Action:", 1)[0].strip() if len(reasoning_match) > 1 else ""
|
232 |
|
|
|
245 |
"""
|
246 |
Determines if the agent should continue reasoning, use a tool, or end.
|
247 |
"""
|
248 |
+
print(f"DEBUG: Entering should_continue. Current context: {state.get('context', {})}")
|
|
|
|
|
|
|
|
|
|
|
249 |
|
250 |
+
# End if agent has produced a final answer
|
251 |
+
if state.get("final_answer"):
|
252 |
+
print("DEBUG: should_continue -> END (Final Answer set in state)")
|
253 |
+
return "end"
|
254 |
+
|
255 |
+
# Check if a tool action is pending in context
|
256 |
+
if state.get("context", {}).get("pending_action"):
|
257 |
+
print("DEBUG: should_continue -> ACTION (Pending action in context)")
|
258 |
+
return "action"
|
259 |
+
|
260 |
+
# Otherwise, go back to reasoning (e.g., after initial question, or after tool output)
|
|
|
|
|
|
|
|
|
|
|
261 |
print("DEBUG: should_continue -> REASON (Default to reasoning)")
|
262 |
return "reason"
|
263 |
|
|
|
275 |
if not GOOGLE_API_KEY:
|
276 |
raise ValueError("GOOGLE_API_KEY not set in environment variables.")
|
277 |
|
|
|
|
|
|
|
|
|
278 |
# Initialize/update state fields
|
279 |
state.setdefault("context", {})
|
280 |
state.setdefault("reasoning", "")
|
281 |
state.setdefault("iterations", 0)
|
282 |
state.setdefault("current_task", "Understand the question and plan the next step.")
|
283 |
state.setdefault("current_thoughts", "")
|
284 |
+
|
285 |
+
# Clear any old pending action from context before generating a new one
|
286 |
+
state["context"].pop("pending_action", None)
|
287 |
|
288 |
# Create Gemini model wrapper
|
289 |
llm = ChatGoogleGenerativeAI(
|
290 |
+
model="gemini-1.5-flash",
|
291 |
+
temperature=0.1,
|
292 |
google_api_key=GOOGLE_API_KEY
|
293 |
)
|
294 |
|
|
|
297 |
f"- **{t.name}**: {t.description}" for t in state.get("tools", [])
|
298 |
])
|
299 |
|
|
|
300 |
system_prompt = (
|
301 |
"You are an expert problem solver, designed to provide concise and accurate answers. "
|
302 |
"Your process involves analyzing the question, intelligently selecting and using tools, "
|
|
|
308 |
"- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics.\n"
|
309 |
"- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information.\n"
|
310 |
"- Use **document_qa** when the question explicitly refers to a specific document file (e.g., 'Analyze this PDF').\n"
|
311 |
+
"- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named 'result' if applicable.\n"
|
312 |
+
"- Use **VideoTranscriptionTool** for any question involving video or audio content. Provide the full YouTube URL or video ID.\n\n"
|
313 |
"**Current Context:**\n{context}\n\n"
|
314 |
"**Previous Reasoning Steps:**\n{reasoning}\n\n"
|
315 |
"**Current Task:** {current_task}\n"
|
|
|
322 |
" \"Action Input\": \"[Input for the selected tool OR the final response]\"\n"
|
323 |
"}\n"
|
324 |
"```\n"
|
325 |
+
"**CRITICAL RULE: 'Action' and 'Action Input' MUST NOT be empty, unless 'Action' is 'Final Answer' and 'Action Input' is the conclusive response.**\n"
|
326 |
+
"If you cannot determine a suitable tool or a final answer, return Action: 'Final Answer' with a message like 'I cannot answer this question with the available tools.' or 'More information is needed.'\n"
|
327 |
"Ensure 'Action Input' is appropriate for the chosen 'Action'. If 'Action' is 'Final Answer', provide the complete, concise answer."
|
328 |
)
|
329 |
|
|
|
334 |
|
335 |
chain = prompt | llm
|
336 |
|
|
|
337 |
def call_with_retry(inputs, retries=3, delay=60):
|
338 |
for attempt in range(retries):
|
339 |
try:
|
340 |
response = chain.invoke(inputs)
|
341 |
# Attempt to parse immediately to catch bad JSON before returning
|
342 |
+
json.loads(response.content) # Validate JSON structure
|
343 |
return response
|
344 |
except ResourceExhausted as e:
|
345 |
print(f"[Retry {attempt+1}/{retries}] Gemini rate limit hit. Waiting {delay}s...")
|
|
|
353 |
time.sleep(delay)
|
354 |
raise RuntimeError("Failed after multiple retries due to Gemini quota limit or invalid JSON.")
|
355 |
|
|
|
356 |
response = call_with_retry({
|
357 |
"context": state["context"],
|
358 |
"reasoning": state["reasoning"],
|
|
|
361 |
"current_thoughts": state["current_thoughts"]
|
362 |
})
|
363 |
|
|
|
364 |
content = response.content
|
365 |
reasoning, action, action_input = parse_agent_response(content)
|
366 |
|
367 |
+
print(f"DEBUG: LLM Raw Response Content: {content[:200]}...")
|
368 |
+
print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
|
369 |
|
370 |
# Update state
|
371 |
state["history"].append(AIMessage(content=content)) # Store the raw LLM response
|
|
|
374 |
state["current_thoughts"] = reasoning # Update current thoughts for next iteration
|
375 |
|
376 |
if "final answer" in action.lower():
|
|
|
377 |
state["final_answer"] = action_input # Set final answer directly in state
|
378 |
+
# The should_continue check will handle ending the graph based on final_answer presence
|
379 |
else:
|
380 |
+
# Store the action request in context, not in history
|
381 |
+
state["context"]["pending_action"] = {
|
|
|
382 |
"tool": action,
|
383 |
"input": action_input
|
384 |
+
}
|
385 |
+
# Add a message to history to indicate the agent's intent for the LLM
|
386 |
+
state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
|
387 |
+
|
388 |
|
389 |
print(f"DEBUG: Exiting reasoning_node. New history length: {len(state['history'])}")
|
390 |
return state
|
391 |
|
392 |
|
393 |
def tool_node(state: AgentState) -> AgentState:
|
394 |
+
"""
|
395 |
+
Node for executing the chosen tool and returning its output.
|
396 |
+
"""
|
397 |
+
print(f"DEBUG: Entering tool_node. Iteration: {state['iterations']}")
|
398 |
|
399 |
+
# Get the pending action from context
|
400 |
+
tool_call_dict = state["context"].pop("pending_action", None)
|
|
|
|
|
|
|
401 |
|
402 |
if not tool_call_dict:
|
403 |
+
# This case should ideally not be reached if should_continue is robust,
|
404 |
+
# but provides a fallback.
|
405 |
+
error_message = "[Tool Error] No pending_action found in context. This indicates an issue with graph flow."
|
406 |
+
print(f"ERROR: {error_message}")
|
407 |
+
state["history"].append(AIMessage(content=error_message))
|
408 |
+
return state
|
409 |
|
410 |
tool_name = tool_call_dict.get("tool")
|
411 |
tool_input = tool_call_dict.get("input")
|
412 |
|
413 |
+
# Defensive check for empty tool name or input (still needed as LLM might generate empty strings)
|
414 |
+
if not tool_name or tool_input is None:
|
415 |
+
error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty. LLM needs to provide valid 'Action' and 'Action Input'."
|
416 |
+
print(f"ERROR: {error_message}")
|
417 |
+
state["history"].append(AIMessage(content=error_message))
|
418 |
+
# Clear any problematic pending action
|
419 |
+
state["context"].pop("pending_action", None)
|
|
|
|
|
420 |
return state
|
421 |
|
422 |
# Look up and invoke the tool from the state's tool list
|
423 |
available_tools = state.get("tools", [])
|
424 |
+
tool_fn = next((t for t in available_tools if t.name == tool_name), None)
|
425 |
|
426 |
if tool_fn is None:
|
427 |
# Fallback for unrecognized tool - feedback to LLM
|
|
|
430 |
else:
|
431 |
try:
|
432 |
print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
|
433 |
+
tool_output = tool_fn.run(tool_input)
|
434 |
+
if not tool_output and tool_output is not False: # Ensure 'False' is not treated as empty
|
435 |
tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
|
436 |
except Exception as e:
|
437 |
tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
|
438 |
print(f"ERROR: {tool_output}")
|
439 |
|
440 |
+
# Add tool output to history as an AIMessage for the LLM to process next
|
|
|
|
|
|
|
441 |
state["history"].append(AIMessage(content=f"[{tool_name} output]\n{tool_output}"))
|
442 |
|
443 |
print(f"DEBUG: Exiting tool_node. Tool output added to history. New history length: {len(state['history'])}")
|
|
|
445 |
|
446 |
|
447 |
# ====== Agent Graph ======
|
448 |
+
def create_agent_workflow(tools: List[Tool]): # tools are passed in now
|
449 |
workflow = StateGraph(AgentState)
|
450 |
|
451 |
# Define nodes
|
|
|
460 |
"reason",
|
461 |
should_continue,
|
462 |
{
|
463 |
+
"action": "action",
|
464 |
+
"reason": "reason",
|
465 |
+
"end": END
|
466 |
}
|
467 |
)
|
468 |
|
469 |
+
workflow.add_edge("action", "reason")
|
470 |
|
|
|
471 |
app = workflow.compile()
|
|
|
|
|
|
|
|
|
|
|
472 |
return app
|
473 |
|
474 |
|
475 |
# ====== Agent Interface ======
|
476 |
class BasicAgent:
|
477 |
def __init__(self):
|
|
|
|
|
478 |
self.tools = [
|
479 |
Tool(name="duckduckgo_search", func=duckduckgo_search, description="Performs a DuckDuckGo search for current events or general facts."),
|
480 |
Tool(name="wikipedia_search", func=wikipedia_search, description="Searches Wikipedia for encyclopedic information."),
|
481 |
Tool(name="arxiv_search", func=arxiv_search, description="Searches ArXiv for scientific preprints and papers."),
|
482 |
+
Tool(name="document_qa", func=document_qa, description="Answers questions based on the content of a given document file (PDF, DOCX, TXT). Requires 'document_path' and 'question' as input."),
|
483 |
Tool(name="python_execution", func=python_execution, description="Executes Python code in a sandboxed environment for complex calculations or data manipulation."),
|
484 |
Tool(name="VideoTranscriptionTool", func=VideoTranscriptionTool(), description="Transcribes and analyzes video content from a URL or ID. Use for any question involving video or audio.")
|
485 |
]
|
486 |
+
self.workflow = create_agent_workflow(self.tools)
|
487 |
|
488 |
def __call__(self, question: str) -> str:
|
489 |
+
print(f"\n--- Agent received question: {question[:80]}{'...' if len(question) > 80 else ''} ---")
|
490 |
|
|
|
491 |
state = {
|
492 |
"question": question,
|
493 |
"context": {},
|
|
|
497 |
"final_answer": None,
|
498 |
"current_task": "Understand the question and plan the next step.",
|
499 |
"current_thoughts": "",
|
500 |
+
"tools": self.tools
|
501 |
}
|
502 |
|
503 |
+
# The invoke method will now return the final state, or raise an error if it hits a dead end
|
504 |
+
# LangGraph runs are synchronous by default here.
|
505 |
final_state = self.workflow.invoke(state)
|
506 |
|
507 |
+
if final_state.get("final_answer") is not None:
|
|
|
508 |
answer = final_state["final_answer"]
|
509 |
print(f"--- Agent returning FINAL ANSWER: {answer} ---")
|
510 |
return answer
|
511 |
+
else:
|
512 |
+
# This should ideally not happen if the agent is designed to always provide a final answer
|
513 |
+
# or a specific "cannot answer" message.
|
514 |
+
print(f"--- ERROR: Agent finished without setting 'final_answer' for question: {question} ---")
|
515 |
+
raise ValueError("Agent finished without providing a final answer.")
|
|
|
|
|
|
|
|
|
|
|
|
|
516 |
|
517 |
|
518 |
|