wt002 committed on
Commit
6477f4a
·
verified ·
1 Parent(s): b868a02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +199 -207
app.py CHANGED
@@ -24,184 +24,144 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
  load_dotenv()
25
 
26
 
27
- from langgraph.graph import END, StateGraph
28
- from langchain_core.prompts import ChatPromptTemplate
29
- from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
30
- from langchain_core.tools import tool
31
- from typing import Dict, List, TypedDict, Annotated
32
- import operator
33
- from langchain_community.llms import HuggingFaceHub
34
- from langchain_community.chat_models import ChatHuggingFace
35
-
36
-
37
- from langchain.schema import HumanMessage # Or your framework's equivalent
38
-
39
- def init_state(question: str):
40
- return {
41
- "question": question,
42
- "history": [HumanMessage(content=question)],
43
- "context": {} # <- Add this line
44
- }
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
 
 
 
 
47
 
48
- # ====== Tool Definitions ======
49
- @tool
50
  def duckduckgo_search(query: str) -> str:
51
  """Search web using DuckDuckGo. Returns top 3 results."""
52
- from duckduckgo_search import DDGS
53
- with DDGS() as ddgs:
54
- return "\n\n".join(
55
- f"Title: {res['title']}\nURL: {res['href']}\nSnippet: {res['body']}"
56
- for res in ddgs.text(query, max_results=3)
57
- )
 
 
 
58
 
59
- @tool
60
  def wikipedia_search(query: str) -> str:
61
  """Get Wikipedia summaries. Returns first 3 sentences."""
62
- import wikipedia
63
  try:
64
  return wikipedia.summary(query, sentences=3)
65
  except wikipedia.DisambiguationError as e:
66
  return f"Disambiguation options: {', '.join(e.options[:3])}"
67
  except wikipedia.PageError:
68
- return "Page not found"
 
 
69
 
70
- @tool
71
  def arxiv_search(query: str) -> str:
72
  """Search academic papers on arXiv. Returns top 3 results."""
73
- import arxiv
74
- results = arxiv.Search(
75
- query=query,
76
- max_results=3,
77
- sort_by=arxiv.SortCriterion.Relevance
78
- ).results()
79
-
80
- return "\n\n".join(
81
- f"Title: {r.title}\nAuthors: {', '.join(a.name for a in r.authors)}\n"
82
- f"Published: {r.published.strftime('%Y-%m-%d')}\nSummary: {r.summary[:250]}..."
83
- for r in results
84
- )
 
 
 
85
 
86
- @tool
87
  def document_qa(input_str: str) -> str:
88
  """Answer questions from documents. Input format: 'document_text||question'"""
89
- from transformers import pipeline
90
- if '||' not in input_str:
91
- return "Invalid format. Use: 'document_text||question'"
92
-
93
- context, question = input_str.split('||', 1)
94
- qa_model = pipeline('question-answering', model='deepset/roberta-base-squad2')
95
- return qa_model(question=question, context=context)['answer']
 
 
 
 
 
 
96
 
97
- @tool
98
  def python_execution(code: str) -> str:
99
- """Execute Python code and return output."""
 
 
 
 
100
  try:
101
  # Create isolated environment
102
  env = {}
103
- exec(f"def __exec_fn__():\n {indent_code(code)}\nresult = __exec_fn__()", env)
104
- return str(env.get('result', 'No output'))
 
105
  except Exception as e:
106
- return f"Error: {str(e)}"
107
-
108
- from typing import Optional
109
- from langchain_core.tools import BaseTool
110
- from youtube_transcript_api import YouTubeTranscriptApi
111
 
112
  class VideoTranscriptionTool(BaseTool):
113
  name: str = "transcript_video"
114
- description: str = "Fetch text transcript from YouTube videos using URL or ID. Optionally include timestamps."
115
 
116
- def _run(self, url: str, include_timestamps: Optional[bool] = False) -> str:
117
- # Extract video ID
118
  video_id = None
119
- if "youtube.com/watch?v=" in url:
120
- video_id = url.split("v=")[1].split("&")[0]
121
- elif "youtu.be/" in url:
122
- video_id = url.split("youtu.be/")[1].split("?")[0]
123
- elif len(url.strip()) == 11 and not ("http://" in url or "https://" in url):
124
- video_id = url.strip()
 
125
 
126
  if not video_id:
127
- return f"Invalid or unsupported YouTube URL/ID: {url}"
128
 
129
  try:
130
  transcription = YouTubeTranscriptApi.get_transcript(video_id)
131
-
132
- if include_timestamps:
133
- formatted = []
134
- for part in transcription:
135
- timestamp = f"{int(part['start']//60)}:{int(part['start']%60):02d}"
136
- formatted.append(f"[{timestamp}] {part['text']}")
137
- return "\n".join(formatted)
138
- else:
139
- return " ".join([part['text'] for part in transcription])
140
 
141
  except Exception as e:
142
- return f"Error fetching transcript: {str(e)}"
 
143
 
144
  def _arun(self, *args, **kwargs):
145
  raise NotImplementedError("Async not supported for this tool.")
146
 
147
-
148
-
149
-
150
-
151
-
152
-
153
- import os
154
- import time
155
- import json
156
- from typing import TypedDict, List, Union, Any, Dict
157
- from langchain_huggingface import ChatHuggingFace
158
- from langchain_huggingface.llms import HuggingFaceEndpoint
159
- from langchain.schema import HumanMessage, AIMessage, SystemMessage
160
- from langchain.prompts import ChatPromptTemplate
161
- from langgraph.graph import StateGraph, END
162
- from langchain.tools import Tool
163
-
164
- # Assume these tools are defined elsewhere and imported
165
- # Placeholder for your actual tool implementations
166
- def duckduckgo_search(query: str) -> str:
167
- """Performs a DuckDuckGo search for current events or general facts."""
168
- print(f"DEBUG: duckduckgo_search called with: {query}")
169
- return f"Search result for '{query}': Example relevant information from web."
170
-
171
- def wikipedia_search(query: str) -> str:
172
- """Searches Wikipedia for encyclopedic information."""
173
- print(f"DEBUG: wikipedia_search called with: {query}")
174
- return f"Wikipedia result for '{query}': Found detailed article."
175
-
176
- def arxiv_search(query: str) -> str:
177
- """Searches ArXiv for scientific preprints and papers."""
178
- print(f"DEBUG: arxiv_search called with: {query}")
179
- return f"ArXiv result for '{query}': Found relevant research paper."
180
-
181
- def document_qa(document_path: str, question: str) -> str:
182
- """Answers questions based on the content of a given document file (PDF, DOCX, TXT)."""
183
- print(f"DEBUG: document_qa called with: {document_path}, question: {question}")
184
- return f"Document QA result for '{question}': Answer extracted from document."
185
-
186
- def python_execution(code: str) -> str:
187
- """Executes Python code in a sandboxed environment for calculations or data manipulation."""
188
- try:
189
- exec_globals = {}
190
- exec_locals = {}
191
- # WARNING: This is a highly insecure way to execute arbitrary Python code.
192
- # For production, use a secure, sandboxed environment (e.g., Docker container, dedicated service).
193
- exec(code, exec_globals, exec_locals)
194
- return str(exec_locals.get('result', 'Code executed, no explicit result assigned to "result" variable.'))
195
- except Exception as e:
196
- return f"Python execution error: {str(e)}"
197
-
198
- class VideoTranscriptionTool:
199
- """Transcribes and analyzes video content from a URL or ID."""
200
- def __call__(self, video_id_or_url: str) -> str:
201
- print(f"DEBUG: VideoTranscriptionTool called with: {video_id_or_url}")
202
- return f"Video transcription/analysis result for '{video_id_or_url}': Summary of video content."
203
-
204
-
205
  # --- Agent State Definition ---
206
  class AgentState(TypedDict):
207
  question: str
@@ -212,8 +172,7 @@ class AgentState(TypedDict):
212
  final_answer: Union[str, float, int, None]
213
  current_task: str
214
  current_thoughts: str
215
- tools: List[Tool]
216
-
217
 
218
  # --- Utility Functions ---
219
  def parse_agent_response(response_content: str) -> tuple[str, str, str]:
@@ -240,15 +199,13 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
240
  action_idx = response_content.find("Action:")
241
  if reasoning_idx != -1 and action_idx != -1 and reasoning_idx < action_idx:
242
  reasoning = response_content[reasoning_idx + len("Reasoning:"):action_idx].strip()
243
- # Clean up leading/trailing quotes if present
244
  if reasoning.startswith('"') and reasoning.endswith('"'):
245
  reasoning = reasoning[1:-1]
246
- elif reasoning_idx != -1: # If only reasoning is found
247
  reasoning = response_content[reasoning_idx + len("Reasoning:"):].strip()
248
  if reasoning.startswith('"') and reasoning.endswith('"'):
249
  reasoning = reasoning[1:-1]
250
 
251
-
252
  # Attempt to find Action and Action Input
253
  if action_idx != -1:
254
  action_input_idx = response_content.find("Action Input:", action_idx)
@@ -259,15 +216,14 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
259
  else:
260
  action = response_content[action_idx + len("Action:"):].strip()
261
 
262
- # Clean up action and action_input
263
  if action.startswith('"') and action.endswith('"'):
264
  action = action[1:-1]
265
  if action_input.startswith('"') and action_input.endswith('"'):
266
  action_input = action_input[1:-1]
267
 
268
  # Final cleanup for any trailing JSON artifacts if heuristic grabs too much
269
- action = action.split('"', 1)[0].strip() # Stop at first quote for safety if it's "Action": "tool_name",
270
- action_input = action_input.split('"', 1)[0].strip() # Similar for input
271
 
272
  return reasoning, action, action_input
273
 
@@ -300,10 +256,6 @@ def reasoning_node(state: AgentState) -> AgentState:
300
  print(f"DEBUG: Entering reasoning_node. Iteration: {state['iterations']}")
301
  print(f"DEBUG: Current history length: {len(state.get('history', []))}")
302
 
303
- HF_TOKEN = os.getenv("HF_TOKEN")
304
- if not HF_TOKEN:
305
- raise ValueError("HF_TOKEN not set in environment variables.")
306
-
307
  state.setdefault("context", {})
308
  state.setdefault("reasoning", "")
309
  state.setdefault("iterations", 0)
@@ -312,21 +264,45 @@ def reasoning_node(state: AgentState) -> AgentState:
312
 
313
  state["context"].pop("pending_action", None)
314
 
315
- model_id = "mistralai/Mistral-7B-Instruct-v0.2"
 
 
316
 
317
- llm = ChatHuggingFace(
318
- llm=HuggingFaceEndpoint(
319
- repo_id=model_id,
320
- max_new_tokens=1024, # Increased max_new_tokens
321
- temperature=0.1,
322
- huggingfacehub_api_token=HF_TOKEN,
323
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  )
325
 
 
 
 
326
  tool_descriptions = "\n".join([
327
  f"- **{t.name}**: {t.description}" for t in state.get("tools", [])
328
  ])
329
 
 
330
  system_prompt = (
331
  "You are an expert problem solver, designed to provide concise and accurate answers. "
332
  "Your process involves analyzing the question, intelligently selecting and using tools, "
@@ -334,12 +310,12 @@ def reasoning_node(state: AgentState) -> AgentState:
334
  "**Available Tools:**\n"
335
  f"{tool_descriptions}\n\n"
336
  "**Tool Usage Guidelines:**\n"
337
- "- Use **duckduckgo_search** for current events, general facts, or quick lookups.\n"
338
- "- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics.\n"
339
- "- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information.\n"
340
- "- Use **document_qa** when the question explicitly refers to a specific document file (e.g., 'Analyze this PDF').\n"
341
  "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named 'result' if applicable.\n"
342
- "- Use **VideoTranscriptionTool** for any question involving video or audio content. Provide the full YouTube URL or video ID.\n\n"
343
  "**Current Context:**\n{context}\n\n"
344
  "**Previous Reasoning Steps:**\n{reasoning}\n\n"
345
  "**Current Task:** {current_task}\n"
@@ -347,14 +323,14 @@ def reasoning_node(state: AgentState) -> AgentState:
347
  "**Your Response MUST be a valid JSON object with the following keys:**\n"
348
  "```json\n"
349
  "{\n"
350
- " \"Reasoning\": \"Your detailed analysis of the question and why you chose a specific action.\",\n"
351
  " \"Action\": \"[Tool name OR 'Final Answer']\",\n"
352
- " \"Action Input\": \"[Input for the selected tool OR the final response]\"\n"
353
  "}\n"
354
  "```\n"
355
- "**CRITICAL RULE: 'Action' and 'Action Input' MUST NOT be empty, unless 'Action' is 'Final Answer' and 'Action Input' is the conclusive response.**\n"
356
- "If you cannot determine a suitable tool or a final answer, return Action: 'Final Answer' with a message like 'I cannot answer this question with the available tools.' or 'More information is needed.'\n"
357
- "Ensure 'Action Input' is appropriate for the chosen 'Action'. If 'Action' is 'Final Answer', provide the complete, concise answer."
358
  )
359
 
360
  prompt = ChatPromptTemplate.from_messages([
@@ -362,45 +338,61 @@ def reasoning_node(state: AgentState) -> AgentState:
362
  *state["history"]
363
  ])
364
 
365
- chain = prompt | llm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
- def call_with_retry(inputs, retries=3, delay=30):
368
  for attempt in range(retries):
369
  try:
370
- response = chain.invoke(inputs)
371
- # --- NEW DEBUGGING PRINT ---
372
- print(f"DEBUG: RAW LLM Response (Attempt {attempt+1}):\n---\n{response.content}\n---")
373
- # --- END NEW DEBUGGING PRINT ---
374
- json.loads(response.content) # Attempt to parse to validate structure
375
- return response
 
 
 
 
 
 
 
 
376
  except json.JSONDecodeError as e:
377
- print(f"[Retry {attempt+1}/{retries}] LLM returned invalid JSON. Error: {e}. Retrying...")
378
- print(f"Invalid JSON content (partial): {response.content[:200]}...")
379
- # Add specific error message to history to guide LLM
380
  state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid JSON. Expected format: ```json{{\"Reasoning\": \"...\", \"Action\": \"...\", \"Action Input\": \"...\"}}```. Please ensure your response is ONLY valid JSON and strictly follows the format. Error: {e}"))
381
  time.sleep(5)
382
  except Exception as e:
383
- print(f"[Retry {attempt+1}/{retries}] An unexpected error occurred during LLM call: {e}. Waiting {delay}s...")
384
- state["history"].append(AIMessage(content=f"[API Error] Failed to get a response from the LLM due to an API error: {e}. Trying again."))
385
- time.sleep(delay)
386
- raise RuntimeError("Failed after multiple retries due to Hugging Face API issues or invalid JSON.")
387
-
388
- response = call_with_retry({
389
- "context": state["context"],
390
- "reasoning": state["reasoning"],
391
- "question": state["question"],
392
- "current_task": state["current_task"],
393
- "current_thoughts": state["current_thoughts"]
394
- })
395
 
396
  content = response.content
397
  reasoning, action, action_input = parse_agent_response(content)
398
 
399
  print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
400
 
401
- # Only append the LLM's raw output if it was valid JSON and processed successfully
402
- # Otherwise, the specific error message from the retry loop will already be in history.
403
- if isinstance(response, AIMessage) and content == response.content: # Check if it's the original response, not an error message
404
  state["history"].append(AIMessage(content=content))
405
 
406
  state["reasoning"] += f"\nStep {state['iterations'] + 1}: {reasoning}"
@@ -414,8 +406,6 @@ def reasoning_node(state: AgentState) -> AgentState:
414
  "tool": action,
415
  "input": action_input
416
  }
417
- # Add a message to history to indicate the agent's intent for the LLM
418
- # This will be shown to the LLM in the next turn.
419
  state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
420
 
421
 
@@ -440,14 +430,15 @@ def tool_node(state: AgentState) -> AgentState:
440
  tool_name = tool_call_dict.get("tool")
441
  tool_input = tool_call_dict.get("input")
442
 
443
- if not tool_name or tool_input is None:
444
- error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty. LLM needs to provide valid 'Action' and 'Action Input'."
445
- print(f"ERROR: {error_output}") # Print the error message for debugging
446
  state["history"].append(AIMessage(content=error_message))
447
  state["context"].pop("pending_action", None)
448
  return state
449
 
450
  available_tools = state.get("tools", [])
 
451
  tool_fn = next((t for t in available_tools if t.name == tool_name), None)
452
 
453
  if tool_fn is None:
@@ -457,7 +448,7 @@ def tool_node(state: AgentState) -> AgentState:
457
  try:
458
  print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
459
  tool_output = tool_fn.run(tool_input)
460
- if not tool_output and tool_output is not False:
461
  tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
462
  except Exception as e:
463
  tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
@@ -497,13 +488,14 @@ def create_agent_workflow(tools: List[Tool]):
497
  # ====== Agent Interface ======
498
  class BasicAgent:
499
  def __init__(self):
 
500
  self.tools = [
501
- Tool(name="duckduckgo_search", func=duckduckgo_search, description="Performs a DuckDuckGo search for current events or general facts."),
502
- Tool(name="wikipedia_search", func=wikipedia_search, description="Searches Wikipedia for encyclopedic information."),
503
- Tool(name="arxiv_search", func=arxiv_search, description="Searches ArXiv for scientific preprints and papers."),
504
- Tool(name="document_qa", func=document_qa, description="Answers questions based on the content of a given document file (PDF, DOCX, TXT). Requires 'document_path' and 'question' as input."),
505
- Tool(name="python_execution", func=python_execution, description="Executes Python code in a sandboxed environment for complex calculations or data manipulation."),
506
- Tool(name="VideoTranscriptionTool", func=VideoTranscriptionTool(), description="Transcribes and analyzes video content from a URL or ID. Use for any question involving video or audio.")
507
  ]
508
  self.workflow = create_agent_workflow(self.tools)
509
 
@@ -519,9 +511,10 @@ class BasicAgent:
519
  "final_answer": None,
520
  "current_task": "Understand the question and plan the next step.",
521
  "current_thoughts": "",
522
- "tools": self.tools
523
  }
524
 
 
525
  final_state = self.workflow.invoke(state)
526
 
527
  if final_state.get("final_answer") is not None:
@@ -530,7 +523,6 @@ class BasicAgent:
530
  return answer
531
  else:
532
  print(f"--- ERROR: Agent finished without setting 'final_answer' for question: {question} ---")
533
- # You might want to return the last message from history if no final answer was set
534
  if final_state["history"]:
535
  last_message = final_state["history"][-1].content
536
  print(f"Last message in history: {last_message}")
 
24
  load_dotenv()
25
 
26
 
27
+ import os
28
+ import time
29
+ import json
30
+ from typing import TypedDict, List, Union, Any, Dict, Optional
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ # LangChain and LangGraph imports
33
+ from langchain.schema import HumanMessage, AIMessage, SystemMessage
34
+ from langchain.prompts import ChatPromptTemplate
35
+ from langgraph.graph import StateGraph, END
36
+ from langchain_core.tools import BaseTool, Tool
37
+
38
+ # Hugging Face local model imports
39
+ from langchain_community.llms import HuggingFacePipeline
40
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
41
+ import torch
42
+
43
+ # Tool-specific imports
44
+ from duckduckgo_search import DDGS
45
+ import wikipedia
46
+ import arxiv
47
+ from transformers import pipeline as hf_pipeline # Renamed to avoid clash with main pipeline
48
+ from youtube_transcript_api import YouTubeTranscriptApi
49
 
50
+ # --- Helper function for python_execution tool ---
51
def indent_code(code: str, indent: str = " ") -> str:
    """Prefix every line of *code* with *indent*.

    Used to nest a snippet inside a generated ``def`` body before it is
    handed to ``exec``.

    Args:
        code: Source text, possibly spanning several lines.
        indent: String prepended to each line.

    Returns:
        The indented text joined with ``"\\n"``. Line endings are normalised
        by ``str.splitlines`` and a trailing newline is not preserved; empty
        input yields an empty string.
    """
    # Blank lines receive the prefix too, which is harmless to the parser.
    return "\n".join(indent + line for line in code.splitlines())
54
 
55
+ # --- Tool Definitions ---
56
def _duckduckgo_search(query: str) -> str:
    """Search the web with DuckDuckGo and format up to three hits.

    Args:
        query: Free-text search query.

    Returns:
        One ``Title / URL / Snippet`` paragraph per result, separated by
        blank lines, or an error message string if the search raised.
    """
    print(f"DEBUG: duckduckgo_search called with: {query}")
    try:
        with DDGS() as ddgs:
            return "\n\n".join(
                f"Title: {res['title']}\nURL: {res['href']}\nSnippet: {res['body']}"
                for res in ddgs.text(query, max_results=3)
            )
    except Exception as e:
        # Tool boundary: failures are reported to the agent as text instead
        # of propagating into the graph.
        return f"Error performing DuckDuckGo search: {str(e)}"


# `Tool` is a class, not a decorator: `@Tool` would call `Tool(func)` and fail
# for lack of `name`/`description`. Build the tool object explicitly instead.
duckduckgo_search = Tool(
    name="duckduckgo_search",
    func=_duckduckgo_search,
    description="Search web using DuckDuckGo. Returns top 3 results.",
)
68
 
69
def _wikipedia_search(query: str) -> str:
    """Fetch a three-sentence Wikipedia summary for *query*.

    Args:
        query: Page title or search term.

    Returns:
        The summary text; a short list of disambiguation options when the
        term is ambiguous; or a message string when the page is missing or
        the lookup raised.
    """
    print(f"DEBUG: wikipedia_search called with: {query}")
    try:
        return wikipedia.summary(query, sentences=3)
    except wikipedia.DisambiguationError as e:
        # Offer the first few candidates so the agent can retry with one.
        return f"Disambiguation options: {', '.join(e.options[:3])}"
    except wikipedia.PageError:
        return "Wikipedia page not found."
    except Exception as e:
        return f"Error performing Wikipedia search: {str(e)}"


# `Tool` is a class, not a decorator: `@Tool` would call `Tool(func)` and fail
# for lack of `name`/`description`. Build the tool object explicitly instead.
wikipedia_search = Tool(
    name="wikipedia_search",
    func=_wikipedia_search,
    description="Get Wikipedia summaries. Returns first 3 sentences.",
)
81
 
82
def _arxiv_search(query: str) -> str:
    """Search arXiv and format up to three papers, sorted by relevance.

    Args:
        query: Free-text search query.

    Returns:
        One ``Title / Authors / Published / Summary`` paragraph per paper
        (summary cut to 250 characters), separated by blank lines, or an
        error message string if the search raised.
    """
    print(f"DEBUG: arxiv_search called with: {query}")
    try:
        results = arxiv.Search(
            query=query,
            max_results=3,
            sort_by=arxiv.SortCriterion.Relevance,
        ).results()

        return "\n\n".join(
            f"Title: {r.title}\nAuthors: {', '.join(a.name for a in r.authors)}\n"
            f"Published: {r.published.strftime('%Y-%m-%d')}\nSummary: {r.summary[:250]}..."
            for r in results
        )
    except Exception as e:
        # Tool boundary: failures are reported to the agent as text.
        return f"Error performing ArXiv search: {str(e)}"


# `Tool` is a class, not a decorator: `@Tool` would call `Tool(func)` and fail
# for lack of `name`/`description`. Build the tool object explicitly instead.
arxiv_search = Tool(
    name="arxiv_search",
    func=_arxiv_search,
    description="Search academic papers on arXiv. Returns top 3 results.",
)
100
 
101
# Question-answering pipeline shared by all calls; created on first use.
_QA_PIPELINE = None


def _get_qa_pipeline():
    """Return the shared QA pipeline, building it the first time it is needed."""
    global _QA_PIPELINE
    if _QA_PIPELINE is None:
        _QA_PIPELINE = hf_pipeline('question-answering', model='deepset/roberta-base-squad2')
    return _QA_PIPELINE


def _document_qa(input_str: str) -> str:
    """Answer a question from supplied document text.

    Args:
        input_str: ``'document_text||question'``. Only the first ``||``
            separates the two parts, so the question may itself contain
            ``||``.

    Returns:
        The ``'answer'`` field of the QA pipeline output, a format-error
        message when ``||`` is missing, or an error message string if the
        model raised.
    """
    print(f"DEBUG: document_qa called with: {input_str}")
    try:
        if '||' not in input_str:
            return "Invalid format. Input must be: 'document_text||question'"

        context, question = input_str.split('||', 1)
        # The pipeline is cached so the model is loaded once, not per call.
        qa_model = _get_qa_pipeline()
        return qa_model(question=question, context=context)['answer']
    except Exception as e:
        return f"Error answering question from document: {str(e)}"


# `Tool` is a class, not a decorator: `@Tool` would call `Tool(func)` and fail
# for lack of `name`/`description`. Build the tool object explicitly instead.
document_qa = Tool(
    name="document_qa",
    func=_document_qa,
    description="Answer questions from documents. Input format: 'document_text||question'",
)
117
 
118
def _python_execution(code: str) -> str:
    """Execute Python source and report its outcome as text.

    The snippet runs at the top level of a fresh namespace, so an
    assignment to ``result`` is visible afterwards. (Wrapping the snippet
    in a function would turn ``result`` into a local and discard it.)

    Args:
        code: Python source. It should assign its final value to a
            variable named ``result``, e.g. ``result = 1 + 1``.

    Returns:
        ``str(result)`` when the snippet assigned ``result``; otherwise
        whatever it printed to stdout; otherwise a note that no result was
        assigned. Exceptions raised by the snippet are returned as an
        error message string.
    """
    import contextlib
    import io

    print(f"DEBUG: python_execution called with: {code}")

    # SECURITY: exec() runs model-generated code with this process's
    # privileges. A fresh namespace hides this module's globals but is NOT a
    # sandbox; use real isolation before exposing this to untrusted input.
    env = {}
    captured = io.StringIO()
    try:
        with contextlib.redirect_stdout(captured):
            exec(code, env)
    except (Exception, SystemExit) as e:
        # SystemExit is trapped so a stray exit() cannot stop the agent.
        return f"Python execution error: {str(e)}"

    if "result" in env:
        return str(env["result"])
    printed = captured.getvalue().strip()
    return printed or 'No explicit result assigned to "result" variable.'


# `Tool` is a class, not a decorator: `@Tool` would call `Tool(func)` and fail
# for lack of `name`/`description`. Build the tool object explicitly instead.
python_execution = Tool(
    name="python_execution",
    func=_python_execution,
    description=(
        "Execute Python code and return output. "
        "The code should assign its final result to a variable named 'result'. "
        "Example: 'result = 1 + 1'"
    ),
)
 
 
 
 
133
 
class VideoTranscriptionTool(BaseTool):
    """Tool that returns the plain-text transcript of a YouTube video."""

    name: str = "transcript_video"
    # Must be an assigned field: a bare `description: "..."` is only an
    # annotation and leaves the tool without a description.
    description: str = "Fetch text transcript from YouTube videos using URL or ID. Use for any question involving video or audio. Input is the YouTube URL or ID."

    @staticmethod
    def _extract_video_id(url_or_id: str) -> Optional[str]:
        """Return the video ID found in *url_or_id*, or ``None`` if there is none.

        Recognises ``youtu.be/<id>`` links, ``youtube.com/watch`` URLs with a
        ``v`` query parameter in any position, and a bare 11-character ID.
        """
        from urllib.parse import parse_qs, urlparse

        candidate = url_or_id.strip()
        if "youtu.be/" in candidate:
            return candidate.split("youtu.be/")[1].split("?")[0] or None
        if "youtube.com/watch" in candidate:
            # Parse the query string so `v` is found even when it is not
            # the first parameter.
            ids = parse_qs(urlparse(candidate).query).get("v")
            return ids[0] if ids else None
        if len(candidate) == 11 and "http://" not in candidate and "https://" not in candidate:
            return candidate  # Assume it's just the ID
        return None

    def _run(self, url_or_id: str) -> str:
        """Fetch the transcript for a YouTube URL or video ID.

        Args:
            url_or_id: A YouTube URL or an 11-character video ID.

        Returns:
            The transcript segments joined with single spaces, or a message
            string when the input is not recognised or the fetch raised.
        """
        print(f"DEBUG: transcript_video called with: {url_or_id}")
        video_id = self._extract_video_id(url_or_id)

        if not video_id:
            return f"Invalid or unsupported YouTube URL/ID: {url_or_id}. Please provide a valid YouTube URL or 11-character ID."

        try:
            transcription = YouTubeTranscriptApi.get_transcript(video_id)
            # Timestamps are dropped; only the spoken text is returned.
            return " ".join([part['text'] for part in transcription])
        except Exception as e:
            return f"Error fetching transcript for video ID '{video_id}': {str(e)}. It might not have an English transcript, or the video is unavailable."

    def _arun(self, *args, **kwargs):
        """Async execution is not implemented for this tool."""
        raise NotImplementedError("Async not supported for this tool.")
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  # --- Agent State Definition ---
166
  class AgentState(TypedDict):
167
  question: str
 
172
  final_answer: Union[str, float, int, None]
173
  current_task: str
174
  current_thoughts: str
175
+ tools: List[Tool] # Make sure tools are passed via state
 
176
 
177
  # --- Utility Functions ---
178
  def parse_agent_response(response_content: str) -> tuple[str, str, str]:
 
199
  action_idx = response_content.find("Action:")
200
  if reasoning_idx != -1 and action_idx != -1 and reasoning_idx < action_idx:
201
  reasoning = response_content[reasoning_idx + len("Reasoning:"):action_idx].strip()
 
202
  if reasoning.startswith('"') and reasoning.endswith('"'):
203
  reasoning = reasoning[1:-1]
204
+ elif reasoning_idx != -1:
205
  reasoning = response_content[reasoning_idx + len("Reasoning:"):].strip()
206
  if reasoning.startswith('"') and reasoning.endswith('"'):
207
  reasoning = reasoning[1:-1]
208
 
 
209
  # Attempt to find Action and Action Input
210
  if action_idx != -1:
211
  action_input_idx = response_content.find("Action Input:", action_idx)
 
216
  else:
217
  action = response_content[action_idx + len("Action:"):].strip()
218
 
 
219
  if action.startswith('"') and action.endswith('"'):
220
  action = action[1:-1]
221
  if action_input.startswith('"') and action_input.endswith('"'):
222
  action_input = action_input[1:-1]
223
 
224
  # Final cleanup for any trailing JSON artifacts if heuristic grabs too much
225
+ action = action.split('"', 1)[0].strip()
226
+ action_input = action_input.split('"', 1)[0].strip()
227
 
228
  return reasoning, action, action_input
229
 
 
256
  print(f"DEBUG: Entering reasoning_node. Iteration: {state['iterations']}")
257
  print(f"DEBUG: Current history length: {len(state.get('history', []))}")
258
 
 
 
 
 
259
  state.setdefault("context", {})
260
  state.setdefault("reasoning", "")
261
  state.setdefault("iterations", 0)
 
264
 
265
  state["context"].pop("pending_action", None)
266
 
267
+ # --- Initialize local HuggingFacePipeline ---
268
+ # Using Mistral-7B-Instruct-v0.2 for better agent performance
269
+ model_name = "mistralai/Mistral-7B-Instruct-v0.2"
270
 
271
+ print(f"DEBUG: Loading local model: {model_name}...")
272
+
273
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
274
+
275
+ # Load model with optimal settings for GPU if available, else CPU
276
+ # Use bfloat16 for GPUs that support it (NVIDIA Ampere architecture and newer)
277
+ # else float16 for older GPUs or float32 for CPU/fallback.
278
+ # device_map="auto" intelligently distributes the model across available devices.
279
+ model = AutoModelForCausalLM.from_pretrained(
280
+ model_name,
281
+ torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
282
+ device_map="auto"
283
+ )
284
+
285
+ # Create a transformers pipeline
286
+ pipe = pipeline(
287
+ "text-generation",
288
+ model=model,
289
+ tokenizer=tokenizer,
290
+ max_new_tokens=1024, # Increased max_new_tokens for potentially longer JSON
291
+ temperature=0.1, # Keep low for factual, tool-use tasks
292
+ do_sample=True, # Allow some sampling
293
+ top_p=0.9,
294
+ repetition_penalty=1.1, # Help avoid repetitive output
295
+ # device_map handled by model loading
296
  )
297
 
298
+ llm = HuggingFacePipeline(pipeline=pipe)
299
+ # --- END LOCAL LLM INITIALIZATION ---
300
+
301
  tool_descriptions = "\n".join([
302
  f"- **{t.name}**: {t.description}" for t in state.get("tools", [])
303
  ])
304
 
305
+ # Enhanced system prompt for better JSON adherence
306
  system_prompt = (
307
  "You are an expert problem solver, designed to provide concise and accurate answers. "
308
  "Your process involves analyzing the question, intelligently selecting and using tools, "
 
310
  "**Available Tools:**\n"
311
  f"{tool_descriptions}\n\n"
312
  "**Tool Usage Guidelines:**\n"
313
+ "- Use **duckduckgo_search** for current events, general facts, or quick lookups. Provide a concise search query.\n"
314
+ "- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics. Provide a concise search term.\n"
315
+ "- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information. Provide a concise search query.\n"
316
+ "- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'.\n"
317
  "- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named 'result' if applicable.\n"
318
+ "- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID.\n\n"
319
  "**Current Context:**\n{context}\n\n"
320
  "**Previous Reasoning Steps:**\n{reasoning}\n\n"
321
  "**Current Task:** {current_task}\n"
 
323
  "**Your Response MUST be a valid JSON object with the following keys:**\n"
324
  "```json\n"
325
  "{\n"
326
+ " \"Reasoning\": \"Your detailed analysis of the question and why you chose a specific action. Focus on the logical steps.\",\n"
327
  " \"Action\": \"[Tool name OR 'Final Answer']\",\n"
328
+ " \"Action Input\": \"[Input for the selected tool OR the complete final answer]\"\n"
329
  "}\n"
330
  "```\n"
331
+ "**CRITICAL RULE: 'Action' and 'Action Input' MUST NOT be empty strings, unless 'Action' is 'Final Answer' and 'Action Input' is the conclusive response.**\n"
332
+ "If you cannot determine a suitable tool or a conclusive final answer after exhausting options, return Action: 'Final Answer' with a message like 'I cannot answer this question with the available tools.' or 'More information is needed.'\n"
333
+ "Ensure 'Action Input' is always the complete, valid input for the chosen 'Action'. If 'Action' is 'Final Answer', provide the complete, concise answer."
334
  )
335
 
336
  prompt = ChatPromptTemplate.from_messages([
 
338
  *state["history"]
339
  ])
340
 
341
+ formatted_messages = prompt.format_messages(
342
+ context=state["context"],
343
+ reasoning=state["reasoning"],
344
+ question=state["question"],
345
+ current_task=state["current_task"],
346
+ current_thoughts=state["current_thoughts"]
347
+ )
348
+
349
+ # Use tokenizer's chat template for optimal formatting with chat models
350
+ try:
351
+ full_input_string = tokenizer.apply_chat_template(
352
+ formatted_messages,
353
+ tokenize=False,
354
+ add_generation_prompt=True # Adds the assistant's turn start token
355
+ )
356
+ except Exception as e:
357
+ print(f"WARNING: Failed to apply chat template: {e}. Falling back to simple string join. Model performance may be affected.")
358
+ full_input_string = "\n".join([msg.content for msg in formatted_messages])
359
 
360
def call_with_retry_local(inputs, retries=3):
    """Invoke the local LLM and return its reply once it parses as JSON.

    Closure over ``llm`` and ``state`` from the enclosing scope.

    Args:
        inputs: Fully rendered prompt string handed to ``llm.invoke``.
        retries: Maximum number of attempts. Kept small because the model
            is local, so transient network failures are not the main risk.

    Returns:
        An ``AIMessage`` whose ``content`` is the validated JSON text, with
        any echoed prompt and surrounding Markdown code fence removed.

    Raises:
        RuntimeError: If no attempt produced valid JSON. The last underlying
            error is chained as ``__cause__``.

    Side effects:
        Each failed attempt appends an explanatory ``AIMessage`` to
        ``state["history"]``.
    """
    # NOTE(review): every retry re-sends the same ``inputs``; the feedback
    # appended to state["history"] only influences later prompt builds by
    # the caller, not the retries made here.
    last_error = None
    for attempt in range(retries):
        has_more_attempts = attempt < retries - 1
        content = ""  # keep the except-handlers safe if invoke() itself fails
        try:
            # assumes llm.invoke returns a plain str — TODO confirm
            response_text = llm.invoke(inputs)

            # Strip the prompt from the generated text if the model echoed it.
            if response_text.startswith(inputs):
                content = response_text[len(inputs):].strip()
            else:
                content = response_text.strip()

            print(f"DEBUG: RAW LOCAL LLM Response (Attempt {attempt+1}):\n---\n{content}\n---")

            # The system prompt shows the expected reply inside a ```json
            # fence, so a compliant model may emit the fence as well.
            # Unwrap it before validating, otherwise json.loads rejects an
            # otherwise correct answer on every attempt.
            if content.startswith("```"):
                content = content[3:]
                if content[:4].lower() == "json":
                    content = content[4:]
                content = content.strip()
                if content.endswith("```"):
                    content = content[:-3].strip()

            # Parse only to validate structure; the caller re-parses the text.
            json.loads(content)

            return AIMessage(content=content)
        except json.JSONDecodeError as e:
            last_error = e
            print(f"[Retry {attempt+1}/{retries}] Local LLM returned invalid JSON. Error: {e}. Retrying...")
            print(f"Invalid JSON content (partial): {content[:200]}...")
            state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid JSON. Expected format: ```json{{\"Reasoning\": \"...\", \"Action\": \"...\", \"Action Input\": \"...\"}}```. Please ensure your response is ONLY valid JSON and strictly follows the format. Error: {e}"))
            if has_more_attempts:  # no point in waiting right before giving up
                time.sleep(5)
        except Exception as e:
            last_error = e
            print(f"[Retry {attempt+1}/{retries}] An unexpected error occurred during local LLM call: {e}.")
            state["history"].append(AIMessage(content=f"[Local LLM Error] Failed to get a response from the local LLM: {e}. Trying again."))
            if has_more_attempts:
                time.sleep(10)
    raise RuntimeError("Failed after multiple retries due to local Hugging Face model issues or invalid JSON.") from last_error
387
+
388
+ response = call_with_retry_local(full_input_string)
 
 
 
 
 
 
389
 
390
  content = response.content
391
  reasoning, action, action_input = parse_agent_response(content)
392
 
393
  print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
394
 
395
+ if isinstance(response, AIMessage) and content == response.content:
 
 
396
  state["history"].append(AIMessage(content=content))
397
 
398
  state["reasoning"] += f"\nStep {state['iterations'] + 1}: {reasoning}"
 
406
  "tool": action,
407
  "input": action_input
408
  }
 
 
409
  state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
410
 
411
 
 
430
  tool_name = tool_call_dict.get("tool")
431
  tool_input = tool_call_dict.get("input")
432
 
433
+ if not tool_name or tool_input is None: # Check for None as empty string "" might be valid input for some tools
434
+ error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty or None. LLM needs to provide valid 'Action' and 'Action Input'."
435
+ print(f"ERROR: {error_message}")
436
  state["history"].append(AIMessage(content=error_message))
437
  state["context"].pop("pending_action", None)
438
  return state
439
 
440
  available_tools = state.get("tools", [])
441
+ # Use Tool.name to match, which is what @Tool decorator sets
442
  tool_fn = next((t for t in available_tools if t.name == tool_name), None)
443
 
444
  if tool_fn is None:
 
448
  try:
449
  print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
450
  tool_output = tool_fn.run(tool_input)
451
+ if not tool_output and tool_output is not False: # Ensure 'False' or 0 are not treated as empty
452
  tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
453
  except Exception as e:
454
  tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
 
488
  # ====== Agent Interface ======
489
  class BasicAgent:
490
  def __init__(self):
491
+ # Instantiate tools
492
  self.tools = [
493
+ duckduckgo_search,
494
+ wikipedia_search,
495
+ arxiv_search,
496
+ document_qa,
497
+ python_execution,
498
+ VideoTranscriptionTool() # Instantiate the class-based tool
499
  ]
500
  self.workflow = create_agent_workflow(self.tools)
501
 
 
511
  "final_answer": None,
512
  "current_task": "Understand the question and plan the next step.",
513
  "current_thoughts": "",
514
+ "tools": self.tools # Pass tools through state
515
  }
516
 
517
+ # The invoke method returns the final state after execution
518
  final_state = self.workflow.invoke(state)
519
 
520
  if final_state.get("final_answer") is not None:
 
523
  return answer
524
  else:
525
  print(f"--- ERROR: Agent finished without setting 'final_answer' for question: {question} ---")
 
526
  if final_state["history"]:
527
  last_message = final_state["history"][-1].content
528
  print(f"Last message in history: {last_message}")