Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -33,10 +33,12 @@ from typing import TypedDict, List, Union, Any, Dict, Optional
|
|
33 |
from langchain.schema import HumanMessage, AIMessage, SystemMessage
|
34 |
from langchain.prompts import ChatPromptTemplate
|
35 |
from langgraph.graph import StateGraph, END
|
36 |
-
from
|
|
|
|
|
|
|
37 |
|
38 |
# Hugging Face local model imports
|
39 |
-
from langchain_community.llms import HuggingFacePipeline
|
40 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
41 |
import torch
|
42 |
|
@@ -53,7 +55,7 @@ def indent_code(code: str, indent: str = " ") -> str:
|
|
53 |
return "\n".join(indent + line for line in code.splitlines())
|
54 |
|
55 |
# --- Tool Definitions ---
|
56 |
-
@
|
57 |
def duckduckgo_search(query: str) -> str:
|
58 |
"""Search web using DuckDuckGo. Returns top 3 results."""
|
59 |
print(f"DEBUG: duckduckgo_search called with: {query}")
|
@@ -66,7 +68,7 @@ def duckduckgo_search(query: str) -> str:
|
|
66 |
except Exception as e:
|
67 |
return f"Error performing DuckDuckGo search: {str(e)}"
|
68 |
|
69 |
-
@
|
70 |
def wikipedia_search(query: str) -> str:
|
71 |
"""Get Wikipedia summaries. Returns first 3 sentences."""
|
72 |
print(f"DEBUG: wikipedia_search called with: {query}")
|
@@ -79,7 +81,7 @@ def wikipedia_search(query: str) -> str:
|
|
79 |
except Exception as e:
|
80 |
return f"Error performing Wikipedia search: {str(e)}"
|
81 |
|
82 |
-
@
|
83 |
def arxiv_search(query: str) -> str:
|
84 |
"""Search academic papers on arXiv. Returns top 3 results."""
|
85 |
print(f"DEBUG: arxiv_search called with: {query}")
|
@@ -98,7 +100,7 @@ def arxiv_search(query: str) -> str:
|
|
98 |
except Exception as e:
|
99 |
return f"Error performing ArXiv search: {str(e)}"
|
100 |
|
101 |
-
@
|
102 |
def document_qa(input_str: str) -> str:
|
103 |
"""Answer questions from documents. Input format: 'document_text||question'"""
|
104 |
print(f"DEBUG: document_qa called with: {input_str}")
|
@@ -107,15 +109,15 @@ def document_qa(input_str: str) -> str:
|
|
107 |
return "Invalid format. Input must be: 'document_text||question'"
|
108 |
|
109 |
context, question = input_str.split('||', 1)
|
110 |
-
# Load QA model on first call or ensure it's loaded once globally
|
111 |
-
#
|
112 |
-
#
|
113 |
qa_model = hf_pipeline('question-answering', model='deepset/roberta-base-squad2')
|
114 |
return qa_model(question=question, context=context)['answer']
|
115 |
except Exception as e:
|
116 |
return f"Error answering question from document: {str(e)}"
|
117 |
|
118 |
-
@
|
119 |
def python_execution(code: str) -> str:
|
120 |
"""Execute Python code and return output.
|
121 |
The code should assign its final result to a variable named 'result'.
|
@@ -126,8 +128,10 @@ def python_execution(code: str) -> str:
|
|
126 |
# Create isolated environment
|
127 |
env = {}
|
128 |
# Wrap code in a function to isolate scope and capture 'result'
|
129 |
-
exec
|
130 |
-
|
|
|
|
|
131 |
except Exception as e:
|
132 |
return f"Python execution error: {str(e)}"
|
133 |
|
@@ -139,10 +143,10 @@ class VideoTranscriptionTool(BaseTool):
|
|
139 |
print(f"DEBUG: transcript_video called with: {url_or_id}")
|
140 |
video_id = None
|
141 |
# Basic parsing for common YouTube URL formats
|
142 |
-
if "
|
143 |
-
video_id = url_or_id.split("youtu.be/")[1].split("?")[0]
|
144 |
-
elif "youtube.com/watch?v=" in url_or_id:
|
145 |
video_id = url_or_id.split("v=")[1].split("&")[0]
|
|
|
|
|
146 |
elif len(url_or_id.strip()) == 11 and not ("http://" in url_or_id or "https://" in url_or_id):
|
147 |
video_id = url_or_id.strip() # Assume it's just the ID
|
148 |
|
@@ -151,12 +155,9 @@ class VideoTranscriptionTool(BaseTool):
|
|
151 |
|
152 |
try:
|
153 |
transcription = YouTubeTranscriptApi.get_transcript(video_id)
|
154 |
-
# You can add include_timestamps logic here if needed,
|
155 |
-
# but for simplicity, let's just return the text.
|
156 |
return " ".join([part['text'] for part in transcription])
|
157 |
|
158 |
except Exception as e:
|
159 |
-
# Catch specific errors for better messages, e.g., NoTranscriptFound
|
160 |
return f"Error fetching transcript for video ID '{video_id}': {str(e)}. It might not have an English transcript, or the video is unavailable."
|
161 |
|
162 |
def _arun(self, *args, **kwargs):
|
@@ -172,7 +173,7 @@ class AgentState(TypedDict):
|
|
172 |
final_answer: Union[str, float, int, None]
|
173 |
current_task: str
|
174 |
current_thoughts: str
|
175 |
-
tools: List[
|
176 |
|
177 |
# --- Utility Functions ---
|
178 |
def parse_agent_response(response_content: str) -> tuple[str, str, str]:
|
@@ -290,8 +291,8 @@ def reasoning_node(state: AgentState) -> AgentState:
|
|
290 |
max_new_tokens=1024, # Increased max_new_tokens for potentially longer JSON
|
291 |
temperature=0.1, # Keep low for factual, tool-use tasks
|
292 |
do_sample=True, # Allow some sampling
|
293 |
-
top_p=0.9,
|
294 |
-
repetition_penalty=1.1, #
|
295 |
# device_map handled by model loading
|
296 |
)
|
297 |
|
@@ -314,7 +315,7 @@ def reasoning_node(state: AgentState) -> AgentState:
|
|
314 |
"- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics. Provide a concise search term.\n"
|
315 |
"- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information. Provide a concise search query.\n"
|
316 |
"- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'.\n"
|
317 |
-
"- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named '
|
318 |
"- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID.\n\n"
|
319 |
"**Current Context:**\n{context}\n\n"
|
320 |
"**Previous Reasoning Steps:**\n{reasoning}\n\n"
|
@@ -430,7 +431,7 @@ def tool_node(state: AgentState) -> AgentState:
|
|
430 |
tool_name = tool_call_dict.get("tool")
|
431 |
tool_input = tool_call_dict.get("input")
|
432 |
|
433 |
-
if not tool_name or tool_input is None:
|
434 |
error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty or None. LLM needs to provide valid 'Action' and 'Action Input'."
|
435 |
print(f"ERROR: {error_message}")
|
436 |
state["history"].append(AIMessage(content=error_message))
|
@@ -438,7 +439,6 @@ def tool_node(state: AgentState) -> AgentState:
|
|
438 |
return state
|
439 |
|
440 |
available_tools = state.get("tools", [])
|
441 |
-
# Use Tool.name to match, which is what @Tool decorator sets
|
442 |
tool_fn = next((t for t in available_tools if t.name == tool_name), None)
|
443 |
|
444 |
if tool_fn is None:
|
@@ -448,7 +448,7 @@ def tool_node(state: AgentState) -> AgentState:
|
|
448 |
try:
|
449 |
print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
|
450 |
tool_output = tool_fn.run(tool_input)
|
451 |
-
if not tool_output and tool_output is not False:
|
452 |
tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
|
453 |
except Exception as e:
|
454 |
tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
|
@@ -461,7 +461,7 @@ def tool_node(state: AgentState) -> AgentState:
|
|
461 |
|
462 |
|
463 |
# ====== Agent Graph ======
|
464 |
-
def create_agent_workflow(tools: List[
|
465 |
workflow = StateGraph(AgentState)
|
466 |
|
467 |
workflow.add_node("reason", reasoning_node)
|
@@ -495,7 +495,7 @@ class BasicAgent:
|
|
495 |
arxiv_search,
|
496 |
document_qa,
|
497 |
python_execution,
|
498 |
-
VideoTranscriptionTool()
|
499 |
]
|
500 |
self.workflow = create_agent_workflow(self.tools)
|
501 |
|
@@ -511,10 +511,9 @@ class BasicAgent:
|
|
511 |
"final_answer": None,
|
512 |
"current_task": "Understand the question and plan the next step.",
|
513 |
"current_thoughts": "",
|
514 |
-
"tools": self.tools
|
515 |
}
|
516 |
|
517 |
-
# The invoke method returns the final state after execution
|
518 |
final_state = self.workflow.invoke(state)
|
519 |
|
520 |
if final_state.get("final_answer") is not None:
|
|
|
33 |
from langchain.schema import HumanMessage, AIMessage, SystemMessage
|
34 |
from langchain.prompts import ChatPromptTemplate
|
35 |
from langgraph.graph import StateGraph, END
|
36 |
+
from langchain_community.llms import HuggingFacePipeline
|
37 |
+
|
38 |
+
# Corrected Tool import: Use 'tool' (lowercase)
|
39 |
+
from langchain_core.tools import BaseTool, tool # <--- CHANGED HERE
|
40 |
|
41 |
# Hugging Face local model imports
|
|
|
42 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
43 |
import torch
|
44 |
|
|
|
55 |
return "\n".join(indent + line for line in code.splitlines())
|
56 |
|
57 |
# --- Tool Definitions ---
|
58 |
+
@tool # <--- CHANGED HERE
|
59 |
def duckduckgo_search(query: str) -> str:
|
60 |
"""Search web using DuckDuckGo. Returns top 3 results."""
|
61 |
print(f"DEBUG: duckduckgo_search called with: {query}")
|
|
|
68 |
except Exception as e:
|
69 |
return f"Error performing DuckDuckGo search: {str(e)}"
|
70 |
|
71 |
+
@tool # <--- CHANGED HERE
|
72 |
def wikipedia_search(query: str) -> str:
|
73 |
"""Get Wikipedia summaries. Returns first 3 sentences."""
|
74 |
print(f"DEBUG: wikipedia_search called with: {query}")
|
|
|
81 |
except Exception as e:
|
82 |
return f"Error performing Wikipedia search: {str(e)}"
|
83 |
|
84 |
+
@tool # <--- CHANGED HERE
|
85 |
def arxiv_search(query: str) -> str:
|
86 |
"""Search academic papers on arXiv. Returns top 3 results."""
|
87 |
print(f"DEBUG: arxiv_search called with: {query}")
|
|
|
100 |
except Exception as e:
|
101 |
return f"Error performing ArXiv search: {str(e)}"
|
102 |
|
103 |
+
@tool # <--- CHANGED HERE
|
104 |
def document_qa(input_str: str) -> str:
|
105 |
"""Answer questions from documents. Input format: 'document_text||question'"""
|
106 |
print(f"DEBUG: document_qa called with: {input_str}")
|
|
|
109 |
return "Invalid format. Input must be: 'document_text||question'"
|
110 |
|
111 |
context, question = input_str.split('||', 1)
|
112 |
+
# NOTE: the QA pipeline is built inside this function, so it is re-created on every call rather than cached.
|
113 |
+
# It's better to load once in __init__ for BasicAgent if possible,
|
114 |
+
# but this lazy loading prevents initial heavy load if tool is not used.
|
115 |
qa_model = hf_pipeline('question-answering', model='deepset/roberta-base-squad2')
|
116 |
return qa_model(question=question, context=context)['answer']
|
117 |
except Exception as e:
|
118 |
return f"Error answering question from document: {str(e)}"
|
119 |
|
120 |
+
@tool # <--- CHANGED HERE
|
121 |
def python_execution(code: str) -> str:
|
122 |
"""Execute Python code and return output.
|
123 |
The code should assign its final result to a variable named 'result'.
|
|
|
128 |
# Create isolated environment
|
129 |
env = {}
|
130 |
# Wrap code in a function to isolate scope; '_result_value' receives that function's return value (None unless the code returns one)
|
131 |
+
# The exec function is used carefully here. In a production environment,
|
132 |
+
# consider a more robust and secure sandbox (e.g., Docker, dedicated service).
|
133 |
+
exec(f"def __exec_fn__():\n{indent_code(code)}\n_result_value = __exec_fn__()", globals(), env)
|
134 |
+
return str(env.get('_result_value', 'No explicit result assigned to "_result_value" variable.'))
|
135 |
except Exception as e:
|
136 |
return f"Python execution error: {str(e)}"
|
137 |
|
|
|
143 |
print(f"DEBUG: transcript_video called with: {url_or_id}")
|
144 |
video_id = None
|
145 |
# Basic parsing for common YouTube URL formats
|
146 |
+
if "youtube.com/watch?v=" in url_or_id:
|
|
|
|
|
147 |
video_id = url_or_id.split("v=")[1].split("&")[0]
|
148 |
+
elif "youtu.be/" in url_or_id:
|
149 |
+
video_id = url_or_id.split("youtu.be/")[1].split("?")[0]
|
150 |
elif len(url_or_id.strip()) == 11 and not ("http://" in url_or_id or "https://" in url_or_id):
|
151 |
video_id = url_or_id.strip() # Assume it's just the ID
|
152 |
|
|
|
155 |
|
156 |
try:
|
157 |
transcription = YouTubeTranscriptApi.get_transcript(video_id)
|
|
|
|
|
158 |
return " ".join([part['text'] for part in transcription])
|
159 |
|
160 |
except Exception as e:
|
|
|
161 |
return f"Error fetching transcript for video ID '{video_id}': {str(e)}. It might not have an English transcript, or the video is unavailable."
|
162 |
|
163 |
def _arun(self, *args, **kwargs):
|
|
|
173 |
final_answer: Union[str, float, int, None]
|
174 |
current_task: str
|
175 |
current_thoughts: str
|
176 |
+
tools: List[BaseTool] # Make sure tools are passed via state, using BaseTool type
|
177 |
|
178 |
# --- Utility Functions ---
|
179 |
def parse_agent_response(response_content: str) -> tuple[str, str, str]:
|
|
|
291 |
max_new_tokens=1024, # Increased max_new_tokens for potentially longer JSON
|
292 |
temperature=0.1, # Keep low for factual, tool-use tasks
|
293 |
do_sample=True, # Allow some sampling
|
294 |
+
top_p=0.9, # Nucleus sampling
|
295 |
+
repetition_penalty=1.1, # Avoid repetition
|
296 |
# device_map handled by model loading
|
297 |
)
|
298 |
|
|
|
315 |
"- Use **wikipedia_search** for encyclopedic information, historical context, or detailed topics. Provide a concise search term.\n"
|
316 |
"- Use **arxiv_search** for scientific papers, research, or cutting-edge technical information. Provide a concise search query.\n"
|
317 |
"- Use **document_qa** when the question explicitly refers to a specific document or when you have content to query. Input format: 'document_text||question'.\n"
|
318 |
+
"- Use **python_execution** for complex calculations, data manipulation, or logical operations that cannot be done with simple reasoning. Always provide the full Python code, ensuring it's valid and executable, and assign the final result to a variable named '_result_value' (e.g., '_result_value = 1 + 1').\n"
|
319 |
"- Use **transcript_video** for any question involving video or audio content (e.g., YouTube). Provide the full YouTube URL or video ID.\n\n"
|
320 |
"**Current Context:**\n{context}\n\n"
|
321 |
"**Previous Reasoning Steps:**\n{reasoning}\n\n"
|
|
|
431 |
tool_name = tool_call_dict.get("tool")
|
432 |
tool_input = tool_call_dict.get("input")
|
433 |
|
434 |
+
if not tool_name or tool_input is None:
|
435 |
error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty or None. LLM needs to provide valid 'Action' and 'Action Input'."
|
436 |
print(f"ERROR: {error_message}")
|
437 |
state["history"].append(AIMessage(content=error_message))
|
|
|
439 |
return state
|
440 |
|
441 |
available_tools = state.get("tools", [])
|
|
|
442 |
tool_fn = next((t for t in available_tools if t.name == tool_name), None)
|
443 |
|
444 |
if tool_fn is None:
|
|
|
448 |
try:
|
449 |
print(f"DEBUG: Invoking tool '{tool_name}' with input: '{tool_input[:100]}...'")
|
450 |
tool_output = tool_fn.run(tool_input)
|
451 |
+
if not tool_output and tool_output is not False:
|
452 |
tool_output = f"[{tool_name} output] No specific result found for '{tool_input}'. The tool might have returned an empty response."
|
453 |
except Exception as e:
|
454 |
tool_output = f"[Tool Error] An error occurred while running '{tool_name}': {str(e)}"
|
|
|
461 |
|
462 |
|
463 |
# ====== Agent Graph ======
|
464 |
+
def create_agent_workflow(tools: List[BaseTool]): # Use BaseTool for consistency
|
465 |
workflow = StateGraph(AgentState)
|
466 |
|
467 |
workflow.add_node("reason", reasoning_node)
|
|
|
495 |
arxiv_search,
|
496 |
document_qa,
|
497 |
python_execution,
|
498 |
+
VideoTranscriptionTool()
|
499 |
]
|
500 |
self.workflow = create_agent_workflow(self.tools)
|
501 |
|
|
|
511 |
"final_answer": None,
|
512 |
"current_task": "Understand the question and plan the next step.",
|
513 |
"current_thoughts": "",
|
514 |
+
"tools": self.tools
|
515 |
}
|
516 |
|
|
|
517 |
final_state = self.workflow.invoke(state)
|
518 |
|
519 |
if final_state.get("final_answer") is not None:
|