Spaces:
Sleeping
Sleeping
EtienneB
committed on
Commit
·
de96b54
1
Parent(s):
4a5a82f
updates
Browse files
agent.py
CHANGED
@@ -10,13 +10,15 @@ from langgraph.prebuilt import ToolNode, tools_condition
|
|
10 |
|
11 |
from tools import (absolute, add, analyze_csv_file, analyze_excel_file,
|
12 |
arvix_search, audio_transcription, compound_interest,
|
13 |
-
convert_temperature, divide, exponential,
|
14 |
-
factorial, floor_divide,
|
|
|
|
|
15 |
greatest_common_divisor, is_prime, least_common_multiple,
|
16 |
logarithm, modulus, multiply, percentage_calculator, power,
|
17 |
python_code_parser, reverse_sentence,
|
18 |
roman_calculator_converter, square_root, subtract,
|
19 |
-
web_search, wiki_search)
|
20 |
|
21 |
# Load Constants
|
22 |
load_dotenv()
|
@@ -32,7 +34,8 @@ tools = [
|
|
32 |
is_prime, least_common_multiple, percentage_calculator,
|
33 |
wiki_search, analyze_excel_file, arvix_search,
|
34 |
audio_transcription, python_code_parser, analyze_csv_file,
|
35 |
-
|
|
|
36 |
]
|
37 |
|
38 |
# Load system prompt
|
|
|
10 |
|
11 |
from tools import (absolute, add, analyze_csv_file, analyze_excel_file,
|
12 |
arvix_search, audio_transcription, compound_interest,
|
13 |
+
convert_temperature, divide, exponential,
|
14 |
+
extract_text_from_image, factorial, floor_divide,
|
15 |
+
get_current_time_in_timezone,
|
16 |
+
get_max_bird_species_count_from_video,
|
17 |
greatest_common_divisor, is_prime, least_common_multiple,
|
18 |
logarithm, modulus, multiply, percentage_calculator, power,
|
19 |
python_code_parser, reverse_sentence,
|
20 |
roman_calculator_converter, square_root, subtract,
|
21 |
+
web_content_extract, web_search, wiki_search)
|
22 |
|
23 |
# Load Constants
|
24 |
load_dotenv()
|
|
|
34 |
is_prime, least_common_multiple, percentage_calculator,
|
35 |
wiki_search, analyze_excel_file, arvix_search,
|
36 |
audio_transcription, python_code_parser, analyze_csv_file,
|
37 |
+
extract_text_from_image, reverse_sentence, web_content_extract,
|
38 |
+
get_max_bird_species_count_from_video
|
39 |
]
|
40 |
|
41 |
# Load system prompt
|
tools.py
CHANGED
@@ -18,6 +18,8 @@ from langchain_community.tools import DuckDuckGoSearchRun
|
|
18 |
from langchain_core.tools import tool
|
19 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
20 |
from pytube import YouTube
|
|
|
|
|
21 |
|
22 |
|
23 |
@tool
|
@@ -228,7 +230,7 @@ def exponential(x: Union[int, float]) -> Union[float, str]:
|
|
228 |
except Exception as e:
|
229 |
return f"Error in exponential calculation: {str(e)}"
|
230 |
|
231 |
-
|
232 |
@tool
|
233 |
def web_search(query: str) -> str:
|
234 |
"""Performs a DuckDuckGo search for the given query and returns the results.
|
@@ -253,7 +255,7 @@ def web_search(query: str) -> str:
|
|
253 |
return results
|
254 |
except Exception as e:
|
255 |
return f"Error performing web search: {str(e)}"
|
256 |
-
|
257 |
|
258 |
@tool
|
259 |
def roman_calculator_converter(value1: int, value2: int, oper: str) -> str:
|
@@ -700,7 +702,8 @@ def analyze_csv_file(file_path: str, query: str) -> str:
|
|
700 |
# Extract Text Tool
|
701 |
vision_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0)
|
702 |
|
703 |
-
|
|
|
704 |
"""
|
705 |
Extract text from an image file using a multimodal model.
|
706 |
|
@@ -828,3 +831,58 @@ def get_max_bird_species_count_from_video(url: str) -> Dict:
|
|
828 |
"timestamp": f"{max_species_frame_time}s",
|
829 |
"species_list": species_at_max
|
830 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
from langchain_core.tools import tool
|
19 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
20 |
from pytube import YouTube
|
21 |
+
from langchain_tavily import TavilySearch
|
22 |
+
from bs4 import BeautifulSoup
|
23 |
|
24 |
|
25 |
@tool
|
|
|
230 |
except Exception as e:
|
231 |
return f"Error in exponential calculation: {str(e)}"
|
232 |
|
233 |
+
"""
|
234 |
@tool
|
235 |
def web_search(query: str) -> str:
|
236 |
"""Performs a DuckDuckGo search for the given query and returns the results.
|
|
|
255 |
return results
|
256 |
except Exception as e:
|
257 |
return f"Error performing web search: {str(e)}"
|
258 |
+
"""
|
259 |
|
260 |
@tool
|
261 |
def roman_calculator_converter(value1: int, value2: int, oper: str) -> str:
|
|
|
702 |
# Extract Text Tool
|
703 |
vision_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0)
|
704 |
|
705 |
+
@tool
|
706 |
+
def extract_text_from_image(img_path: str) -> str:
|
707 |
"""
|
708 |
Extract text from an image file using a multimodal model.
|
709 |
|
|
|
831 |
"timestamp": f"{max_species_frame_time}s",
|
832 |
"species_list": species_at_max
|
833 |
}
|
834 |
+
|
835 |
+
@tool
def web_search(query: str) -> str:
    """
    Searches the web and returns a list of the most relevant URLs.

    Use this FIRST for complex queries, metadata questions, or to find the
    right sources. Then follow up with web_content_extract on the most
    promising URL.

    Args:
        query: The search query string.

    Returns:
        A formatted string starting with "Search Results:" listing each
        result's title, URL, and a short content snippet, or a string
        starting with "web_search tool error:" on failure.
    """
    try:
        tavily_search = TavilySearch(
            max_results=5,
            topic="general",
            search_depth="advanced",
            include_raw_content=False,  # Just URLs and snippets
        )

        response = tavily_search.invoke(query)

        # Build the output with list + join instead of repeated string
        # concatenation, and use .get() with defaults so a single malformed
        # result entry cannot raise and discard every other result via the
        # broad except below.
        parts = ["Search Results:\n"]
        for i, result in enumerate(response.get("results", []), 1):
            title = result.get("title", "(no title)")
            url = result.get("url", "(no url)")
            snippet = result.get("content", "")[:150]
            parts.append(f"{i}. {title}: {url}\n   {snippet}...\n\n")

        return "".join(parts)
    except Exception as e:
        return f"web_search tool error: {str(e)}"
|
859 |
+
|
860 |
+
@tool
def web_content_extract(url: str) -> str:
    """
    Extracts and analyzes specific content from a URL using BeautifulSoup.

    Particularly effective for Wikipedia metadata pages, discussion pages,
    and structured web content. Can be used after web_search to get
    detailed information.

    Args:
        url: The page URL to fetch and extract visible text from.

    Returns:
        The page's visible text prefixed with the source URL (truncated to
        10,000 characters for long pages), or a string starting with
        "web_content_extract tool error:" on failure.
    """
    try:
        # A browser-like User-Agent avoids trivial bot blocking on some sites.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise exception for 4XX/5XX responses

        soup = BeautifulSoup(response.text, 'html.parser')
        # Strip non-content chrome before extracting text. select() never
        # yields falsy elements, so no per-element truthiness check is needed.
        for element in soup.select('script, style, footer, nav, header'):
            element.decompose()

        container = soup.body if soup.body else soup
        text = container.get_text(separator='\n', strip=True)

        # Always include the source-URL header so callers can tell which page
        # the text came from; the original only added it when truncating,
        # which made short- and long-page outputs inconsistent.
        if len(text) > 10000:
            return f"Content extracted from {url}:\n\n{text[:10000]}..."
        return f"Content extracted from {url}:\n\n{text}"
    except Exception as e:
        return f"web_content_extract tool error: {str(e)}"
|
888 |
+
|