EtienneB committed
Commit de96b54 · 1 Parent(s): 4a5a82f
Files changed (2):
  1. agent.py +7 -4
  2. tools.py +61 -3
agent.py CHANGED
@@ -10,13 +10,15 @@ from langgraph.prebuilt import ToolNode, tools_condition
 
 from tools import (absolute, add, analyze_csv_file, analyze_excel_file,
                    arvix_search, audio_transcription, compound_interest,
-                   convert_temperature, divide, exponential, extract_text,
-                   factorial, floor_divide, get_current_time_in_timezone,
+                   convert_temperature, divide, exponential,
+                   extract_text_from_image, factorial, floor_divide,
+                   get_current_time_in_timezone,
+                   get_max_bird_species_count_from_video,
                    greatest_common_divisor, is_prime, least_common_multiple,
                    logarithm, modulus, multiply, percentage_calculator, power,
                    python_code_parser, reverse_sentence,
                    roman_calculator_converter, square_root, subtract,
-                   web_search, wiki_search)
+                   web_content_extract, web_search, wiki_search)
 
 # Load Constants
 load_dotenv()
@@ -32,7 +34,8 @@ tools = [
     is_prime, least_common_multiple, percentage_calculator,
     wiki_search, analyze_excel_file, arvix_search,
     audio_transcription, python_code_parser, analyze_csv_file,
-    extract_text, reverse_sentence
+    extract_text_from_image, reverse_sentence, web_content_extract,
+    get_max_bird_species_count_from_video
 ]
 
 # Load system prompt
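For orientation, a minimal sketch of how a tools list like the one extended above is typically wired into a LangGraph agent loop using the ToolNode and tools_condition helpers that agent.py already imports. The graph construction itself is not part of this diff, so the model name and node labels below are illustrative assumptions rather than the repository's actual code.

# Sketch only: the graph construction is outside this diff; the model name and
# node labels are assumptions for illustration.
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.graph import START, MessagesState, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition

llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0)  # assumed model
llm_with_tools = llm.bind_tools(tools)  # `tools` is the list extended in this commit

def assistant(state: MessagesState):
    # One LLM step; tools_condition routes to the ToolNode whenever tool calls are emitted.
    return {"messages": [llm_with_tools.invoke(state["messages"])]}

builder = StateGraph(MessagesState)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))
builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", tools_condition)
builder.add_edge("tools", "assistant")
graph = builder.compile()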
tools.py CHANGED
@@ -18,6 +18,8 @@ from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.tools import tool
 from langchain_google_genai import ChatGoogleGenerativeAI
 from pytube import YouTube
+from langchain_tavily import TavilySearch
+from bs4 import BeautifulSoup
 
 
 @tool
@@ -228,7 +230,7 @@ def exponential(x: Union[int, float]) -> Union[float, str]:
     except Exception as e:
         return f"Error in exponential calculation: {str(e)}"
 
-
+"""
 @tool
 def web_search(query: str) -> str:
     """Performs a DuckDuckGo search for the given query and returns the results.
@@ -253,7 +255,7 @@ def web_search(query: str) -> str:
         return results
     except Exception as e:
         return f"Error performing web search: {str(e)}"
-
+"""
 
 @tool
 def roman_calculator_converter(value1: int, value2: int, oper: str) -> str:
@@ -700,7 +702,8 @@ def analyze_csv_file(file_path: str, query: str) -> str:
 # Extract Text Tool
 vision_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0)
 
-def extract_text(img_path: str) -> str:
+@tool
+def extract_text_from_image(img_path: str) -> str:
     """
     Extract text from an image file using a multimodal model.
 
@@ -828,3 +831,58 @@ def get_max_bird_species_count_from_video(url: str) -> Dict:
         "timestamp": f"{max_species_frame_time}s",
         "species_list": species_at_max
     }
+
+@tool
+def web_search(query: str) -> str:
+    """
+    Searches the web and returns a list of the most relevant URLs.
+    Use this FIRST for complex queries, metadata questions, or to find the right sources.
+    Then follow up with web_content_extract on the most promising URL.
+    """
+    try:
+        tavily_search = TavilySearch(
+            max_results=5,
+            topic="general",
+            search_depth="advanced",
+            include_raw_content=False, # Just URLs and snippets
+        )
+
+        results = tavily_search.invoke(query)
+        # Format results to show URLs and brief descriptions
+        web_search_results = "Search Results:\n"
+        for i, result in enumerate(results["results"], 1):
+            web_search_results += f"{i}. {result['title']}: {result['url']}\n {result['content'][:150]}...\n\n"
+
+        return web_search_results
+    except Exception as e:
+        return f"web_search tool error: {str(e)}"
+
+@tool
+def web_content_extract(url: str) -> str:
+    """
+    Extracts and analyzes specific content from a URL using BeautifulSoup.
+    Particularly effective for Wikipedia metadata pages, discussion pages,
+    and structured web content.
+    Can be used after web_search to get detailed information.
+    """
+    try:
+
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()  # Raise exception for 4XX/5XX responses
+
+        soup = BeautifulSoup(response.text, 'html.parser')
+        for element in soup.select('script, style, footer, nav, header'):
+            if element:
+                element.decompose()
+        text = soup.body.get_text(separator='\n', strip=True) if soup.body else soup.get_text(separator='\n', strip=True)
+
+        # Limit content length for response
+        return f"Content extracted from {url}:\n\n{text[:10000]}..." if len(text) > 10000 else text
+
+    except Exception as e:
+        return f"web_content_extract tool error: {str(e)}"
+
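A quick usage sketch of the two-step flow the new docstrings describe: web_search returns titles, URLs, and short snippets, and web_content_extract then pulls the body text of the most promising page. The query and URL below are placeholders, Tavily needs a TAVILY_API_KEY in the environment, and the sketch assumes tools.py imports requests elsewhere (that import is outside this diff).

# Usage sketch with placeholder inputs; assumes TAVILY_API_KEY is set and that
# `requests` is imported elsewhere in tools.py.
from tools import web_content_extract, web_search

hits = web_search.invoke({"query": "current Ubuntu LTS release"})  # placeholder query
print(hits)  # numbered titles, URLs, and 150-character snippets

page = web_content_extract.invoke({"url": "https://en.wikipedia.org/wiki/Ubuntu"})  # placeholder URL
print(page[:500])  # body text with script, style, nav, header, and footer elements stripped

Keeping the search step to URLs and snippets (include_raw_content=False) and deferring full-page fetching to web_content_extract keeps each tool response small and lets the agent choose which page is worth reading in full.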