EtienneB committed on
Commit
a8f7fb2
·
1 Parent(s): 0a05d57
Files changed (2) hide show
  1. agent.py +2 -1
  2. tools.py +40 -40
agent.py CHANGED
@@ -23,6 +23,7 @@ load_dotenv()
23
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
24
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
25
 
 
26
  tools = [
27
  multiply, add, subtract, power, divide, modulus,
28
  square_root, floor_divide, absolute, logarithm,
@@ -117,7 +118,7 @@ You are an advanced AI agent equipped with multiple tools to solve complex, mult
117
  ### Information Retrieval
118
  - **web_search**: Search the web for information
119
  - **web_content_extract**: Extract content from web pages
120
- - **wikipedia_search**: Search Wikipedia for information
121
  - **arvix_search**: Search academic papers on arXiv
122
 
123
  ### Utilities
 
23
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
24
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
25
 
26
+
27
  tools = [
28
  multiply, add, subtract, power, divide, modulus,
29
  square_root, floor_divide, absolute, logarithm,
 
118
  ### Information Retrieval
119
  - **web_search**: Search the web for information
120
  - **web_content_extract**: Extract content from web pages
121
+ - **wikipedia_search**: Search Wikipedia for information, whenever the question refers to wikipedia
122
  - **arvix_search**: Search academic papers on arXiv
123
 
124
  ### Utilities
tools.py CHANGED
@@ -5,6 +5,8 @@ import os
5
  import urllib.parse
6
  from pathlib import Path
7
  from typing import Dict, Union
 
 
8
 
9
  import pandas
10
  import pytz
@@ -22,6 +24,10 @@ from langchain_google_genai import ChatGoogleGenerativeAI
22
  from langchain_openai import ChatOpenAI
23
  from langchain_tavily import TavilySearch
24
 
 
 
 
 
25
 
26
  @tool
27
  def download_file(url: str, filename: str = None) -> str:
@@ -746,62 +752,56 @@ def analyze_csv_file(file_path: str, query: str) -> str:
746
  return f"Error analyzing CSV file: {str(e)}"
747
 
748
 
749
- vision_llm = ChatOpenAI(model="gpt-4o")
750
-
751
  @tool
752
  def extract_text(img_path: str) -> str:
753
  """
754
  Extract text from an image file using a multimodal model.
755
 
756
- This allows me to properly analyze the contents.
757
  """
 
758
  all_text = ""
759
- try:
760
- # Read image and encode as base64
761
- with open(img_path, "rb") as image_file:
762
- image_bytes = image_file.read()
763
-
764
- image_base64 = base64.b64encode(image_bytes).decode("utf-8")
765
-
766
- # Prepare the prompt including the base64 image data
767
- message = [
768
- HumanMessage(
769
- content=[
770
- {
771
- "type": "text",
772
- "text": (
773
- "Extract all the text from this image. "
774
- "Return only the extracted text, no explanations."
775
- ),
776
- },
777
- {
778
- "type": "image_url",
779
- "image_url": {
780
- "url": f"data:image/png;base64,{image_base64}"
781
- },
782
  },
783
- ]
784
- )
785
- ]
 
786
 
787
- # Call the vision-capable model
788
- response = vision_llm.invoke(message)
789
 
790
- # Append extracted text
791
- all_text += response.content + "\n\n"
792
 
793
- return all_text.strip()
794
- except Exception as e:
795
- # A butler should handle errors gracefully
796
- error_msg = f"Error extracting text: {str(e)}"
797
- print(error_msg)
798
- return ""
799
 
800
 
801
  @tool
802
  def reverse_sentence(text: str) -> str:
803
  """
804
- Reverses the input text.
805
  Args:
806
  text (str): The input string to be reversed.
807
  Returns:
 
5
  import urllib.parse
6
  from pathlib import Path
7
  from typing import Dict, Union
8
+ from dotenv import load_dotenv
9
+
10
 
11
  import pandas
12
  import pytz
 
24
  from langchain_openai import ChatOpenAI
25
  from langchain_tavily import TavilySearch
26
 
27
+ load_dotenv()
28
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
29
+ os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
30
+
31
 
32
  @tool
33
  def download_file(url: str, filename: str = None) -> str:
 
752
  return f"Error analyzing CSV file: {str(e)}"
753
 
754
 
 
 
755
  @tool
756
  def extract_text(img_path: str) -> str:
757
  """
758
  Extract text from an image file using a multimodal model.
759
 
760
+ This allows the contents to be properly analyzed.
761
  """
762
+ vision_llm = ChatOpenAI(model="gpt-4o")
763
  all_text = ""
764
+
765
+ # Read image and encode as base64
766
+ with open(img_path, "rb") as image_file:
767
+ image_bytes = image_file.read()
768
+
769
+ image_base64 = base64.b64encode(image_bytes).decode("utf-8")
770
+
771
+ # Prepare the prompt including the base64 image data
772
+ message = [
773
+ HumanMessage(
774
+ content=[
775
+ {
776
+ "type": "text",
777
+ "text": (
778
+ "Extract all the text from this image. "
779
+ "Return only the extracted text, no explanations."
780
+ ),
781
+ },
782
+ {
783
+ "type": "image_url",
784
+ "image_url": {
785
+ "url": f"data:image/png;base64,{image_base64}"
 
786
  },
787
+ },
788
+ ]
789
+ )
790
+ ]
791
 
792
+ # Call the vision-capable model
793
+ response = vision_llm.invoke(message)
794
 
795
+ # Append extracted text
796
+ all_text += response.content + "\n\n"
797
 
798
+ return all_text.strip()
 
 
 
 
 
799
 
800
 
801
  @tool
802
  def reverse_sentence(text: str) -> str:
803
  """
804
+ Reverses the input text. In case a question is written in reversed text, it can be corrected with this tool.
805
  Args:
806
  text (str): The input string to be reversed.
807
  Returns: