agents_final_assignment

Sleeping

App Files Files Community

guillaumefrd committed on May 3

Commit

26aec96

1 Parent(s): 4754c75

get_webpage_content handles PDF + LLM decides if query_image should use a reasoning model

Browse files

Files changed (2) hide show

langgraph_dir/agent.py +3 -2
langgraph_dir/custom_tools.py +27 -13

langgraph_dir/agent.py CHANGED Viewed

@@ -28,7 +28,7 @@ class LangGraphAgent:
         community_tools = [
             BraveSearch.from_api_key(   # Web search (more performant than DuckDuckGo)
                 api_key=os.getenv("BRAVE_SEARCH_API_KEY"), # needs BRAVE_SEARCH_API_KEY in env
-                search_kwargs={"count": 3}),
         ]
         custom_tools = [
             multiply, add, subtract, divide, modulus, power,  # Basic arithmetic
@@ -124,7 +124,8 @@ class LangGraphAgent:
         # Invoke
         messages = [HumanMessage(content=question)]
-        messages = self.agent.invoke({"messages": messages})
         for m in messages["messages"]:
             m.pretty_print()

         community_tools = [
             BraveSearch.from_api_key(   # Web search (more performant than DuckDuckGo)
                 api_key=os.getenv("BRAVE_SEARCH_API_KEY"), # needs BRAVE_SEARCH_API_KEY in env
+                search_kwargs={"count": 5}),
         ]
         custom_tools = [
             multiply, add, subtract, divide, modulus, power,  # Basic arithmetic
         # Invoke
         messages = [HumanMessage(content=question)]
+        messages = self.agent.invoke({"messages": messages},
+                                     {"recursion_limit": 30}) # maximum number of steps before hitting a stop condition
         for m in messages["messages"]:
             m.pretty_print()

langgraph_dir/custom_tools.py CHANGED Viewed

@@ -6,6 +6,8 @@ from bs4 import BeautifulSoup
 from markdownify import markdownify as md
 from langchain_core.tools import tool, Tool
 from langchain_experimental.utilities import PythonREPL
 # --- Basic operations --- #
@@ -81,12 +83,13 @@ def power(a: float, b: float) -> float:
 # --- Functions --- #
 @tool
-def query_image(query: str, image_url: str) -> str:
     """Ask anything about an image using a Vision Language Model
     Args:
-        query (str): the query about the image, e.g. how many persons are on the image?
-        image_url (str): the URL to the image
     """
     # PROVIDER = 'huggingface'
@@ -120,11 +123,13 @@ def query_image(query: str, image_url: str) -> str:
             return completion.choices[0].message
         elif PROVIDER == 'openai':
-            from .config import QUERY_IMAGE_MODEL_NAME
             client = OpenAI()
             response = client.responses.create(
-                model=QUERY_IMAGE_MODEL_NAME,
                 input=[{
                     "role": "user",
                     "content": [
@@ -196,14 +201,23 @@ def get_webpage_content(page_url: str) -> str:
     """
     try:
         r = requests.get(page_url)
-        soup = BeautifulSoup((r.text), 'html.parser')
-        if soup.body:
-            # convert to markdown
-            out = md(str(soup.body))
         else:
-            # return the raw content
-            out = r.text
-        return out
     except Exception as e:
         return f"get_webpage_content failed: {e}"

 from markdownify import markdownify as md
 from langchain_core.tools import tool, Tool
 from langchain_experimental.utilities import PythonREPL
+from pypdf import PdfReader
+from io import BytesIO
 # --- Basic operations --- #
 # --- Functions --- #
 @tool
+def query_image(query: str, image_url: str, need_reasoning: bool = False) -> str:
     """Ask anything about an image using a Vision Language Model
     Args:
+        query (str): The query about the image, e.g. how many persons are on the image?
+        image_url (str): The URL to the image
+        need_reasoning (bool): Set to True for complex query that require a reasoning model to answer properly. Set to False otherwise.
     """
     # PROVIDER = 'huggingface'
             return completion.choices[0].message
         elif PROVIDER == 'openai':
+            if need_reasoning:
+                model_name = "o4-mini"
+            else:
+                model_name = "gpt-4.1-mini"
             client = OpenAI()
             response = client.responses.create(
+                model=model_name,
                 input=[{
                     "role": "user",
                     "content": [
     """
     try:
         r = requests.get(page_url)
+        r.raise_for_status()
+        text = ""
+        # special case if page is a PDF file
+        if r.headers.get('Content-Type', '') == 'application/pdf':
+            pdf_file = BytesIO(r.content)
+            reader = PdfReader(pdf_file)
+            for page in reader.pages:
+                text += page.extract_text()
         else:
+            soup = BeautifulSoup((r.text), 'html.parser')
+            if soup.body:
+                # convert to markdown
+                text = md(str(soup.body))
+            else:
+                # return the raw content
+                text = r.text
+        return text
     except Exception as e:
         return f"get_webpage_content failed: {e}"