guillaumefrd committed on
Commit
26aec96
·
1 Parent(s): 4754c75

get_webpage_content handles PDFs + LLM decides whether query_image should use a reasoning model

Browse files
langgraph_dir/agent.py CHANGED
@@ -28,7 +28,7 @@ class LangGraphAgent:
28
  community_tools = [
29
  BraveSearch.from_api_key( # Web search (more performant than DuckDuckGo)
30
  api_key=os.getenv("BRAVE_SEARCH_API_KEY"), # needs BRAVE_SEARCH_API_KEY in env
31
- search_kwargs={"count": 3}),
32
  ]
33
  custom_tools = [
34
  multiply, add, subtract, divide, modulus, power, # Basic arithmetic
@@ -124,7 +124,8 @@ class LangGraphAgent:
124
 
125
  # Invoke
126
  messages = [HumanMessage(content=question)]
127
- messages = self.agent.invoke({"messages": messages})
 
128
  for m in messages["messages"]:
129
  m.pretty_print()
130
 
 
28
  community_tools = [
29
  BraveSearch.from_api_key( # Web search (more performant than DuckDuckGo)
30
  api_key=os.getenv("BRAVE_SEARCH_API_KEY"), # needs BRAVE_SEARCH_API_KEY in env
31
+ search_kwargs={"count": 5}),
32
  ]
33
  custom_tools = [
34
  multiply, add, subtract, divide, modulus, power, # Basic arithmetic
 
124
 
125
  # Invoke
126
  messages = [HumanMessage(content=question)]
127
+ messages = self.agent.invoke({"messages": messages},
128
+ {"recursion_limit": 30}) # maximum number of steps before hitting a stop condition
129
  for m in messages["messages"]:
130
  m.pretty_print()
131
 
langgraph_dir/custom_tools.py CHANGED
@@ -6,6 +6,8 @@ from bs4 import BeautifulSoup
6
  from markdownify import markdownify as md
7
  from langchain_core.tools import tool, Tool
8
  from langchain_experimental.utilities import PythonREPL
 
 
9
 
10
 
11
  # --- Basic operations --- #
@@ -81,12 +83,13 @@ def power(a: float, b: float) -> float:
81
  # --- Functions --- #
82
 
83
  @tool
84
- def query_image(query: str, image_url: str) -> str:
85
  """Ask anything about an image using a Vision Language Model
86
 
87
  Args:
88
- query (str): the query about the image, e.g. how many persons are on the image?
89
- image_url (str): the URL to the image
 
90
  """
91
 
92
  # PROVIDER = 'huggingface'
@@ -120,11 +123,13 @@ def query_image(query: str, image_url: str) -> str:
120
  return completion.choices[0].message
121
 
122
  elif PROVIDER == 'openai':
123
- from .config import QUERY_IMAGE_MODEL_NAME
124
-
 
 
125
  client = OpenAI()
126
  response = client.responses.create(
127
- model=QUERY_IMAGE_MODEL_NAME,
128
  input=[{
129
  "role": "user",
130
  "content": [
@@ -196,14 +201,23 @@ def get_webpage_content(page_url: str) -> str:
196
  """
197
  try:
198
  r = requests.get(page_url)
199
- soup = BeautifulSoup((r.text), 'html.parser')
200
- if soup.body:
201
- # convert to markdown
202
- out = md(str(soup.body))
 
 
 
 
203
  else:
204
- # return the raw content
205
- out = r.text
206
- return out
 
 
 
 
 
207
  except Exception as e:
208
  return f"get_webpage_content failed: {e}"
209
 
 
6
  from markdownify import markdownify as md
7
  from langchain_core.tools import tool, Tool
8
  from langchain_experimental.utilities import PythonREPL
9
+ from pypdf import PdfReader
10
+ from io import BytesIO
11
 
12
 
13
  # --- Basic operations --- #
 
83
  # --- Functions --- #
84
 
85
  @tool
86
+ def query_image(query: str, image_url: str, need_reasoning: bool = False) -> str:
87
  """Ask anything about an image using a Vision Language Model
88
 
89
  Args:
90
+ query (str): The query about the image, e.g. how many persons are on the image?
91
+ image_url (str): The URL to the image
92
+ need_reasoning (bool): Set to True for complex query that require a reasoning model to answer properly. Set to False otherwise.
93
  """
94
 
95
  # PROVIDER = 'huggingface'
 
123
  return completion.choices[0].message
124
 
125
  elif PROVIDER == 'openai':
126
+ if need_reasoning:
127
+ model_name = "o4-mini"
128
+ else:
129
+ model_name = "gpt-4.1-mini"
130
  client = OpenAI()
131
  response = client.responses.create(
132
+ model=model_name,
133
  input=[{
134
  "role": "user",
135
  "content": [
 
201
  """
202
  try:
203
  r = requests.get(page_url)
204
+ r.raise_for_status()
205
+ text = ""
206
+ # special case if page is a PDF file
207
+ if r.headers.get('Content-Type', '') == 'application/pdf':
208
+ pdf_file = BytesIO(r.content)
209
+ reader = PdfReader(pdf_file)
210
+ for page in reader.pages:
211
+ text += page.extract_text()
212
  else:
213
+ soup = BeautifulSoup((r.text), 'html.parser')
214
+ if soup.body:
215
+ # convert to markdown
216
+ text = md(str(soup.body))
217
+ else:
218
+ # return the raw content
219
+ text = r.text
220
+ return text
221
  except Exception as e:
222
  return f"get_webpage_content failed: {e}"
223