guillaumefrd committed
Commit 4754c75 · Parent(s): b527097

add tool to get webpage content + replace DDG with Brave Search + replace buggy remote code interpreter with local runtime + avoid hitting the TPM limit

.gitignore CHANGED
@@ -1 +1,3 @@
-__pycache__
+__pycache__
+.DS_Store
+tmp*
app.py CHANGED
@@ -2,6 +2,8 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
+from time import sleep
+from tqdm import tqdm
 
 
 # (Keep Constants as is)
@@ -89,7 +91,7 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
         if file_name:
             # add the URL of the data source to the question (so that the agent can deal with it)
             file_url = f"{DEFAULT_API_URL}/files/{task_id}"
-            question_text += f"\nFile URL: {file_url}"
+            question_text += f'\nFile URL: "{file_url}"'
             # get the extension of the file to help the agent
             try:
                 ext = file_name.split('.')[-1]
@@ -104,6 +106,11 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+
+            # wait 1 minute before the next call to avoid hitting the token-per-minute (TPM) limit
+            print('\n\n-> Sleeping for 1 minute to avoid hitting the token-per-minute (TPM) limit')
+            for _ in tqdm(range(60)):  # tqdm shows how long is left to wait
+                sleep(1)
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
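
Note on the throttling above: the commit pauses a fixed 60 seconds after every task, regardless of how many tokens the previous call actually used. A minimal standalone sketch of the same pattern, assuming only tqdm is installed (the `wait_for_tpm` helper name and its `seconds` parameter are illustrative, not part of the commit):

    from time import sleep
    from tqdm import tqdm

    def wait_for_tpm(seconds: int = 60) -> None:
        # tick once per second so the tqdm bar doubles as a visible countdown
        for _ in tqdm(range(seconds), desc="TPM cooldown"):
            sleep(1)

    wait_for_tpm()  # blocks for one minute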
langgraph_dir/agent.py CHANGED
@@ -1,3 +1,4 @@
+import os
 import json
 
 from typing import Literal
@@ -6,11 +7,11 @@ from langgraph.graph import MessagesState
 from langchain_core.messages import SystemMessage, HumanMessage, ToolMessage
 from langgraph.graph import StateGraph, START, END
 from langchain.agents import load_tools
-from langchain_community.tools.riza.command import ExecPython
+from langchain_community.tools import BraveSearch
 
 from .prompt import system_prompt
 from .custom_tools import (multiply, add, subtract, divide, modulus, power,
-                           query_image, automatic_speech_recognition)
+                           query_image, automatic_speech_recognition, get_webpage_content, python_repl_tool)
 
 
 class LangGraphAgent:
@@ -20,20 +21,21 @@ class LangGraphAgent:
                  show_prompt=True):
 
         # =========== LLM definition ===========
-        llm = ChatOpenAI(model=model_name, temperature=0)  # needs OPENAI_API_KEY
+        llm = ChatOpenAI(model=model_name, temperature=0)  # needs OPENAI_API_KEY in env
         print(f"LangGraphAgent initialized with model \"{model_name}\"")
 
         # =========== Augment the LLM with tools ===========
-        community_tool_names = [
-            "ddg-search",  # DuckDuckGo search
-            "wikipedia",
+        community_tools = [
+            BraveSearch.from_api_key(  # Web search (more performant than DuckDuckGo)
+                api_key=os.getenv("BRAVE_SEARCH_API_KEY"),  # needs BRAVE_SEARCH_API_KEY in env
+                search_kwargs={"count": 3}),
         ]
-        community_tools = load_tools(community_tool_names)
-        community_tools += [ExecPython(runtime_revision_id='01JT97GJ20BC83Y75WMAS364ZT')]  # Riza code interpreter (needs RIZA_API_KEY; not supported by load_tools; custom runtime with basic packages: pandas, numpy, etc.)
         custom_tools = [
-            multiply, add, subtract, divide, modulus, power,  # basic arithmetic
+            multiply, add, subtract, divide, modulus, power,  # Basic arithmetic
             query_image,  # Ask anything about an image using a VLM
             automatic_speech_recognition,  # Transcribe an audio file to text
+            get_webpage_content,  # Load a web page and return its content as markdown
+            python_repl_tool,  # Python code interpreter
         ]
 
         tools = community_tools + custom_tools
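
For reference, the Brave tool can be smoke-tested on its own; a quick sketch, assuming BRAVE_SEARCH_API_KEY is set in the environment (the query string is illustrative):

    import os
    from langchain_community.tools import BraveSearch

    search = BraveSearch.from_api_key(
        api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
        search_kwargs={"count": 3},  # same result cap as in the agent
    )
    print(search.run("LangGraph tool calling"))  # JSON string of title/link/snippet results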
langgraph_dir/config.py CHANGED
@@ -1,3 +1,6 @@
 # OPENAI_MODEL_NAME = "gpt-4.1-nano" # Overall Score: 10.0% (2/20 correct)
 OPENAI_MODEL_NAME = "gpt-4.1-mini"
-# OPENAI_MODEL_NAME = "gpt-4.1"
+# OPENAI_MODEL_NAME = "gpt-4.1"
+
+# QUERY_IMAGE_MODEL_NAME = "gpt-4.1-mini"
+QUERY_IMAGE_MODEL_NAME = "o4-mini"
langgraph_dir/custom_tools.py CHANGED
@@ -1,7 +1,11 @@
 import requests
-from langchain_core.tools import tool
+from pydantic import BaseModel, Field
 from huggingface_hub import InferenceClient
 from openai import OpenAI
+from bs4 import BeautifulSoup
+from markdownify import markdownify as md
+from langchain_core.tools import tool, Tool
+from langchain_experimental.utilities import PythonREPL
 
 
 # --- Basic operations --- #
@@ -116,10 +120,11 @@ def query_image(query: str, image_url: str) -> str:
         return completion.choices[0].message
 
     elif PROVIDER == 'openai':
-        client = OpenAI()
+        from .config import QUERY_IMAGE_MODEL_NAME
 
+        client = OpenAI()
         response = client.responses.create(
-            model="gpt-4.1-mini",
+            model=QUERY_IMAGE_MODEL_NAME,
             input=[{
                 "role": "user",
                 "content": [
@@ -180,3 +185,46 @@ def automatic_speech_recognition(file_url: str, file_extension: str) -> str:
 
     except Exception as e:
         return f"automatic_speech_recognition failed: {e}"
+
+
+@tool
+def get_webpage_content(page_url: str) -> str:
+    """Load a web page and return its content as markdown if possible.
+
+    Args:
+        page_url (str): the URL of the web page to get
+    """
+    try:
+        r = requests.get(page_url)
+        soup = BeautifulSoup(r.text, 'html.parser')
+        if soup.body:
+            # convert to markdown
+            out = md(str(soup.body))
+        else:
+            # return the raw content
+            out = r.text
+        return out
+    except Exception as e:
+        return f"get_webpage_content failed: {e}"
+
+
+# ======= Python code interpreter =======
+# WARNING: the Python REPL executes arbitrary code on the host machine (e.g., it can delete files or make network requests). Use with caution.
+
+class PythonREPLInput(BaseModel):
+    code: str = Field(description="The Python code string to execute.")
+
+python_repl = PythonREPL()
+
+python_repl_tool = Tool(
+    name="python_repl",
+    description="""A Python REPL shell (Read-Eval-Print Loop).
+Use this to execute single or multi-line python commands.
+Input should be syntactically valid Python code.
+Always end your code with `print(...)` to see the output.
+Do NOT execute code that could be harmful to the host system.
+You are allowed to download files from URLs.
+Do NOT send commands that block indefinitely (e.g., `input()`).""",
+    func=python_repl.run,
+    args_schema=PythonREPLInput
+)
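
A quick smoke test of the two new tools, run from a context where langgraph_dir.custom_tools imports cleanly (the URL and the code snippet are illustrative):

    from langgraph_dir.custom_tools import get_webpage_content, python_repl_tool

    # @tool objects take their arguments as a dict via .invoke()
    print(get_webpage_content.invoke({"page_url": "https://example.com"})[:200])

    # the REPL tool takes a code string and returns whatever the code printed
    print(python_repl_tool.run("x = 2 ** 10\nprint(x)"))  # -> 1024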
langgraph_dir/prompt.py CHANGED
@@ -6,4 +6,5 @@ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma sepa
 If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
 If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
 If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+If you use the python_repl tool (code interpreter), always end your code with `print(...)` to see the output.
 """
requirements.txt CHANGED
@@ -1,7 +1,7 @@
 gradio
 requests
 llama-index
-llama-index-llms-huggingface-api @ git+https://github.com/guillaumefrd/llama_index.git@add-provider-HF-API#subdirectory=llama-index-integrations/llms/llama-index-llms-huggingface-api
+llama-index-llms-huggingface-api
 llama_index.tools.wikipedia
 llama_index.tools.duckduckgo
 llama_index.tools.code_interpreter
@@ -10,4 +10,6 @@ langgraph
 langchain-openai
 langchain-community
 duckduckgo-search
-rizaio
+markdownify
+beautifulsoup4
+langchain_experimental