guillaumefrd committed on
Commit
3568413
·
1 Parent(s): 283e426

add more advanced tools (query image, ASR, code interpreter)

Browse files
app.py CHANGED
@@ -104,7 +104,6 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
104
  submitted_answer = agent(question_text)
105
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
106
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
107
- agent.ctx.clear() # clear context for next question
108
  except Exception as e:
109
  print(f"Error running agent on task {task_id}: {e}")
110
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
104
  submitted_answer = agent(question_text)
105
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
106
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
107
  except Exception as e:
108
  print(f"Error running agent on task {task_id}: {e}")
109
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
langgraph_dir/agent.py CHANGED
@@ -9,7 +9,8 @@ from langchain.agents import load_tools
9
  from langchain_community.tools.riza.command import ExecPython
10
 
11
  from .prompt import system_prompt
12
- from .custom_tools import multiply, add, subtract, divide, modulus, power
 
13
 
14
 
15
  class LangGraphAgent:
@@ -28,18 +29,17 @@ class LangGraphAgent:
28
  "wikipedia",
29
  ]
30
  community_tools = load_tools(community_tool_names)
31
- community_tools += [ExecPython()] # Riza code interpreter (needs RIZA_API_KEY) (not supported by load_tools)
32
- custom_tools = [multiply, add, subtract, divide, modulus, power]
 
 
 
 
 
33
  tools = community_tools + custom_tools
34
  tools_by_name = {tool.name: tool for tool in tools}
35
  llm_with_tools = llm.bind_tools(tools)
36
 
37
- # tool_spec_list += WikipediaToolSpec().to_tool_list()
38
- # tool_spec_list += DuckDuckGoSearchToolSpec().to_tool_list()
39
- # tool_spec_list += CodeInterpreterToolSpec().to_tool_list()
40
- # tool_spec_list += [query_image_tool, automatic_speech_recognition_tool]
41
-
42
-
43
  # =========== Agent definition ===========
44
 
45
  # Nodes
 
9
  from langchain_community.tools.riza.command import ExecPython
10
 
11
  from .prompt import system_prompt
12
+ from .custom_tools import (multiply, add, subtract, divide, modulus, power,
13
+ query_image, automatic_speech_recognition)
14
 
15
 
16
  class LangGraphAgent:
 
29
  "wikipedia",
30
  ]
31
  community_tools = load_tools(community_tool_names)
32
+ community_tools += [ExecPython(runtime_revision_id='01JT97GJ20BC83Y75WMAS364ZT')] # Riza code interpreter (needs RIZA_API_KEY) (not supported by load_tools, custom runtime with basic packages (pandas, numpy, etc.))
33
+ custom_tools = [
34
+ multiply, add, subtract, divide, modulus, power, # basic arithmetic
35
+ query_image, # Ask anything about an image using a VLM
36
+ automatic_speech_recognition, # Transcribe an audio file to text
37
+ ]
38
+
39
  tools = community_tools + custom_tools
40
  tools_by_name = {tool.name: tool for tool in tools}
41
  llm_with_tools = llm.bind_tools(tools)
42
 
 
 
 
 
 
 
43
  # =========== Agent definition ===========
44
 
45
  # Nodes
langgraph_dir/custom_tools.py CHANGED
@@ -1,9 +1,12 @@
1
  from langchain_core.tools import tool
 
 
 
2
 
3
  @tool
4
  def multiply(a: float, b: float) -> float:
5
- """
6
- Multiplies two numbers.
7
  Args:
8
  a (float): the first number
9
  b (float): the second number
@@ -13,8 +16,8 @@ def multiply(a: float, b: float) -> float:
13
 
14
  @tool
15
  def add(a: float, b: float) -> float:
16
- """
17
- Adds two numbers.
18
  Args:
19
  a (float): the first number
20
  b (float): the second number
@@ -24,8 +27,8 @@ def add(a: float, b: float) -> float:
24
 
25
  @tool
26
  def subtract(a: float, b: float) -> int:
27
- """
28
- Subtracts two numbers.
29
  Args:
30
  a (float): the first number
31
  b (float): the second number
@@ -35,8 +38,8 @@ def subtract(a: float, b: float) -> int:
35
 
36
  @tool
37
  def divide(a: float, b: float) -> float:
38
- """
39
- Divides two numbers.
40
  Args:
41
  a (float): the first float number
42
  b (float): the second float number
@@ -48,8 +51,8 @@ def divide(a: float, b: float) -> float:
48
 
49
  @tool
50
  def modulus(a: int, b: int) -> int:
51
- """
52
- Get the modulus of two numbers.
53
  Args:
54
  a (int): the first number
55
  b (int): the second number
@@ -59,10 +62,64 @@ def modulus(a: int, b: int) -> int:
59
 
60
  @tool
61
  def power(a: float, b: float) -> float:
62
- """
63
- Get the power of two numbers.
64
  Args:
65
  a (float): the first number
66
  b (float): the second number
67
  """
68
  return a**b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from langchain_core.tools import tool
2
+ from huggingface_hub import InferenceClient
3
+
4
+ # --- Basic operations --- #
5
 
6
  @tool
7
  def multiply(a: float, b: float) -> float:
8
+ """Multiplies two numbers.
9
+
10
  Args:
11
  a (float): the first number
12
  b (float): the second number
 
16
 
17
  @tool
18
  def add(a: float, b: float) -> float:
19
+ """Adds two numbers.
20
+
21
  Args:
22
  a (float): the first number
23
  b (float): the second number
 
27
 
28
  @tool
29
  def subtract(a: float, b: float) -> int:
30
+ """Subtracts two numbers.
31
+
32
  Args:
33
  a (float): the first number
34
  b (float): the second number
 
38
 
39
  @tool
40
  def divide(a: float, b: float) -> float:
41
+ """Divides two numbers.
42
+
43
  Args:
44
  a (float): the first float number
45
  b (float): the second float number
 
51
 
52
  @tool
53
  def modulus(a: int, b: int) -> int:
54
+ """Get the modulus of two numbers.
55
+
56
  Args:
57
  a (int): the first number
58
  b (int): the second number
 
62
 
@tool
def power(a: float, b: float) -> float:
    """Get the power of two numbers.

    Args:
        a (float): the first number
        b (float): the second number
    """
    # Two-argument built-in pow() is the functional spelling of ``a ** b``.
    return pow(a, b)
72
+
73
+
74
+ # --- Functions --- #
75
+
76
@tool
def query_image(query: str, image_url: str) -> str:
    """Ask anything about an image using a Vision Language Model

    Args:
        query (str): the query about the image, e.g. how many persons are on the image?
        image_url (str): the URL to the image

    Returns:
        str: the model's textual answer, or a message starting with
            "query_image failed:" if the request could not be completed.
    """
    # One user turn carrying both the question and the image reference
    # (OpenAI-style multimodal content parts).
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": query},
                {"type": "image_url", "image_url": {"url": image_url}},
            ],
        }
    ]

    try:
        # Built inside the try so that a client/provider setup failure is
        # reported to the agent as text instead of raising out of the tool.
        client = InferenceClient(provider="nebius")
        completion = client.chat.completions.create(
            # "google/gemma-3-27b-it" was tried as an alternative model.
            model="Qwen/Qwen2.5-VL-72B-Instruct",
            messages=messages,
            max_tokens=512,
        )
        # Return only the generated text: the tool is declared `-> str`, and
        # the message object itself is not what the agent should read.
        return completion.choices[0].message.content or ""
    except Exception as e:
        return f"query_image failed: {e}"
113
+
114
@tool
def automatic_speech_recognition(file_url: str) -> str:
    """Transcribe an audio file to text

    Args:
        file_url (str): the URL to the audio file

    Returns:
        str: the transcription, or a message starting with
            "automatic_speech_recognition failed:" if the request could not
            be completed.
    """
    try:
        # Built inside the try so that a client/provider setup failure is
        # reported to the agent as text instead of raising out of the tool.
        client = InferenceClient(provider="fal-ai")
        output = client.automatic_speech_recognition(
            file_url, model="openai/whisper-large-v3"
        )
        # The client returns a structured output object; the tool is declared
        # `-> str`, so hand back only the transcribed text.
        return output.text
    except Exception as e:
        return f"automatic_speech_recognition failed: {e}"
llamaindex_dir/agent.py CHANGED
@@ -71,4 +71,8 @@ class LLamaIndexAgent:
71
  print('Could not split response on "FINAL ANSWER:"')
72
  print("\n\n"+"-"*50)
73
  print(f"Agent returning with answer: {response}")
 
 
 
 
74
  return response
 
71
  print('Could not split response on "FINAL ANSWER:"')
72
  print("\n\n"+"-"*50)
73
  print(f"Agent returning with answer: {response}")
74
+
75
+ # clear context for next question before returning
76
+ self.ctx.clear()
77
+
78
  return response