Facelook committed on
Commit
65a76ae
·
1 Parent(s): 0aa432c

Added langsearch.

Browse files
Files changed (2) hide show
  1. agent_tools.py +102 -11
  2. app.py +39 -16
agent_tools.py CHANGED
@@ -1,16 +1,18 @@
1
  import requests
 
2
  from bs4 import BeautifulSoup
3
 
4
- def duckduckgo_search(query: str, num_results: int = 3) -> list:
 
5
  """
6
  Perform a search using DuckDuckGo and return the results.
7
 
8
  Args:
9
  query: The search query string
10
- num_results: Maximum number of results to return (default: 3)
11
 
12
  Returns:
13
- List of dictionaries containing search results with title, url, and snippet
14
  """
15
  print(f"Performing DuckDuckGo search for: {query}")
16
 
@@ -21,7 +23,8 @@ def duckduckgo_search(query: str, num_results: int = 3) -> list:
21
 
22
  # Format the query for the URL
23
  formatted_query = query.replace(' ', '+')
24
- url = f"https://html.duckduckgo.com/html/?q={formatted_query}"
 
25
 
26
  # Send the request
27
  response = requests.get(url, headers=headers, timeout=10)
@@ -42,24 +45,86 @@ def duckduckgo_search(query: str, num_results: int = 3) -> list:
42
  url = link_elem.get('href') if link_elem.get('href') else link_elem.get_text(strip=True)
43
  snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""
44
 
 
 
 
 
 
 
45
  results.append({
46
- "title": title,
47
- "url": url,
48
- "snippet": snippet
49
  })
50
 
51
- if len(results) >= num_results:
52
  break
53
 
54
- print(f"Found {len(results)} results for query: {query}")
55
  return results
56
  except Exception as e:
57
  print(f"Error during DuckDuckGo search: {e}")
58
  return []
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  # Dictionary mapping tool names to their functions
61
  TOOLS_MAPPING = {
62
- "duckduckgo_search": duckduckgo_search
 
63
  }
64
 
65
  # Tool definitions for LLM API
@@ -85,5 +150,31 @@ TOOLS_DEFINITION = [
85
  "required": ["query"]
86
  }
87
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  }
89
- ]
 
1
  import requests
2
+ import json
3
  from bs4 import BeautifulSoup
4
 
5
+
6
+ def duckduckgo_search(query: str, count: int = 3) -> list:
7
  """
8
  Perform a search using DuckDuckGo and return the results.
9
 
10
  Args:
11
  query: The search query string
12
+ count: Maximum number of results to return (default: 3)
13
 
14
  Returns:
15
+ List of search results
16
  """
17
  print(f"Performing DuckDuckGo search for: {query}")
18
 
 
23
 
24
  # Format the query for the URL
25
  formatted_query = query.replace(' ', '+')
26
+ # Format the URL with query and parameter to increase snippet size
27
+ url = f"https://html.duckduckgo.com/html/?q={formatted_query}&kl=wt-wt"
28
 
29
  # Send the request
30
  response = requests.get(url, headers=headers, timeout=10)
 
45
  url = link_elem.get('href') if link_elem.get('href') else link_elem.get_text(strip=True)
46
  snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""
47
 
48
+ # results.append({
49
+ # "title": title,
50
+ # "url": url,
51
+ # "snippet": snippet
52
+ # })
53
+
54
  results.append({
55
+ "summary": snippet
 
 
56
  })
57
 
58
+ if len(results) >= count:
59
  break
60
 
61
+ print(f"DuckDuckGo results: {results}")
62
  return results
63
  except Exception as e:
64
  print(f"Error during DuckDuckGo search: {e}")
65
  return []
66
 
67
+
68
def langsearch_search(query: str, count: int = 5) -> list:
    """
    Perform a search using LangSearch API and return the results.

    The API key is read from the LS_TOKEN environment variable; it is not a
    parameter of this function.

    Args:
        query: The search query string
        count: Maximum number of results to request (default: 5)

    Returns:
        List of dictionaries of the form {"summary": <text>}, one per returned
        web page that carries text. An empty list is returned when the API key
        is missing, the request fails, the API answers with a non-200 status,
        or the response contains no web pages.
    """
    import os

    print(f"Performing LangSearch search for: {query}")

    try:
        # The key comes from the environment only.
        api_key = os.environ.get("LS_TOKEN")
        if not api_key:
            print("Warning: No LangSearch API key provided. Set LS_TOKEN environment variable.")
            return []

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }
        payload = json.dumps({
            "query": query,
            "freshness": "noLimit",
            "summary": True,
            "count": count,
        })
        url = "https://api.langsearch.com/v1/web-search"

        response = requests.post(url, headers=headers, data=payload, timeout=30)
        print(f"LangSearch response status code: {response.status_code}")
        # Check the status ourselves instead of calling raise_for_status():
        # raising first would skip the branch below, and the API's error body
        # (the useful diagnostic) would never be printed.
        if response.status_code != 200:
            print(f"LangSearch API error: {response.text}")
            return []

        body = response.json()
        # Walk the nested payload defensively so that a missing or null level
        # yields "no results" rather than an exception.
        pages = ((body.get("data") or {}).get("webPages") or {}).get("value") or []

        results = []
        for page in pages:
            # Prefer the long summary; fall back to the short snippet so one
            # entry without a summary does not discard the whole result set.
            text = page.get("summary") or page.get("snippet")
            if text:
                results.append({"summary": text})

        print(f"LangSearch results: {results}")
        return results
    except Exception as e:
        # Best-effort tool: report the problem and let the caller continue
        # without search context.
        print(f"Error during LangSearch search: {e}")
        return []
122
+
123
+
124
  # Dictionary mapping tool names to their functions
125
  TOOLS_MAPPING = {
126
+ "duckduckgo_search": duckduckgo_search,
127
+ "langsearch_search": langsearch_search
128
  }
129
 
130
  # Tool definitions for LLM API
 
150
  "required": ["query"]
151
  }
152
  }
153
+ },
154
+ {
155
+ "type": "function",
156
+ "function": {
157
+ "name": "langsearch_search",
158
+ "description": "Search the web using LangSearch API for more relevant results with deeper context",
159
+ "parameters": {
160
+ "type": "object",
161
+ "properties": {
162
+ "query": {
163
+ "type": "string",
164
+ "description": "The search query string"
165
+ },
166
+ "top_k": {
167
+ "type": "integer",
168
+ "description": "Maximum number of results to return",
169
+ "default": 5
170
+ },
171
+ "api_key": {
172
+ "type": "string",
173
+ "description": "LangSearch API key (optional, will use LANGSEARCH_API_KEY env var if not provided)"
174
+ }
175
+ },
176
+ "required": ["query"]
177
+ }
178
+ }
179
  }
180
+ ]
app.py CHANGED
@@ -4,33 +4,57 @@ import requests
4
  import json
5
  import pandas as pd
6
  from openai import OpenAI
7
- from agent_tools import duckduckgo_search, TOOLS_MAPPING, TOOLS_DEFINITION
8
 
9
 
10
- # (Keep Constants as is)
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
  # --- Basic Agent Definition ---
15
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
 
16
  class BasicAgent:
17
  def __init__(self):
18
  print("BasicAgent initialized.")
19
  self.client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=os.getenv("OR_TOKEN"))
20
 
21
- def duckduckgo_search(self, query: str, num_results: int = 3) -> list:
22
- """Wrapper that calls the external duckduckgo_search function"""
23
- return duckduckgo_search(query, num_results)
24
-
25
  def __call__(self, question: str) -> str:
26
  print(f"Agent received question: {question}")
27
 
28
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  messages = [
30
  {
31
  "role": "system",
32
- # "content": "You are a general AI assistant. I will ask you a question. Read the question carefully. Break down the question into multiple questions and use the tools available to you to answer the question. Do not report your thoughts, explanations, reasoning, or conclusion. Give only YOUR FINAL ANSWER. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
33
- "content": "Read the question carefully. Do not report your thoughts, explanations, reasoning, or conclusion. If you know the answer, give only YOUR FINAL ANSWER. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. On the other hand, if you don't know the answer, break down the question and list all questions that you want to search in a string array.",
34
  },
35
  {
36
  "role": "user",
@@ -49,7 +73,6 @@ class BasicAgent:
49
  }
50
  ]
51
 
52
- # Execute once
53
  for _ in range(3):
54
  # Generate response
55
  print("Using Inference API for generation...")
@@ -65,7 +88,7 @@ class BasicAgent:
65
  # model="mistralai/mistral-small-3.1-24b-instruct:free",
66
  # model="deepseek/deepseek-chat-v3-0324:free",
67
  model="deepseek/deepseek-r1",
68
- #tools=TOOLS_DEFINITION, # Use imported tools definition
69
  messages=messages,
70
  temperature=0.0,
71
  max_tokens=1024,
@@ -149,12 +172,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
149
  # print(f"An unexpected error occurred fetching questions: {e}")
150
  # return f"An unexpected error occurred fetching questions: {e}", None
151
  questions_data = [
152
- # {
153
- # 'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',
154
- # 'question': 'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.',
155
- # 'Level': '1',
156
- # 'file_name': ''
157
- # },
158
  # {
159
  # 'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',
160
  # 'question': 'In the video https:\\/\\/www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?',
 
4
  import json
5
  import pandas as pd
6
  from openai import OpenAI
7
+ from agent_tools import duckduckgo_search, langsearch_search, TOOLS_MAPPING, TOOLS_DEFINITION
8
 
9
 
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
13
  # --- Basic Agent Definition ---
14
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
15
+
16
+
17
  class BasicAgent:
18
  def __init__(self):
19
  print("BasicAgent initialized.")
20
  self.client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=os.getenv("OR_TOKEN"))
21
 
 
 
 
 
22
  def __call__(self, question: str) -> str:
23
  print(f"Agent received question: {question}")
24
 
25
  try:
26
+ content = "You are an assistant that has access to the following set of tools. Read the question carefully and do not report your thoughts, explanations, reasoning, or conclusion. Always use RAG. If you know the answer, give only YOUR FINAL ANSWER. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. On the other hand, if you really don't know the answer after your best efforts, break down the question and list all search queries in a string array."
27
+
28
+ count = 0
29
+
30
+ # Call duckduckgo_search function
31
+ # search_results = duckduckgo_search(query=question, count=10)
32
+ # if len(search_results) > 0:
33
+ # # Convert search results to a readable text format
34
+ # search_results_text = ""
35
+ # for i, result in enumerate(search_results, 1):
36
+ # count += 1
37
+ # search_results_text += f"\n\n---SEARCH RESULT #{count}---\n"
38
+ # search_results_text += f"{search_results[i - 1]}"
39
+ # content += f"\n\nThe following are the results from the DuckDuckGo API, you may use it as reference on top of your knowledge base: {search_results_text}"
40
+
41
+ # Call langsearch_search function
42
+ search_results = langsearch_search(query=question, count=5)
43
+ if len(search_results) > 0:
44
+ # Convert search results to a readable text format
45
+ search_results_text = ""
46
+ for i, result in enumerate(search_results, 1):
47
+ count += 1
48
+ search_results_text += f"\n\n---SEARCH RESULT #{count}---\n"
49
+ search_results_text += f"{search_results[i - 1]}"
50
+ content += f"\n\nThe following are the results from the LangSearch API, you may use it as reference on top of your knowledge base: {search_results_text}"
51
+
52
+ #print(f"Content for system message: {content}")
53
+
54
  messages = [
55
  {
56
  "role": "system",
57
+ "content": content
 
58
  },
59
  {
60
  "role": "user",
 
73
  }
74
  ]
75
 
 
76
  for _ in range(3):
77
  # Generate response
78
  print("Using Inference API for generation...")
 
88
  # model="mistralai/mistral-small-3.1-24b-instruct:free",
89
  # model="deepseek/deepseek-chat-v3-0324:free",
90
  model="deepseek/deepseek-r1",
91
+ # tools=TOOLS_DEFINITION, # Use imported tools definition
92
  messages=messages,
93
  temperature=0.0,
94
  max_tokens=1024,
 
172
  # print(f"An unexpected error occurred fetching questions: {e}")
173
  # return f"An unexpected error occurred fetching questions: {e}", None
174
  questions_data = [
175
+ {
176
+ 'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',
177
+ 'question': 'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.',
178
+ 'Level': '1',
179
+ 'file_name': ''
180
+ },
181
  # {
182
  # 'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',
183
  # 'question': 'In the video https:\\/\\/www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?',