Spaces:
Sleeping
Sleeping
Added langsearch.
Browse files- agent_tools.py +102 -11
- app.py +39 -16
agent_tools.py
CHANGED
@@ -1,16 +1,18 @@
|
|
1 |
import requests
|
|
|
2 |
from bs4 import BeautifulSoup
|
3 |
|
4 |
-
|
|
|
5 |
"""
|
6 |
Perform a search using DuckDuckGo and return the results.
|
7 |
|
8 |
Args:
|
9 |
query: The search query string
|
10 |
-
|
11 |
|
12 |
Returns:
|
13 |
-
List of
|
14 |
"""
|
15 |
print(f"Performing DuckDuckGo search for: {query}")
|
16 |
|
@@ -21,7 +23,8 @@ def duckduckgo_search(query: str, num_results: int = 3) -> list:
|
|
21 |
|
22 |
# Format the query for the URL
|
23 |
formatted_query = query.replace(' ', '+')
|
24 |
-
|
|
|
25 |
|
26 |
# Send the request
|
27 |
response = requests.get(url, headers=headers, timeout=10)
|
@@ -42,24 +45,86 @@ def duckduckgo_search(query: str, num_results: int = 3) -> list:
|
|
42 |
url = link_elem.get('href') if link_elem.get('href') else link_elem.get_text(strip=True)
|
43 |
snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
results.append({
|
46 |
-
"
|
47 |
-
"url": url,
|
48 |
-
"snippet": snippet
|
49 |
})
|
50 |
|
51 |
-
if len(results) >=
|
52 |
break
|
53 |
|
54 |
-
print(f"
|
55 |
return results
|
56 |
except Exception as e:
|
57 |
print(f"Error during DuckDuckGo search: {e}")
|
58 |
return []
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
# Dictionary mapping tool names to their functions
|
61 |
TOOLS_MAPPING = {
|
62 |
-
"duckduckgo_search": duckduckgo_search
|
|
|
63 |
}
|
64 |
|
65 |
# Tool definitions for LLM API
|
@@ -85,5 +150,31 @@ TOOLS_DEFINITION = [
|
|
85 |
"required": ["query"]
|
86 |
}
|
87 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
}
|
89 |
-
]
|
|
|
1 |
import requests
|
2 |
+
import json
|
3 |
from bs4 import BeautifulSoup
|
4 |
|
5 |
+
|
6 |
+
def duckduckgo_search(query: str, count: int = 3) -> list:
|
7 |
"""
|
8 |
Perform a search using DuckDuckGo and return the results.
|
9 |
|
10 |
Args:
|
11 |
query: The search query string
|
12 |
+
count: Maximum number of results to return (default: 3)
|
13 |
|
14 |
Returns:
|
15 |
+
List of search results
|
16 |
"""
|
17 |
print(f"Performing DuckDuckGo search for: {query}")
|
18 |
|
|
|
23 |
|
24 |
# Format the query for the URL
|
25 |
formatted_query = query.replace(' ', '+')
|
26 |
+
# Build the search URL; kl=wt-wt is DuckDuckGo's region parameter set to "no region" (it does not change snippet size)
|
27 |
+
url = f"https://html.duckduckgo.com/html/?q={formatted_query}&kl=wt-wt"
|
28 |
|
29 |
# Send the request
|
30 |
response = requests.get(url, headers=headers, timeout=10)
|
|
|
45 |
url = link_elem.get('href') if link_elem.get('href') else link_elem.get_text(strip=True)
|
46 |
snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""
|
47 |
|
48 |
+
# results.append({
|
49 |
+
# "title": title,
|
50 |
+
# "url": url,
|
51 |
+
# "snippet": snippet
|
52 |
+
# })
|
53 |
+
|
54 |
results.append({
|
55 |
+
"summary": snippet
|
|
|
|
|
56 |
})
|
57 |
|
58 |
+
if len(results) >= count:
|
59 |
break
|
60 |
|
61 |
+
print(f"DuckDuckGo results: {results}")
|
62 |
return results
|
63 |
except Exception as e:
|
64 |
print(f"Error during DuckDuckGo search: {e}")
|
65 |
return []
|
66 |
|
67 |
+
|
68 |
+
def langsearch_search(query: str, count: int = 5, top_k: "int | None" = None, api_key: "str | None" = None) -> list:
    """
    Perform a search using LangSearch API and return the results.

    Args:
        query: The search query string
        count: Maximum number of results to return (default: 5)
        top_k: Alias for ``count`` under the name used by the tool schema
            exposed to the LLM; takes precedence over ``count`` when given
        api_key: LangSearch API key (default: None, falls back to the
            LS_TOKEN and then the LANGSEARCH_API_KEY environment variable)

    Returns:
        List of dicts of the form {"summary": <text>}. An empty list is
        returned when no API key is available or when the request or the
        response parsing fails (errors are printed, never raised).
    """
    # Imported locally because this module does not import os at file level.
    import os

    print(f"Performing LangSearch search for: {query}")

    # The tool definition advertises "top_k"; honour it so that a tool call
    # dispatched with that keyword does not fail with a TypeError.
    if top_k is not None:
        count = top_k

    try:
        # Use API key from parameters or environment variable
        api_key = (
            api_key
            or os.environ.get("LS_TOKEN")
            or os.environ.get("LANGSEARCH_API_KEY")
        )

        if not api_key:
            print("Warning: No LangSearch API key provided. Set LS_TOKEN environment variable.")
            return []

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }

        payload = json.dumps({
            "query": query,
            "freshness": "noLimit",
            "summary": True,
            "count": count
        })

        url = "https://api.langsearch.com/v1/web-search"

        response = requests.post(url, headers=headers, data=payload, timeout=30)
        response.raise_for_status()
        print(f"LangSearch response status code: {response.status_code}")
        # raise_for_status() only raises on 4xx/5xx; still reject other non-200 codes.
        if response.status_code != 200:
            print(f"LangSearch API error: {response.text}")
            return []

        # Keep the HTTP response and the decoded body under separate names.
        body = response.json()
        results = []
        for page in body["data"]["webPages"]["value"]:
            # Fall back to the short snippet so that one entry without a
            # summary does not discard every other result.
            text = page.get("summary") or page.get("snippet")
            if text:
                results.append({
                    "summary": text
                })
        print(f"LangSearch results: {results}")
        return results
    except Exception as e:
        # Deliberate best-effort boundary: callers expect a list, never an exception.
        print(f"Error during LangSearch search: {e}")
        return []
|
122 |
+
|
123 |
+
|
124 |
# Dictionary mapping tool names to their functions
|
125 |
TOOLS_MAPPING = {
|
126 |
+
"duckduckgo_search": duckduckgo_search,
|
127 |
+
"langsearch_search": langsearch_search
|
128 |
}
|
129 |
|
130 |
# Tool definitions for LLM API
|
|
|
150 |
"required": ["query"]
|
151 |
}
|
152 |
}
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"type": "function",
|
156 |
+
"function": {
|
157 |
+
"name": "langsearch_search",
|
158 |
+
"description": "Search the web using LangSearch API for more relevant results with deeper context",
|
159 |
+
"parameters": {
|
160 |
+
"type": "object",
|
161 |
+
"properties": {
|
162 |
+
"query": {
|
163 |
+
"type": "string",
|
164 |
+
"description": "The search query string"
|
165 |
+
},
|
166 |
+
"top_k": {
|
167 |
+
"type": "integer",
|
168 |
+
"description": "Maximum number of results to return",
|
169 |
+
"default": 5
|
170 |
+
},
|
171 |
+
"api_key": {
|
172 |
+
"type": "string",
|
173 |
+
"description": "LangSearch API key (optional, will use LANGSEARCH_API_KEY env var if not provided)"
|
174 |
+
}
|
175 |
+
},
|
176 |
+
"required": ["query"]
|
177 |
+
}
|
178 |
+
}
|
179 |
}
|
180 |
+
]
|
app.py
CHANGED
@@ -4,33 +4,57 @@ import requests
|
|
4 |
import json
|
5 |
import pandas as pd
|
6 |
from openai import OpenAI
|
7 |
-
from agent_tools import duckduckgo_search, TOOLS_MAPPING, TOOLS_DEFINITION
|
8 |
|
9 |
|
10 |
-
# (Keep Constants as is)
|
11 |
# --- Constants ---
|
12 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
13 |
|
14 |
# --- Basic Agent Definition ---
|
15 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
|
|
|
|
16 |
class BasicAgent:
|
17 |
def __init__(self):
|
18 |
print("BasicAgent initialized.")
|
19 |
self.client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=os.getenv("OR_TOKEN"))
|
20 |
|
21 |
-
def duckduckgo_search(self, query: str, num_results: int = 3) -> list:
|
22 |
-
"""Wrapper that calls the external duckduckgo_search function"""
|
23 |
-
return duckduckgo_search(query, num_results)
|
24 |
-
|
25 |
def __call__(self, question: str) -> str:
|
26 |
print(f"Agent received question: {question}")
|
27 |
|
28 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
messages = [
|
30 |
{
|
31 |
"role": "system",
|
32 |
-
|
33 |
-
"content": "Read the question carefully. Do not report your thoughts, explanations, reasoning, or conclusion. If you know the answer, give only YOUR FINAL ANSWER. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. On the other hand, if you don't know the answer, break down the question and list all questions that you want to search in a string array.",
|
34 |
},
|
35 |
{
|
36 |
"role": "user",
|
@@ -49,7 +73,6 @@ class BasicAgent:
|
|
49 |
}
|
50 |
]
|
51 |
|
52 |
-
# Execute once
|
53 |
for _ in range(3):
|
54 |
# Generate response
|
55 |
print("Using Inference API for generation...")
|
@@ -65,7 +88,7 @@ class BasicAgent:
|
|
65 |
# model="mistralai/mistral-small-3.1-24b-instruct:free",
|
66 |
# model="deepseek/deepseek-chat-v3-0324:free",
|
67 |
model="deepseek/deepseek-r1",
|
68 |
-
#tools=TOOLS_DEFINITION, # Use imported tools definition
|
69 |
messages=messages,
|
70 |
temperature=0.0,
|
71 |
max_tokens=1024,
|
@@ -149,12 +172,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
149 |
# print(f"An unexpected error occurred fetching questions: {e}")
|
150 |
# return f"An unexpected error occurred fetching questions: {e}", None
|
151 |
questions_data = [
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
# {
|
159 |
# 'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',
|
160 |
# 'question': 'In the video https:\\/\\/www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?',
|
|
|
4 |
import json
|
5 |
import pandas as pd
|
6 |
from openai import OpenAI
|
7 |
+
from agent_tools import duckduckgo_search, langsearch_search, TOOLS_MAPPING, TOOLS_DEFINITION
|
8 |
|
9 |
|
|
|
10 |
# --- Constants ---
|
11 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
12 |
|
13 |
# --- Basic Agent Definition ---
|
14 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
15 |
+
|
16 |
+
|
17 |
class BasicAgent:
|
18 |
def __init__(self):
|
19 |
print("BasicAgent initialized.")
|
20 |
self.client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=os.getenv("OR_TOKEN"))
|
21 |
|
|
|
|
|
|
|
|
|
22 |
def __call__(self, question: str) -> str:
|
23 |
print(f"Agent received question: {question}")
|
24 |
|
25 |
try:
|
26 |
+
content = "You are an assistant that has access to the following set of tools. Read the question carefully and do not report your thoughts, explanations, reasoning, or conclusion. Always use RAG. If you know the answer, give only YOUR FINAL ANSWER. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. On the other hand, if you really don't know the answer after your best efforts, break down the question and list all search queries in a string array."
|
27 |
+
|
28 |
+
count = 0
|
29 |
+
|
30 |
+
# Call duckduckgo_search function
|
31 |
+
# search_results = duckduckgo_search(query=question, count=10)
|
32 |
+
# if len(search_results) > 0:
|
33 |
+
# # Convert search results to a readable text format
|
34 |
+
# search_results_text = ""
|
35 |
+
# for i, result in enumerate(search_results, 1):
|
36 |
+
# count += 1
|
37 |
+
# search_results_text += f"\n\n---SEARCH RESULT #{count}---\n"
|
38 |
+
# search_results_text += f"{search_results[i - 1]}"
|
39 |
+
# content += f"\n\nThe following are the results from the DuckDuckGo API, you may use it as reference on top of your knowledge base: {search_results_text}"
|
40 |
+
|
41 |
+
# Call langsearch_search function
|
42 |
+
search_results = langsearch_search(query=question, count=5)
|
43 |
+
if len(search_results) > 0:
|
44 |
+
# Convert search results to a readable text format
|
45 |
+
search_results_text = ""
|
46 |
+
for i, result in enumerate(search_results, 1):
|
47 |
+
count += 1
|
48 |
+
search_results_text += f"\n\n---SEARCH RESULT #{count}---\n"
|
49 |
+
search_results_text += f"{search_results[i - 1]}"
|
50 |
+
content += f"\n\nThe following are the results from the LangSearch API, you may use it as reference on top of your knowledge base: {search_results_text}"
|
51 |
+
|
52 |
+
#print(f"Content for system message: {content}")
|
53 |
+
|
54 |
messages = [
|
55 |
{
|
56 |
"role": "system",
|
57 |
+
"content": content
|
|
|
58 |
},
|
59 |
{
|
60 |
"role": "user",
|
|
|
73 |
}
|
74 |
]
|
75 |
|
|
|
76 |
for _ in range(3):
|
77 |
# Generate response
|
78 |
print("Using Inference API for generation...")
|
|
|
88 |
# model="mistralai/mistral-small-3.1-24b-instruct:free",
|
89 |
# model="deepseek/deepseek-chat-v3-0324:free",
|
90 |
model="deepseek/deepseek-r1",
|
91 |
+
# tools=TOOLS_DEFINITION, # Use imported tools definition
|
92 |
messages=messages,
|
93 |
temperature=0.0,
|
94 |
max_tokens=1024,
|
|
|
172 |
# print(f"An unexpected error occurred fetching questions: {e}")
|
173 |
# return f"An unexpected error occurred fetching questions: {e}", None
|
174 |
questions_data = [
|
175 |
+
{
|
176 |
+
'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',
|
177 |
+
'question': 'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.',
|
178 |
+
'Level': '1',
|
179 |
+
'file_name': ''
|
180 |
+
},
|
181 |
# {
|
182 |
# 'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',
|
183 |
# 'question': 'In the video https:\\/\\/www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?',
|