errchh committed on
Commit
c233fe5
·
1 Parent(s): 350738c

update llm, search tools

Browse files
__pycache__/agent.cpython-312.pyc CHANGED
Binary files a/__pycache__/agent.cpython-312.pyc and b/__pycache__/agent.cpython-312.pyc differ
 
__pycache__/prompts.cpython-312.pyc ADDED
Binary file (1.08 kB). View file
 
agent.py CHANGED
@@ -14,23 +14,19 @@ from langchain_core.messages.ai import subtract_usage
14
 
15
  from langchain.tools import Tool
16
  from langchain_core.tools import tool
17
- from langchain_community.tools import WikipediaQueryRun
18
- from langchain_community.utilities import WikipediaAPIWrapper
19
- from langchain_community.tools import DuckDuckGoSearchResults
20
- from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
21
- from langchain_community.utilities import ArxivAPIWrapper
22
  from langchain_community.retrievers import BM25Retriever
23
 
24
  from langgraph.prebuilt import ToolNode, tools_condition
25
 
 
 
 
26
 
27
  # load environment variables
28
  load_dotenv()
29
- HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
30
- print(f"DEBUG: HUGGINGFACEHUB_API_TOKEN = {HUGGINGFACEHUB_API_TOKEN}")
31
- GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
32
- print(f"DEBUG: GOOGLE_API_KEY = {GOOGLE_API_KEY}")
33
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
34
 
35
 
36
  # maths tool
@@ -92,53 +88,56 @@ def modulus(a:int, b:int) -> int:
92
 
93
  # wikipedia search tool
94
  @tool
95
- def search_wiki(query: str) -> Dict[str, str]:
96
- """search wikipedia with a query
97
- args:
98
- query: a search query
99
- """
100
- docs = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
101
- docs.run(query)
102
- formatted_result = f'<Document source="{docs.metadata["source"]}" page="{docs.metadata.get("page", "")}"/>\n{docs.page_content}\n</Document>'
103
- return formatted_result
 
 
 
104
 
105
 
106
  # internet search tool
107
  @tool
108
- def search_web(query: str) -> Dict[str, str]:
109
- """search internet with a query
110
- args:
111
- query: a search query
112
- """
113
- wrapper = DuckDuckGoSearchAPIWrapper(region="en-us", max_results=2)
114
- docs = DuckDuckGoSearchResults(api_wrapper=wrapper)
115
- docs.invoke(query)
116
- formatted_result = f'<Document source="{docs.metadata["source"]}" page="{docs.metadata.get("page", "")}"/>\n{docs.page_content}\n</Document>'
117
- return formatted_result
 
 
118
 
119
 
120
  # ArXiv search tool
121
  @tool
122
- def search_arxiv(query: str) -> Dict[str, str]:
123
- """search ArXiv for the paper with the given identifier
124
- args:
125
- query: a search identifier
126
- """
127
- arxiv = ArxivAPIWrapper()
128
- docs = arxiv.run(query)
129
- formatted_result = f'<Document source="{docs.metadata["source"]}" page="{docs.metadata.get("page", "")}"/>\n{docs.page_content}\n</Document>'
130
- return formatted_result
 
 
 
131
 
132
 
133
  # build retriever
134
  # bm25_retriever = BM25Retriever.from_documents(docs)
135
 
136
 
137
- # load system prompt from file
138
- with open("system_prompt.txt", "r", encoding="utf-8") as f:
139
- system_prompt = f.read()
140
-
141
-
142
  # init system message
143
  sys_msg = SystemMessage(content=system_prompt)
144
 
@@ -159,7 +158,7 @@ tools = [
159
  def build_graph():
160
  # llm
161
  llm = ChatGroq(
162
- model="qwen-qwq-32b",
163
  temperature=0,
164
  )
165
  print(f"DEBUG: llm object = {llm}")
 
14
 
15
  from langchain.tools import Tool
16
  from langchain_core.tools import tool
17
+ from langchain_community.tools.tavily_search import TavilySearchResults
18
+ from langchain_community.document_loaders import WikipediaLoader
19
+ from langchain_community.document_loaders import ArxivLoader
 
 
20
  from langchain_community.retrievers import BM25Retriever
21
 
22
  from langgraph.prebuilt import ToolNode, tools_condition
23
 
24
+ # load system prompt
25
+ from prompts import system_prompt
26
+
27
 
28
  # load environment variables
29
  load_dotenv()
 
 
 
 
 
30
 
31
 
32
  # maths tool
 
88
 
89
  # wikipedia search tool
90
  @tool
91
def search_wiki(query: str) -> dict[str, str]:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "wiki_results" holding the loaded pages,
        each rendered as a <Document> block and separated by "---" lines.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # join() accepts any iterable, so no intermediate list is needed.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    # The tool has always returned a dict; the annotation now says so.
    return {"wiki_results": formatted_search_docs}
103
 
104
 
105
  # internet search tool
106
  @tool
107
def search_web(query: str) -> dict[str, str]:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "web_results" holding the hits, each
        rendered as a <Document> block and separated by "---" lines. If the
        search tool reports an error as text, that text is returned instead.
    """
    # Runnable.invoke takes the tool input as its first positional argument;
    # passing it as `query=` leaves `input` unfilled and raises TypeError.
    search_docs = TavilySearchResults(max_results=3).invoke({"query": query})

    # On failure the Tavily tool hands back a string describing the error
    # rather than a list of hits; surface it instead of iterating characters.
    if isinstance(search_docs, str):
        return {"web_results": search_docs}

    # Tavily hits are plain dicts ("url", "content"), not Document objects,
    # so they have no .metadata / .page_content attributes.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.get("url", "")}" page=""/>\n{doc.get("content", "")}\n</Document>'
        for doc in search_docs
    )
    return {"web_results": formatted_search_docs}
119
 
120
 
121
  # ArXiv search tool
122
  @tool
123
def search_arxiv(query: str) -> dict[str, str]:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "arxiv_results" holding the papers, each
        rendered as a <Document> block (content truncated to 1000 characters)
        and separated by "---" lines.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    def _source(doc) -> str:
        # ArxivLoader's default metadata carries Published/Title/Authors/
        # Summary and no "source" key, so indexing ["source"] raises KeyError.
        # Fall back through the identifiers that may be present.
        meta = doc.metadata
        return meta.get("source") or meta.get("entry_id") or meta.get("Title", "")

    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{_source(doc)}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        for doc in search_docs
    )
    # Key was misspelled "arvix_results" in the original.
    return {"arxiv_results": formatted_search_docs}
135
 
136
 
137
  # build retriever
138
  # bm25_retriever = BM25Retriever.from_documents(docs)
139
 
140
 
 
 
 
 
 
141
  # init system message
142
  sys_msg = SystemMessage(content=system_prompt)
143
 
 
158
  def build_graph():
159
  # llm
160
  llm = ChatGroq(
161
+ model="meta-llama/llama-4-scout-17b-16e-instruct",
162
  temperature=0,
163
  )
164
  print(f"DEBUG: llm object = {llm}")
system_prompt.txt → prompts.py RENAMED
@@ -1,5 +1,6 @@
1
- You are a helpful assistant tasked with answering questions using a set of tools.
2
  Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
3
  FINAL ANSWER: [YOUR FINAL ANSWER].
4
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
5
  Your answer should only start with "FINAL ANSWER: ", then follows with the answer.
 
 
1
+ system_prompt = """You are a helpful assistant tasked with answering questions using a set of tools.
2
  Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
3
  FINAL ANSWER: [YOUR FINAL ANSWER].
4
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
5
  Your answer should only start with "FINAL ANSWER: ", then follows with the answer.
6
+ """
pyproject.toml CHANGED
@@ -18,5 +18,6 @@ dependencies = [
18
  "pandas>=2.2.3",
19
  "rank-bm25>=0.2.2",
20
  "requests>=2.32.3",
 
21
  "wikipedia>=1.4.0",
22
  ]
 
18
  "pandas>=2.2.3",
19
  "rank-bm25>=0.2.2",
20
  "requests>=2.32.3",
21
+ "tavily-python>=0.7.2",
22
  "wikipedia>=1.4.0",
23
  ]
requirements.txt CHANGED
@@ -11,4 +11,5 @@ langgraph
11
  pandas
12
  rank-bm25
13
  requests
 
14
  wikipedia
 
11
  pandas
12
  rank-bm25
13
  requests
14
+ tavily-python
15
  wikipedia
uv.lock CHANGED
The diff for this file is too large to render. See raw diff