josondev committed
Commit b857b00 · verified · 1 Parent(s): 2c0f430

Update veryfinal.py

Files changed (1)
  1. veryfinal.py +131 -92
veryfinal.py CHANGED
@@ -1,4 +1,4 @@
-"""LangGraph Agent with FAISS Vector Store and Custom Tools"""
+"""LangGraph Agent with Best Free Models and Minimal Rate Limits"""
 import os, time, random
 from dotenv import load_dotenv
 from typing import List, Dict, Any, TypedDict, Annotated
@@ -26,11 +26,13 @@ from langchain_community.document_loaders import JSONLoader
 
 load_dotenv()
 
-# Advanced Rate Limiter (SILENT)
+# Advanced Rate Limiter with Exponential Backoff
 class AdvancedRateLimiter:
-    def __init__(self, requests_per_minute: int):
+    def __init__(self, requests_per_minute: int, provider_name: str):
         self.requests_per_minute = requests_per_minute
+        self.provider_name = provider_name
         self.request_times = []
+        self.consecutive_failures = 0
 
     def wait_if_needed(self):
         current_time = time.time()
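The hunk context ends just inside `wait_if_needed`, and the sliding-window arithmetic it relies on only resurfaces at the top of the next hunk. For orientation, here is a minimal, self-contained sketch of the pattern both the old and new limiter share; the pruning step is elided between hunks, so its exact form here is an assumption:

```python
import time, random

class SlidingWindowLimiter:
    """Minimal sketch of the rate-limiting pattern used above."""

    def __init__(self, requests_per_minute: int):
        self.requests_per_minute = requests_per_minute
        self.request_times = []

    def wait_if_needed(self):
        current_time = time.time()
        # Assumed pruning step: drop timestamps older than the 60-second window.
        self.request_times = [t for t in self.request_times if current_time - t < 60]
        if len(self.request_times) >= self.requests_per_minute:
            # Sleep until the oldest request leaves the window, plus jitter.
            time.sleep(60 - (current_time - self.request_times[0]) + random.uniform(2, 8))
        # Record this request.
        self.request_times.append(current_time)
```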
@@ -42,72 +44,139 @@ class AdvancedRateLimiter:
             wait_time = 60 - (current_time - self.request_times[0]) + random.uniform(2, 8)
             time.sleep(wait_time)
 
+        # Add exponential backoff for consecutive failures
+        if self.consecutive_failures > 0:
+            backoff_time = min(2 ** self.consecutive_failures, 60) + random.uniform(1, 3)
+            time.sleep(backoff_time)
+
         # Record this request
         self.request_times.append(current_time)
+
+    def record_success(self):
+        self.consecutive_failures = 0
+
+    def record_failure(self):
+        self.consecutive_failures += 1
 
-# Initialize rate limiters
-groq_limiter = AdvancedRateLimiter(requests_per_minute=30)
-gemini_limiter = AdvancedRateLimiter(requests_per_minute=2)
-nvidia_limiter = AdvancedRateLimiter(requests_per_minute=5)
+# Initialize rate limiters based on search results
+# Gemini 2.0 Flash-Lite: 30 RPM (highest free tier)
+gemini_limiter = AdvancedRateLimiter(requests_per_minute=25, provider_name="Gemini")  # Conservative
+
+# Groq: Typically 30 RPM for free tier
+groq_limiter = AdvancedRateLimiter(requests_per_minute=25, provider_name="Groq")  # Conservative
+
+# NVIDIA: Typically 5 RPM for free tier
+nvidia_limiter = AdvancedRateLimiter(requests_per_minute=4, provider_name="NVIDIA")  # Very conservative
+
+# Initialize LLMs with best models and minimal rate limits
+def get_best_models():
+    """Get the best models with lowest rate limits"""
+
+    # Gemini 2.0 Flash-Lite - Best rate limit (30 RPM) with good performance
+    gemini_llm = ChatGoogleGenerativeAI(
+        model="gemini-2.0-flash-lite",  # Best rate limit from search results
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        temperature=0,
+        max_output_tokens=4000
+    )
+
+    # Groq Llama 3.3 70B - Fast and capable
+    groq_llm = ChatGroq(
+        model="llama-3.3-70b-versatile",
+        api_key=os.getenv("GROQ_API_KEY"),
+        temperature=0,
+        max_tokens=4000
+    )
+
+    # NVIDIA Llama 3.1 70B - Good for specialized tasks
+    nvidia_llm = ChatNVIDIA(
+        model="meta/llama-3.1-70b-instruct",
+        api_key=os.getenv("NVIDIA_API_KEY"),
+        temperature=0,
+        max_tokens=4000
+    )
+
+    return {
+        "gemini": gemini_llm,
+        "groq": groq_llm,
+        "nvidia": nvidia_llm
+    }
+
+# Fallback strategy with rate limit handling
+class ModelFallbackManager:
+    def __init__(self):
+        self.models = get_best_models()
+        self.limiters = {
+            "gemini": gemini_limiter,
+            "groq": groq_limiter,
+            "nvidia": nvidia_limiter
+        }
+        self.fallback_order = ["gemini", "groq", "nvidia"]  # Order by rate limit capacity
+
+    def invoke_with_fallback(self, messages, max_retries=3):
+        """Try models in order with rate limiting and fallbacks"""
+
+        for provider in self.fallback_order:
+            limiter = self.limiters[provider]
+            model = self.models[provider]
+
+            for attempt in range(max_retries):
+                try:
+                    # Apply rate limiting
+                    limiter.wait_if_needed()
+
+                    # Try to invoke the model
+                    response = model.invoke(messages)
+                    limiter.record_success()
+                    return response
+
+                except Exception as e:
+                    error_msg = str(e).lower()
+
+                    # Check if it's a rate limit error
+                    if any(keyword in error_msg for keyword in ['rate limit', '429', 'quota', 'too many requests']):
+                        limiter.record_failure()
+                        wait_time = (2 ** attempt) + random.uniform(10, 30)
+                        time.sleep(wait_time)
+                        continue
+                    else:
+                        # Non-rate limit error, try next provider
+                        break
+
+        # If all providers fail
+        raise Exception("All model providers failed or hit rate limits")
 
 # Custom Tools
 @tool
 def multiply(a: int, b: int) -> int:
-    """Multiply two numbers.
-    Args:
-        a: first int
-        b: second int
-    """
+    """Multiply two numbers."""
     return a * b
 
 @tool
 def add(a: int, b: int) -> int:
-    """Add two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
+    """Add two numbers."""
     return a + b
 
 @tool
 def subtract(a: int, b: int) -> int:
-    """Subtract two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
+    """Subtract two numbers."""
     return a - b
 
 @tool
 def divide(a: int, b: int) -> float:
-    """Divide two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
+    """Divide two numbers."""
     if b == 0:
         raise ValueError("Cannot divide by zero.")
     return a / b
 
 @tool
 def modulus(a: int, b: int) -> int:
-    """Get the modulus of two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
+    """Get the modulus of two numbers."""
     return a % b
 
 @tool
 def wiki_search(query: str) -> str:
-    """Search Wikipedia for a query and return maximum 2 results.
-
-    Args:
-        query: The search query."""
+    """Search Wikipedia for a query and return maximum 2 results."""
     try:
         time.sleep(random.uniform(1, 3))
         search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
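The `ModelFallbackManager` added above layers three behaviors: per-provider rate limiting, exponential backoff on rate-limit errors, and provider fallback. The retry ladder in isolation, with a hypothetical stub standing in for the real chat clients, behaves like this sketch:

```python
import time

class StubModel:
    """Hypothetical model that fails twice with a 429-style error, then succeeds."""

    def __init__(self):
        self.calls = 0

    def invoke(self, messages):
        self.calls += 1
        if self.calls < 3:
            raise Exception("429 Too Many Requests")
        return f"answer after {self.calls} calls"

model = StubModel()
for attempt in range(3):
    try:
        print(model.invoke(["hi"]))  # succeeds on the third attempt
        break
    except Exception as e:
        if "429" in str(e).lower():
            time.sleep((2 ** attempt) * 0.01)  # backoff, shortened for the demo
            continue
        raise  # a non-rate-limit error would instead fall through to the next provider
```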
@@ -122,10 +191,7 @@ def wiki_search(query: str) -> str:
 
 @tool
 def web_search(query: str) -> str:
-    """Search Tavily for a query and return maximum 3 results.
-
-    Args:
-        query: The search query."""
+    """Search Tavily for a query and return maximum 3 results."""
     try:
         time.sleep(random.uniform(2, 5))
         search_docs = TavilySearchResults(max_results=3).invoke(query=query)
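One caveat in the unchanged context line above: `TavilySearchResults(max_results=3).invoke(query=query)` passes the input as a keyword argument, while LangChain tools expect the input itself as the first argument. The conventional call shape is:

```python
# LangChain tools take their input as the first argument, commonly a dict:
search_docs = TavilySearchResults(max_results=3).invoke({"query": query})
```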
@@ -140,10 +206,7 @@ def web_search(query: str) -> str:
 
 @tool
 def arvix_search(query: str) -> str:
-    """Search Arxiv for a query and return maximum 3 result.
-
-    Args:
-        query: The search query."""
+    """Search Arxiv for a query and return maximum 3 results."""
     try:
         time.sleep(random.uniform(1, 4))
         search_docs = ArxivLoader(query=query, load_max_docs=3).load()
@@ -156,7 +219,7 @@ def arvix_search(query: str) -> str:
     except Exception as e:
         return f"ArXiv search failed: {str(e)}"
 
-# Load and process JSONL data for FAISS vector store
+# Setup FAISS vector store
 def setup_faiss_vector_store():
     """Setup FAISS vector database from JSONL metadata"""
     try:
@@ -177,15 +240,12 @@ def setup_faiss_vector_store():
         }
         """
 
-        # Load documents
         json_loader = JSONLoader(file_path="metadata.jsonl", jq_schema=jq_schema, json_lines=True, text_content=False)
         json_docs = json_loader.load()
 
-        # Split documents
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=200)
         json_chunks = text_splitter.split_documents(json_docs)
 
-        # Create FAISS vector store
         embeddings = NVIDIAEmbeddings(
             model="nvidia/nv-embedqa-e5-v5",
             api_key=os.getenv("NVIDIA_API_KEY")
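The `jq_schema` whose tail closes this hunk flattens one JSON object per line of `metadata.jsonl` into document text for `JSONLoader(..., json_lines=True)`. The full schema sits outside the hunk, so the record below is only a hypothetical illustration of the input shape:

```python
import json

# Hypothetical metadata.jsonl record; the real field names are defined by the
# elided jq_schema, so treat these keys as placeholders.
record = {"task_id": "example-001", "Question": "What is 2 + 2?", "Final answer": "4"}
print(json.dumps(record))  # one JSON object per line is what json_lines=True expects
```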
@@ -205,13 +265,11 @@ except FileNotFoundError:
 system_prompt = """You are a helpful assistant tasked with answering questions using a set of tools.
 Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
 FINAL ANSWER: [YOUR FINAL ANSWER].
-YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
-Your answer should only start with "FINAL ANSWER: ", then follows with the answer."""
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings."""
 
-# System message
 sys_msg = SystemMessage(content=system_prompt)
 
-# Setup FAISS vector store and retriever
+# Setup vector store and retriever
 vector_store = setup_faiss_vector_store()
 if vector_store:
     retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
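The branch that turns `retriever` into `retriever_tool` falls between this hunk and the next. A common construction (an assumption about this repo, not confirmed by the diff) is LangChain's `create_retriever_tool` helper:

```python
from langchain.tools.retriever import create_retriever_tool

# Hypothetical construction; the name and description here are illustrative only.
retriever_tool = create_retriever_tool(
    retriever,
    name="question_search",
    description="Search the metadata.jsonl vector store for similar questions.",
)
```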
@@ -224,47 +282,30 @@ else:
     retriever_tool = None
 
 # All tools
-all_tools = [
-    multiply,
-    add,
-    subtract,
-    divide,
-    modulus,
-    wiki_search,
-    web_search,
-    arvix_search,
-]
-
+all_tools = [multiply, add, subtract, divide, modulus, wiki_search, web_search, arvix_search]
 if retriever_tool:
     all_tools.append(retriever_tool)
 
-# Build graph function
-def build_graph(provider: str = "groq"):
-    """Build the LangGraph with rate limiting"""
+# Build graph function with fallback manager
+def build_graph():
+    """Build the LangGraph with rate limiting and fallbacks"""
 
-    # Initialize LLMs with best free models
-    if provider == "google":
-        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-thinking-exp", temperature=0)
-    elif provider == "groq":
-        llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)
-    elif provider == "nvidia":
-        llm = ChatNVIDIA(model="meta/llama-3.1-70b-instruct", temperature=0)
-    else:
-        raise ValueError("Invalid provider. Choose 'google', 'groq' or 'nvidia'.")
+    fallback_manager = ModelFallbackManager()
 
-    # Bind tools to LLM
-    llm_with_tools = llm.bind_tools(all_tools)
+    # Create a wrapper LLM that uses fallback manager
+    class FallbackLLM:
+        def bind_tools(self, tools):
+            self.tools = tools
+            return self
+
+        def invoke(self, messages):
+            return fallback_manager.invoke_with_fallback(messages)
+
+    llm_with_tools = FallbackLLM().bind_tools(all_tools)
 
     # Node functions
     def assistant(state: MessagesState):
-        """Assistant node with rate limiting"""
-        if provider == "groq":
-            groq_limiter.wait_if_needed()
-        elif provider == "google":
-            gemini_limiter.wait_if_needed()
-        elif provider == "nvidia":
-            nvidia_limiter.wait_if_needed()
-
+        """Assistant node with fallback handling"""
         return {"messages": [llm_with_tools.invoke(state["messages"])]}
 
     def retriever_node(state: MessagesState):
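The wiring of the graph itself falls outside this hunk. A typical LangGraph layout for this assistant-plus-tools loop is sketched below; the repo's actual nodes, edges, and checkpointer may differ. Note also that `FallbackLLM.bind_tools` records the tools without binding them to the underlying models, so as written the models never emit tool calls and `tools_condition` would always route to the end; binding inside `get_best_models` (for example `llm.bind_tools(all_tools)`) would be the usual fix.

```python
from langgraph.graph import StateGraph, MessagesState, START
from langgraph.prebuilt import ToolNode, tools_condition
from langgraph.checkpoint.memory import MemorySaver

builder = StateGraph(MessagesState)
builder.add_node("assistant", assistant)          # node defined in the hunk above
builder.add_node("tools", ToolNode(all_tools))    # runs whichever tool the LLM requests
builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", tools_condition)  # to "tools" or END
builder.add_edge("tools", "assistant")
# The test harness passes a thread_id, which implies a checkpointer:
graph = builder.compile(checkpointer=MemorySaver())
```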
@@ -299,9 +340,7 @@ def build_graph(provider: str = "groq"):
 # Test
 if __name__ == "__main__":
     question = "What are the names of the US presidents who were assassinated?"
-    # Build the graph
-    graph = build_graph(provider="groq")
-    # Run the graph
+    graph = build_graph()
     messages = [HumanMessage(content=question)]
     config = {"configurable": {"thread_id": "test_thread"}}
     result = graph.invoke({"messages": messages}, config)
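The test block stops at `graph.invoke`; reading the answer back out is one more step (a sketch):

```python
# The last message in the returned state carries the model's reply,
# expected to end with "FINAL ANSWER: ...".
print(result["messages"][-1].content)
```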
 