Update veryfinal.py
veryfinal.py CHANGED (+131 -92)
@@ -1,4 +1,4 @@
-"""LangGraph Agent with
+"""LangGraph Agent with Best Free Models and Minimal Rate Limits"""
 import os, time, random
 from dotenv import load_dotenv
 from typing import List, Dict, Any, TypedDict, Annotated
@@ -26,11 +26,13 @@ from langchain_community.document_loaders import JSONLoader
 
 load_dotenv()
 
-# Advanced Rate Limiter
+# Advanced Rate Limiter with Exponential Backoff
 class AdvancedRateLimiter:
-    def __init__(self, requests_per_minute: int):
+    def __init__(self, requests_per_minute: int, provider_name: str):
         self.requests_per_minute = requests_per_minute
+        self.provider_name = provider_name
         self.request_times = []
+        self.consecutive_failures = 0
 
     def wait_if_needed(self):
         current_time = time.time()
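Note on the limiter: the elided body of wait_if_needed prunes timestamps older than 60 seconds and sleeps when the one-minute window is full. A minimal sketch of the intended calling pattern, using the record_success/record_failure helpers added further down; call_provider is a hypothetical stand-in, not a function from this file:

    limiter = AdvancedRateLimiter(requests_per_minute=25, provider_name="Example")
    limiter.wait_if_needed()        # block until a request slot is free
    try:
        response = call_provider()  # hypothetical provider API call
        limiter.record_success()    # reset the exponential-backoff counter
    except Exception:
        limiter.record_failure()    # lengthen the backoff before the next call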
@@ -42,72 +44,139 @@ class AdvancedRateLimiter:
             wait_time = 60 - (current_time - self.request_times[0]) + random.uniform(2, 8)
             time.sleep(wait_time)
 
+        # Add exponential backoff for consecutive failures
+        if self.consecutive_failures > 0:
+            backoff_time = min(2 ** self.consecutive_failures, 60) + random.uniform(1, 3)
+            time.sleep(backoff_time)
+
         # Record this request
         self.request_times.append(current_time)
 
+    def record_success(self):
+        self.consecutive_failures = 0
+
+    def record_failure(self):
+        self.consecutive_failures += 1
+
-#
-groq_limiter = AdvancedRateLimiter(requests_per_minute=
-
-
+# Initialize rate limiters based on search results
+# Gemini 2.0 Flash-Lite: 30 RPM (highest free tier)
+gemini_limiter = AdvancedRateLimiter(requests_per_minute=25, provider_name="Gemini")  # Conservative
+
+# Groq: Typically 30 RPM for free tier
+groq_limiter = AdvancedRateLimiter(requests_per_minute=25, provider_name="Groq")  # Conservative
+
+# NVIDIA: Typically 5 RPM for free tier
+nvidia_limiter = AdvancedRateLimiter(requests_per_minute=4, provider_name="NVIDIA")  # Very conservative
+
+# Initialize LLMs with best models and minimal rate limits
+def get_best_models():
+    """Get the best models with lowest rate limits"""
+
+    # Gemini 2.0 Flash-Lite - Best rate limit (30 RPM) with good performance
+    gemini_llm = ChatGoogleGenerativeAI(
+        model="gemini-2.0-flash-lite",  # Best rate limit from search results
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        temperature=0,
+        max_output_tokens=4000
+    )
+
+    # Groq Llama 3.3 70B - Fast and capable
+    groq_llm = ChatGroq(
+        model="llama-3.3-70b-versatile",
+        api_key=os.getenv("GROQ_API_KEY"),
+        temperature=0,
+        max_tokens=4000
+    )
+
+    # NVIDIA Llama 3.1 70B - Good for specialized tasks
+    nvidia_llm = ChatNVIDIA(
+        model="meta/llama-3.1-70b-instruct",
+        api_key=os.getenv("NVIDIA_API_KEY"),
+        temperature=0,
+        max_tokens=4000
+    )
+
+    return {
+        "gemini": gemini_llm,
+        "groq": groq_llm,
+        "nvidia": nvidia_llm
+    }
+
+# Fallback strategy with rate limit handling
+class ModelFallbackManager:
+    def __init__(self):
+        self.models = get_best_models()
+        self.limiters = {
+            "gemini": gemini_limiter,
+            "groq": groq_limiter,
+            "nvidia": nvidia_limiter
+        }
+        self.fallback_order = ["gemini", "groq", "nvidia"]  # Order by rate limit capacity
+
+    def invoke_with_fallback(self, messages, max_retries=3):
+        """Try models in order with rate limiting and fallbacks"""
+
+        for provider in self.fallback_order:
+            limiter = self.limiters[provider]
+            model = self.models[provider]
+
+            for attempt in range(max_retries):
+                try:
+                    # Apply rate limiting
+                    limiter.wait_if_needed()
+
+                    # Try to invoke the model
+                    response = model.invoke(messages)
+                    limiter.record_success()
+                    return response
+
+                except Exception as e:
+                    error_msg = str(e).lower()
+
+                    # Check if it's a rate limit error
+                    if any(keyword in error_msg for keyword in ['rate limit', '429', 'quota', 'too many requests']):
+                        limiter.record_failure()
+                        wait_time = (2 ** attempt) + random.uniform(10, 30)
+                        time.sleep(wait_time)
+                        continue
+                    else:
+                        # Non-rate limit error, try next provider
+                        break
+
+        # If all providers fail
+        raise Exception("All model providers failed or hit rate limits")
 
 # Custom Tools
 @tool
 def multiply(a: int, b: int) -> int:
-    """Multiply two numbers.
-    Args:
-        a: first int
-        b: second int
-    """
+    """Multiply two numbers."""
     return a * b
 
 @tool
 def add(a: int, b: int) -> int:
-    """Add two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
+    """Add two numbers."""
     return a + b
 
 @tool
 def subtract(a: int, b: int) -> int:
-    """Subtract two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
+    """Subtract two numbers."""
     return a - b
 
 @tool
 def divide(a: int, b: int) -> float:
-    """Divide two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
+    """Divide two numbers."""
     if b == 0:
         raise ValueError("Cannot divide by zero.")
     return a / b
 
 @tool
 def modulus(a: int, b: int) -> int:
-    """Get the modulus of two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
+    """Get the modulus of two numbers."""
     return a % b
 
 @tool
 def wiki_search(query: str) -> str:
-    """Search Wikipedia for a query and return maximum 2 results.
-
-    Args:
-        query: The search query."""
+    """Search Wikipedia for a query and return maximum 2 results."""
     try:
         time.sleep(random.uniform(1, 3))
         search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
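For context, the fallback chain added above tries Gemini first (highest free-tier RPM), then Groq, then NVIDIA, retrying each provider up to max_retries times on rate-limit errors before moving on. A minimal usage sketch, assuming the three API keys are set in the environment; the question text is illustrative:

    manager = ModelFallbackManager()
    reply = manager.invoke_with_fallback([HumanMessage(content="What is 6 * 7?")])
    print(reply.content)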
@@ -122,10 +191,7 @@ def wiki_search(query: str) -> str:
 
 @tool
 def web_search(query: str) -> str:
-    """Search Tavily for a query and return maximum 3 results.
-
-    Args:
-        query: The search query."""
+    """Search Tavily for a query and return maximum 3 results."""
     try:
         time.sleep(random.uniform(2, 5))
         search_docs = TavilySearchResults(max_results=3).invoke(query=query)
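Note: TavilySearchResults is a LangChain tool, and its invoke method takes the input as the first positional argument, so .invoke(query=query) as written would raise a TypeError at runtime. The conventional forms look like this sketch (not part of this commit):

    search_docs = TavilySearchResults(max_results=3).invoke(query)
    # or, with an explicit tool-input dict:
    search_docs = TavilySearchResults(max_results=3).invoke({"query": query})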
@@ -140,10 +206,7 @@ def web_search(query: str) -> str:
 
 @tool
 def arvix_search(query: str) -> str:
-    """Search Arxiv for a query and return maximum 3 result.
-
-    Args:
-        query: The search query."""
+    """Search Arxiv for a query and return maximum 3 results."""
     try:
         time.sleep(random.uniform(1, 4))
         search_docs = ArxivLoader(query=query, load_max_docs=3).load()
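Because @tool wraps each function in a LangChain structured tool, the objects above are invoked with an argument dict rather than called directly. A quick sketch for testing them in isolation:

    print(multiply.invoke({"a": 6, "b": 7}))                 # 42
    print(wiki_search.invoke({"query": "Abraham Lincoln"}))  # up to 2 Wikipedia results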
@@ -156,7 +219,7 @@ def arvix_search(query: str) -> str:
     except Exception as e:
         return f"ArXiv search failed: {str(e)}"
 
-#
+# Setup FAISS vector store
 def setup_faiss_vector_store():
     """Setup FAISS vector database from JSONL metadata"""
     try:
@@ -177,15 +240,12 @@ def setup_faiss_vector_store():
         }
         """
 
-        # Load documents
         json_loader = JSONLoader(file_path="metadata.jsonl", jq_schema=jq_schema, json_lines=True, text_content=False)
         json_docs = json_loader.load()
 
-        # Split documents
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=200)
         json_chunks = text_splitter.split_documents(json_docs)
 
-        # Create FAISS vector store
         embeddings = NVIDIAEmbeddings(
             model="nvidia/nv-embedqa-e5-v5",
             api_key=os.getenv("NVIDIA_API_KEY")
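Assuming metadata.jsonl is present and the index builds, querying the store would look like the following sketch (the query string is illustrative; k=3 matches the retriever configured below):

    store = setup_faiss_vector_store()
    if store:
        docs = store.as_retriever(search_kwargs={"k": 3}).invoke("US presidential assassinations")
        for doc in docs:
            print(doc.page_content[:100])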
@@ -205,13 +265,11 @@ except FileNotFoundError:
 system_prompt = """You are a helpful assistant tasked with answering questions using a set of tools.
 Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
 FINAL ANSWER: [YOUR FINAL ANSWER].
-YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
-Your answer should only start with "FINAL ANSWER: ", then follows with the answer."""
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings."""
 
-# System message
 sys_msg = SystemMessage(content=system_prompt)
 
-# Setup
+# Setup vector store and retriever
 vector_store = setup_faiss_vector_store()
 if vector_store:
     retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
@@ -224,47 +282,30 @@ else:
     retriever_tool = None
 
 # All tools
-all_tools = [
-    multiply,
-    add,
-    subtract,
-    divide,
-    modulus,
-    wiki_search,
-    web_search,
-    arvix_search,
-]
-
+all_tools = [multiply, add, subtract, divide, modulus, wiki_search, web_search, arvix_search]
 if retriever_tool:
     all_tools.append(retriever_tool)
 
-# Build graph function
-def build_graph(provider: str = "groq"):
-    """Build the LangGraph with rate limiting"""
+# Build graph function with fallback manager
+def build_graph():
+    """Build the LangGraph with rate limiting and fallbacks"""
 
-    if provider == "google":
-        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-thinking-exp", temperature=0)
-    elif provider == "groq":
-        llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)
-    elif provider == "nvidia":
-        llm = ChatNVIDIA(model="meta/llama-3.1-70b-instruct", temperature=0)
-    else:
-        raise ValueError("Invalid provider. Choose 'google', 'groq' or 'nvidia'.")
+    fallback_manager = ModelFallbackManager()
 
-    #
-
+    # Create a wrapper LLM that uses fallback manager
+    class FallbackLLM:
+        def bind_tools(self, tools):
+            self.tools = tools
+            return self
+
+        def invoke(self, messages):
+            return fallback_manager.invoke_with_fallback(messages)
+
+    llm_with_tools = FallbackLLM().bind_tools(all_tools)
 
     # Node functions
     def assistant(state: MessagesState):
-        """Assistant node with rate limiting"""
-        if provider == "groq":
-            groq_limiter.wait_if_needed()
-        elif provider == "google":
-            gemini_limiter.wait_if_needed()
-        elif provider == "nvidia":
-            nvidia_limiter.wait_if_needed()
-
+        """Assistant node with fallback handling"""
         return {"messages": [llm_with_tools.invoke(state["messages"])]}
 
     def retriever_node(state: MessagesState):
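One caveat in the wrapper above: FallbackLLM.bind_tools only stores the tool list on the wrapper, while ModelFallbackManager invokes the bare models, so the underlying LLMs never receive the tool schemas and cannot emit tool calls. A sketch of one way to propagate them, assuming each chat model supports the standard LangChain bind_tools interface (a suggested fix, not code from this commit):

    class FallbackLLM:
        def bind_tools(self, tools):
            # Rebind every underlying model so each provider sees the tool schemas
            for name, model in fallback_manager.models.items():
                fallback_manager.models[name] = model.bind_tools(tools)
            return self

        def invoke(self, messages):
            return fallback_manager.invoke_with_fallback(messages)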
@@ -299,9 +340,7 @@ def build_graph(provider: str = "groq"):
 # Test
 if __name__ == "__main__":
     question = "What are the names of the US presidents who were assassinated?"
-
-    graph = build_graph(provider="groq")
-    # Run the graph
+    graph = build_graph()
     messages = [HumanMessage(content=question)]
     config = {"configurable": {"thread_id": "test_thread"}}
     result = graph.invoke({"messages": messages}, config)
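Run directly, the test block builds the graph and invokes it on the sample question. To surface the answer, the final message would be printed, e.g. (a minimal sketch consistent with the MessagesState return shape; the system prompt asks the model to reply with a line starting "FINAL ANSWER:"):

    print(result["messages"][-1].content)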