Final_Assignment_Template

Runtime error

App Files Files Community

LamiaYT committed on Jun 29

Commit

8d36e0e

1 Parent(s): 7343388

Fix

Browse files

Files changed (1) hide show

agent.py +151 -131

agent.py CHANGED Viewed

@@ -14,13 +14,14 @@ serper_api_key = os.getenv("SERPER_API_KEY")
 # ---- Imports ----
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import tools_condition, ToolNode
-from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
 from langchain_community.vectorstores import Chroma
 from langchain_core.documents import Document
-from langchain_core.messages import SystemMessage, HumanMessage
 from langchain_core.tools import tool
 from langchain.tools.retriever import create_retriever_tool
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
@@ -32,6 +33,61 @@ import re
 import math
 from datetime import datetime
 # ---- Enhanced Tools ----
 @tool
@@ -105,16 +161,25 @@ def compound_interest(principal: float, rate: float, time: float, n: int = 1) ->
     """Calculate compound interest"""
     return principal * (1 + rate/n) ** (n * time)
 @tool
 def wiki_search(query: str) -> str:
     """Search Wikipedia for information"""
     try:
-        search_docs = WikipediaLoader(query=query, load_max_docs=3).load()
         if not search_docs:
             return "No Wikipedia results found."
         formatted = "\n\n---\n\n".join([
-            f'<Document source="{doc.metadata.get("source", "Wikipedia")}" title="{doc.metadata.get("title", "Unknown")}"/>\n{doc.page_content[:2000]}\n</Document>'
             for doc in search_docs
         ])
         return formatted
@@ -125,12 +190,12 @@ def wiki_search(query: str) -> str:
 def web_search(query: str) -> str:
     """Search the web using Tavily"""
     try:
-        search_docs = TavilySearchResults(max_results=3).invoke(query=query)
         if not search_docs:
             return "No web search results found."
         formatted = "\n\n---\n\n".join([
-            f'<Document source="{doc.get("url", "Unknown")}" title="{doc.get("title", "Unknown")}"/>\n{doc.get("content", "")[:2000]}\n</Document>'
             for doc in search_docs
         ])
         return formatted
@@ -138,56 +203,24 @@ def web_search(query: str) -> str:
         return f"Web search error: {str(e)}"
 @tool
-def arxiv_search(query: str) -> str:
-    """Search ArXiv for academic papers"""
-    try:
-        search_docs = ArxivLoader(query=query, load_max_docs=2).load()
-        if not search_docs:
-            return "No ArXiv results found."
-        formatted = "\n\n---\n\n".join([
-            f'<Document source="{doc.metadata.get("source", "ArXiv")}" title="{doc.metadata.get("Title", "Unknown")}"/>\n{doc.page_content[:1500]}\n</Document>'
-            for doc in search_docs
-        ])
-        return formatted
-    except Exception as e:
-        return f"ArXiv search error: {str(e)}"
-@tool
-def serper_search(query: str) -> str:
-    """Enhanced web search using Serper API"""
-    if not serper_api_key:
-        return "Serper API key not available"
     try:
-        url = "https://google.serper.dev/search"
-        payload = json.dumps({
-            "q": query,
-            "num": 5
-        })
-        headers = {
-            'X-API-KEY': serper_api_key,
-            'Content-Type': 'application/json'
-        }
-        response = requests.request("POST", url, headers=headers, data=payload)
-        results = response.json()
-        if 'organic' not in results:
-            return "No search results found"
-        formatted = "\n\n---\n\n".join([
-            f'<Document source="{result.get("link", "Unknown")}" title="{result.get("title", "Unknown")}"/>\n{result.get("snippet", "")}\n</Document>'
-            for result in results['organic'][:3]
-        ])
-        return formatted
     except Exception as e:
-        return f"Serper search error: {str(e)}"
 # ---- Embedding & Vector Store Setup ----
 def setup_vector_store():
     try:
-        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
         # Check if metadata.jsonl exists and load it
         if os.path.exists('metadata.jsonl'):
@@ -195,16 +228,20 @@ def setup_vector_store():
             with open('metadata.jsonl', 'r') as jsonl_file:
                 for line in jsonl_file:
                     if line.strip():  # Skip empty lines
-                        json_QA.append(json.loads(line))
             if json_QA:
-                documents = [
-                    Document(
-                        page_content=f"Question: {sample.get('Question', '')}\n\nFinal answer: {sample.get('Final answer', '')}",
-                        metadata={"source": sample.get("task_id", "unknown")}
-                    )
-                    for sample in json_QA if sample.get('Question') and sample.get('Final answer')
-                ]
                 if documents:
                     vector_store = Chroma.from_documents(
@@ -228,7 +265,6 @@ def setup_vector_store():
     except Exception as e:
         print(f"Vector store setup error: {e}")
-        # Return a dummy vector store function
         return None
 vector_store = setup_vector_store()
@@ -237,15 +273,15 @@ vector_store = setup_vector_store()
 def similar_question_search(query: str) -> str:
     """Search for similar questions in the knowledge base"""
     if not vector_store:
-        return "Vector store not available"
     try:
-        matched_docs = vector_store.similarity_search(query, 3)
         if not matched_docs:
             return "No similar questions found"
-        formatted = "\n\n---\n\n".join([
-            f'<Document source="{doc.metadata.get("source", "Unknown")}" />\n{doc.page_content[:1000]}\n</Document>'
             for doc in matched_docs
         ])
         return formatted
@@ -254,110 +290,97 @@ def similar_question_search(query: str) -> str:
 # ---- Enhanced System Prompt ----
 system_prompt = """
-You are an expert assistant capable of solving complex questions using available tools. You have access to:
-1. Mathematical tools: add, subtract, multiply, divide, modulus, power, square_root, factorial, gcd, lcm, percentage, compound_interest
-2. Search tools: wiki_search, web_search, arxiv_search, serper_search, similar_question_search
-IMPORTANT INSTRUCTIONS:
-1. Break down complex questions into smaller steps
-2. Use tools systematically to gather information and perform calculations
-3. For mathematical problems, show your work step by step
-4. For factual questions, search for current and accurate information
-5. Cross-reference information from multiple sources when possible
-6. Be precise with numbers - avoid rounding unless necessary
-When providing your final answer, use this exact format:
-FINAL ANSWER: [YOUR ANSWER]
-Rules for the final answer:
-- Numbers: Use plain digits without commas, units, or symbols (unless specifically requested)
-- Strings: Use exact names without articles or abbreviations
-- Lists: Comma-separated values following the above rules
-- Be concise and accurate
-Think step by step and use the available tools to ensure accuracy.
 """
 sys_msg = SystemMessage(content=system_prompt)
-# ---- Enhanced Tool List ----
 tools = [
     # Math tools
     multiply, add, subtract, divide, modulus, power, square_root,
-    factorial, gcd, lcm, percentage, compound_interest,
     # Search tools
-    wiki_search, web_search, arxiv_search, serper_search, similar_question_search
 ]
 # ---- Graph Definition ----
 def build_graph(provider: str = "huggingface"):
-    """Build the agent graph with improved HuggingFace model"""
     if provider == "huggingface":
-        # Use a more capable model from HuggingFace
-        try:
-            # Try with a well-supported model first
-            endpoint = HuggingFaceEndpoint(
-                repo_id="google/flan-t5-base",  # This model works well with the current setup
-                temperature=0.1,
-                huggingfacehub_api_token=hf_token,
-                max_new_tokens=512,
-                task="text2text-generation"
-            )
-            llm = ChatHuggingFace(llm=endpoint)
-        except Exception as e:
-            print(f"Failed to initialize google/flan-t5-base: {e}")
-            # Fallback to another model
             try:
-                endpoint = HuggingFaceEndpoint(
-                    repo_id="microsoft/DialoGPT-medium",
-                    temperature=0.1,
-                    huggingfacehub_api_token=hf_token,
-                    max_new_tokens=512
-                )
-                llm = ChatHuggingFace(llm=endpoint)
-            except Exception as e2:
-                print(f"Failed to initialize DialoGPT-medium: {e2}")
-                # Final fallback
-                endpoint = HuggingFaceEndpoint(
-                    repo_id="bigscience/bloom-560m",
-                    temperature=0.1,
-                    huggingfacehub_api_token=hf_token,
-                    max_new_tokens=256
-                )
-                llm = ChatHuggingFace(llm=endpoint)
     else:
-        raise ValueError("Only 'huggingface' provider is supported in this version.")
-    llm_with_tools = llm.bind_tools(tools)
     def assistant(state: MessagesState):
-        """Enhanced assistant node with better error handling"""
         try:
             messages = state["messages"]
-            response = llm_with_tools.invoke(messages)
             return {"messages": [response]}
         except Exception as e:
             print(f"Assistant error: {e}")
-            # Fallback response
-            fallback_msg = HumanMessage(content=f"I encountered an error: {str(e)}. Let me try a simpler approach.")
-            return {"messages": [fallback_msg]}
     def retriever(state: MessagesState):
-        """Enhanced retriever with better context injection"""
         messages = state["messages"]
         user_query = messages[-1].content if messages else ""
-        # Try to find similar questions
         context_messages = [sys_msg]
         if vector_store:
             try:
-                similar = vector_store.similarity_search(user_query, k=2)
                 if similar:
                     context_msg = HumanMessage(
-                        content=f"Here are similar questions for context:\n\n{similar[0].page_content}"
                     )
                     context_messages.append(context_msg)
             except Exception as e:
@@ -365,16 +388,13 @@ def build_graph(provider: str = "huggingface"):
         return {"messages": context_messages + messages}
-    # Build the graph
     builder = StateGraph(MessagesState)
     builder.add_node("retriever", retriever)
     builder.add_node("assistant", assistant)
-    builder.add_node("tools", ToolNode(tools))
-    # Define edges
     builder.add_edge(START, "retriever")
     builder.add_edge("retriever", "assistant")
-    builder.add_conditional_edges("assistant", tools_condition)
-    builder.add_edge("tools", "assistant")
     return builder.compile()

 # ---- Imports ----
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import tools_condition, ToolNode
+from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
 from langchain_community.vectorstores import Chroma
 from langchain_core.documents import Document
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
 from langchain_core.tools import tool
+from langchain_core.language_models.base import BaseLanguageModel
 from langchain.tools.retriever import create_retriever_tool
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
 import math
 from datetime import datetime
+# Custom HuggingFace LLM wrapper
+class SimpleHuggingFaceLLM(BaseLanguageModel):
+    def __init__(self, repo_id: str, hf_token: str):
+        super().__init__()
+        self.repo_id = repo_id
+        self.hf_token = hf_token
+        self.api_url = f"https://api-inference.huggingface.co/models/{repo_id}"
+        self.headers = {"Authorization": f"Bearer {hf_token}"}
+    def _generate(self, messages, stop=None, run_manager=None, **kwargs):
+        # Convert messages to a single prompt
+        if isinstance(messages, list):
+            prompt = messages[-1].content if messages else ""
+        else:
+            prompt = str(messages)
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "max_new_tokens": 512,
+                "temperature": 0.1,
+                "return_full_text": False
+            }
+        }
+        try:
+            response = requests.post(self.api_url, headers=self.headers, json=payload)
+            if response.status_code == 200:
+                result = response.json()
+                if isinstance(result, list) and len(result) > 0:
+                    generated_text = result[0].get('generated_text', '')
+                else:
+                    generated_text = str(result)
+                from langchain_core.outputs import LLMResult, Generation
+                return LLMResult(generations=[[Generation(text=generated_text)]])
+            else:
+                return LLMResult(generations=[[Generation(text=f"Error: {response.status_code}")]])
+        except Exception as e:
+            return LLMResult(generations=[[Generation(text=f"Error: {str(e)}")]])
+    def invoke(self, input, config=None, **kwargs):
+        if isinstance(input, list):
+            prompt = input[-1].content if input else ""
+        else:
+            prompt = str(input)
+        result = self._generate(prompt)
+        generated_text = result.generations[0][0].text
+        return AIMessage(content=generated_text)
+    @property
+    def _llm_type(self):
+        return "huggingface_custom"
 # ---- Enhanced Tools ----
 @tool
     """Calculate compound interest"""
     return principal * (1 + rate/n) ** (n * time)
+@tool
+def calculate_average(numbers: str) -> float:
+    """Calculate average of comma-separated numbers"""
+    try:
+        nums = [float(x.strip()) for x in numbers.split(',')]
+        return sum(nums) / len(nums)
+    except:
+        return 0.0
 @tool
 def wiki_search(query: str) -> str:
     """Search Wikipedia for information"""
     try:
+        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
         if not search_docs:
             return "No Wikipedia results found."
         formatted = "\n\n---\n\n".join([
+            f'Wikipedia: {doc.metadata.get("title", "Unknown")}\n{doc.page_content[:1500]}'
             for doc in search_docs
         ])
         return formatted
 def web_search(query: str) -> str:
     """Search the web using Tavily"""
     try:
+        search_docs = TavilySearchResults(max_results=2).invoke(query=query)
         if not search_docs:
             return "No web search results found."
         formatted = "\n\n---\n\n".join([
+            f'Web: {doc.get("title", "Unknown")}\n{doc.get("content", "")[:1500]}'
             for doc in search_docs
         ])
         return formatted
         return f"Web search error: {str(e)}"
 @tool
+def simple_calculation(expression: str) -> str:
+    """Safely evaluate simple mathematical expressions"""
     try:
+        # Remove any non-mathematical characters for safety
+        safe_chars = set('0123456789+-*/.() ')
+        if not all(c in safe_chars for c in expression):
+            return "Invalid characters in expression"
+        # Evaluate the expression
+        result = eval(expression)
+        return str(result)
     except Exception as e:
+        return f"Calculation error: {str(e)}"
 # ---- Embedding & Vector Store Setup ----
 def setup_vector_store():
     try:
+        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
         # Check if metadata.jsonl exists and load it
         if os.path.exists('metadata.jsonl'):
             with open('metadata.jsonl', 'r') as jsonl_file:
                 for line in jsonl_file:
                     if line.strip():  # Skip empty lines
+                        try:
+                            json_QA.append(json.loads(line))
+                        except:
+                            continue
             if json_QA:
+                documents = []
+                for sample in json_QA:
+                    if sample.get('Question') and sample.get('Final answer'):
+                        doc = Document(
+                            page_content=f"Question: {sample['Question']}\n\nAnswer: {sample['Final answer']}",
+                            metadata={"source": sample.get("task_id", "unknown")}
+                        )
+                        documents.append(doc)
                 if documents:
                     vector_store = Chroma.from_documents(
     except Exception as e:
         print(f"Vector store setup error: {e}")
         return None
 vector_store = setup_vector_store()
 def similar_question_search(query: str) -> str:
     """Search for similar questions in the knowledge base"""
     if not vector_store:
+        return "No similar questions available"
     try:
+        matched_docs = vector_store.similarity_search(query, k=2)
         if not matched_docs:
             return "No similar questions found"
+        formatted = "\n\n".join([
+            f'Similar Q&A:\n{doc.page_content[:800]}'
             for doc in matched_docs
         ])
         return formatted
 # ---- Enhanced System Prompt ----
 system_prompt = """
+You are an expert assistant that can solve various types of questions using available tools.
+Available tools:
+- Math: add, subtract, multiply, divide, modulus, power, square_root, factorial, gcd, lcm, percentage, compound_interest, calculate_average, simple_calculation
+- Search: wiki_search, web_search, similar_question_search
+Instructions:
+1. Read the question carefully
+2. Break down complex problems into steps
+3. Use appropriate tools to gather information or perform calculations
+4. Think step by step and show your reasoning
+5. Provide accurate, concise answers
+IMPORTANT: Always end your response with:
+FINAL ANSWER: [your answer here]
+For the final answer:
+- Numbers: Use plain digits (no commas, units, or symbols unless requested)
+- Text: Use exact names without articles
+- Lists: Comma-separated values
+Think carefully and use tools when needed.
 """
 sys_msg = SystemMessage(content=system_prompt)
+# ---- Tool List ----
 tools = [
     # Math tools
     multiply, add, subtract, divide, modulus, power, square_root,
+    factorial, gcd, lcm, percentage, compound_interest, calculate_average, simple_calculation,
     # Search tools
+    wiki_search, web_search, similar_question_search
 ]
 # ---- Graph Definition ----
 def build_graph(provider: str = "huggingface"):
+    """Build the agent graph with custom HuggingFace integration"""
     if provider == "huggingface":
+        # Use custom HuggingFace LLM with fallback models
+        models_to_try = [
+            "google/flan-t5-base",
+            "microsoft/DialoGPT-medium",
+            "bigscience/bloom-560m"
+        ]
+        llm = None
+        for model_id in models_to_try:
             try:
+                llm = SimpleHuggingFaceLLM(repo_id=model_id, hf_token=hf_token)
+                print(f"Successfully initialized model: {model_id}")
+                break
+            except Exception as e:
+                print(f"Failed to initialize {model_id}: {e}")
+                continue
+        if llm is None:
+            raise ValueError("Failed to initialize any HuggingFace model")
     else:
+        raise ValueError("Only 'huggingface' provider is supported")
+    # Simple tool binding simulation
+    def llm_with_tools(messages):
+        return llm.invoke(messages)
     def assistant(state: MessagesState):
+        """Assistant node with enhanced error handling"""
         try:
             messages = state["messages"]
+            response = llm_with_tools(messages)
             return {"messages": [response]}
         except Exception as e:
             print(f"Assistant error: {e}")
+            fallback_response = AIMessage(content="I encountered an error processing your request. Let me try a simpler approach.")
+            return {"messages": [fallback_response]}
     def retriever(state: MessagesState):
+        """Enhanced retriever with context injection"""
         messages = state["messages"]
         user_query = messages[-1].content if messages else ""
         context_messages = [sys_msg]
+        # Add similar question context if available
         if vector_store:
             try:
+                similar = vector_store.similarity_search(user_query, k=1)
                 if similar:
                     context_msg = HumanMessage(
+                        content=f"Here's a similar example:\n{similar[0].page_content[:500]}"
                     )
                     context_messages.append(context_msg)
             except Exception as e:
         return {"messages": context_messages + messages}
+    # Build simplified graph (without complex tool routing for now)
     builder = StateGraph(MessagesState)
     builder.add_node("retriever", retriever)
     builder.add_node("assistant", assistant)
+    # Simple linear flow
     builder.add_edge(START, "retriever")
     builder.add_edge("retriever", "assistant")
     return builder.compile()