Final_Assignment_Template

Sleeping

App Files Community

EtienneB committed on Jul 1

Commit

64434a5

1 Parent(s): 600dd01

update

Browse files

Files changed (3) hide show

agent.py +89 -37
app.py +0 -3
requirements.txt +14 -9

agent.py CHANGED Viewed

@@ -2,8 +2,7 @@ import os
 from dotenv import load_dotenv
 from langchain_community.vectorstores import Chroma
-from langchain_core.messages import HumanMessage, SystemMessage
-from langchain_core.tools import tool
 from langchain_huggingface import (ChatHuggingFace, HuggingFaceEmbeddings,
                                    HuggingFaceEndpoint)
 from langgraph.graph import START, MessagesState, StateGraph
@@ -33,7 +32,6 @@ tools = [
     wiki_search, analyze_excel_file, arvix_search, audio_transcription, python_code_parser
 ]
 # Load system prompt
 system_prompt = """
 You are a helpful assistant tasked with answering questions using a set of tools.
@@ -46,62 +44,106 @@ Your answer should only start with "FINAL ANSWER: ", then follows with the answe
 # System message
 sys_msg = SystemMessage(content=system_prompt)
-# Embeddings + Chroma Vector Store
-embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
-vector_store = Chroma(
-    collection_name="langgraph-documents",
-    embedding_function=embeddings,
-    persist_directory="chroma_db"  # Use a persistent directory
 )
 def build_graph():
-    """Build the graph"""
     # First create the HuggingFaceEndpoint
     llm_endpoint = HuggingFaceEndpoint(
-        # repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
-        repo_id="mistralai/Mistral-7B-Instruct-v0.2",
-        # Other models to try:
-        # "meta-llama/Llama-2-7b-chat-hf"
-        # "google/gemma-7b-it"
-        # "mosaicml/mpt-7b-instruct"
-        # "tiiuae/falcon-7b-instruct"
         huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
-        temperature=0.1,  # Lower temperature for more consistent responses
-        max_new_tokens=1024,
-        timeout=30,
-        )
     # Then wrap it with ChatHuggingFace to get chat model functionality
     llm = ChatHuggingFace(llm=llm_endpoint)
     # Bind tools to LLM
     llm_with_tools = llm.bind_tools(tools)
-    # Node
     def assistant(state: MessagesState):
         """Assistant node"""
-        return {"messages": [llm_with_tools.invoke([system_prompt] + state["messages"])]}
-    def retriever(state: MessagesState):
-        similar = vector_store.similarity_search(state["messages"][0].content)
-        if similar:
-            example_msg = HumanMessage(content=f"Here is a similar question:\n\n{similar[0].page_content}")
-            return {"messages": [sys_msg] + state["messages"] + [example_msg]}
-        return {"messages": [sys_msg] + state["messages"]}
     builder = StateGraph(MessagesState)
-    builder.add_node("retriever", retriever)
     builder.add_node("assistant", assistant)
     builder.add_node("tools", ToolNode(tools))
     builder.add_edge(START, "retriever")
     builder.add_edge("retriever", "assistant")
     builder.add_conditional_edges("assistant", tools_condition)
     builder.add_edge("tools", "assistant")
-     # Compile graph
     return builder.compile()
 # test
 if __name__ == "__main__":
     question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
@@ -109,6 +151,16 @@ if __name__ == "__main__":
     graph = build_graph()
     # Run the graph
     messages = [HumanMessage(content=question)]
-    messages = graph.invoke({"messages": messages})
-    for m in messages["messages"]:
-        m.pretty_print()

 from dotenv import load_dotenv
 from langchain_community.vectorstores import Chroma
+from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage
 from langchain_huggingface import (ChatHuggingFace, HuggingFaceEmbeddings,
                                    HuggingFaceEndpoint)
 from langgraph.graph import START, MessagesState, StateGraph
     wiki_search, analyze_excel_file, arvix_search, audio_transcription, python_code_parser
 ]
 # Load system prompt
 system_prompt = """
 You are a helpful assistant tasked with answering questions using a set of tools.
 # System message
 sys_msg = SystemMessage(content=system_prompt)
+def get_vector_store(persist_directory="chroma_db"):
+    """
+    Initializes and returns a Chroma vector store.
+    If the database exists, it loads it. If not, it creates it,
+    adds some initial documents, and persists them.
+    """
+    embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+    if os.path.exists(persist_directory) and os.listdir(persist_directory):
+        print("Loading existing vector store...")
+        vector_store = Chroma(
+            persist_directory=persist_directory,
+            embedding_function=embedding_function
+        )
+    else:
+        print("Creating new vector store...")
+        os.makedirs(persist_directory, exist_ok=True)
+        # Example documents to add
+        initial_documents = [
+            "The Principle of Double Effect is an ethical theory that distinguishes between the intended and foreseen consequences of an action.",
+            "St. Thomas Aquinas is often associated with the development of the Principle of Double Effect.",
+            "LangGraph is a library for building stateful, multi-actor applications with LLMs.",
+            "Chroma is a vector database used for storing and retrieving embeddings."
+        ]
+        vector_store = Chroma.from_texts(
+            texts=initial_documents,
+            embedding=embedding_function,
+            persist_directory=persist_directory
+        )
+        # No need to call persist() when using from_texts with a persist_directory
+    return vector_store
+# --- Initialize Vector Store and Retriever ---
+vector_store = get_vector_store()
+retriever_component = vector_store.as_retriever(
+    search_type="mmr",  # Use Maximum Marginal Relevance for diverse results
+    search_kwargs={'k': 2, 'lambda_mult': 0.5}  # Retrieve 2 documents
 )
 def build_graph():
+    """Build the graph"""
     # First create the HuggingFaceEndpoint
     llm_endpoint = HuggingFaceEndpoint(
+        repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
         huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
+        temperature=0.3,
+        max_new_tokens=2048,
+        timeout=60,
+    )
     # Then wrap it with ChatHuggingFace to get chat model functionality
     llm = ChatHuggingFace(llm=llm_endpoint)
     # Bind tools to LLM
     llm_with_tools = llm.bind_tools(tools)
+    # --- Nodes ---
     def assistant(state: MessagesState):
         """Assistant node"""
+        # Prepend the system message to the state
+        messages_with_system_prompt = [sys_msg] + state["messages"]
+        return {"messages": [llm_with_tools.invoke(messages_with_system_prompt)]}
+    def retriever_node(state: MessagesState):
+        """
+        Retrieves relevant documents from the vector store based on the latest human message.
+        """
+        last_human_message = state["messages"][-1].content
+        retrieved_docs = retriever_component.invoke(last_human_message)
+        if retrieved_docs:
+            retrieved_context = "\n\n".join([doc.page_content for doc in retrieved_docs])
+            # Create a ToolMessage to hold the retrieved context
+            context_message = ToolMessage(
+                content=f"Retrieved context from vector store:\n\n{retrieved_context}",
+                tool_call_id="retriever" # A descriptive ID
+            )
+            return {"messages": [context_message]}
+        return {"messages": []}
+    # --- Graph Definition ---
     builder = StateGraph(MessagesState)
+    builder.add_node("retriever", retriever_node)
     builder.add_node("assistant", assistant)
     builder.add_node("tools", ToolNode(tools))
     builder.add_edge(START, "retriever")
     builder.add_edge("retriever", "assistant")
     builder.add_conditional_edges("assistant", tools_condition)
     builder.add_edge("tools", "assistant")
+    # Compile graph
     return builder.compile()
 # test
 if __name__ == "__main__":
     question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
     graph = build_graph()
     # Run the graph
     messages = [HumanMessage(content=question)]
+    # The initial state for the graph
+    initial_state = {"messages": messages}
+    # Invoke the graph stream to see the steps
+    for s in graph.stream(initial_state, stream_mode="values"):
+        message = s["messages"][-1]
+        if isinstance(message, ToolMessage):
+            print("---RETRIEVED CONTEXT---")
+            print(message.content)
+            print("-----------------------")
+        else:
+            message.pretty_print()

app.py CHANGED Viewed

@@ -36,9 +36,6 @@ class BasicAgent:
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,

 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,

requirements.txt CHANGED Viewed

@@ -2,6 +2,7 @@
 gradio
 requests
 pandas
 # LangChain and ecosystem
 langchain
@@ -10,24 +11,28 @@ langchain-community
 langgraph
 langchain-huggingface
 langchain-chroma
 sentence-transformers
 # Hugging Face integration
 huggingface_hub
 transformers
-accelerate  # Needed for many transformer-based models
 # Environment config
 python-dotenv
 # Tools dependencies
-duckduckgo-search  # Required for web_search tool
-pytz             # Required for get_current_time_in_timezone tool
-# Additional utilities for better error handling and performance
 typing-extensions
-asyncio-throttle  # For rate limiting (optional)
-tenacity          # For retry logic (optional)
-# Optional: For better logging and monitoring
-loguru            # Better logging (optional)

 gradio
 requests
 pandas
+openpyxl # For reading excel files with pandas
 # LangChain and ecosystem
 langchain
 langgraph
 langchain-huggingface
 langchain-chroma
+chromadb # Explicitly add the Chroma database
 sentence-transformers
 # Hugging Face integration
 huggingface_hub
 transformers
+accelerate
 # Environment config
 python-dotenv
 # Tools dependencies
+duckduckgo-search
+pytz
+wikipedia # For WikipediaLoader
+arxiv # For ArxivLoader
+assemblyai # For AssemblyAIAudioTranscriptLoader
+tree-sitter # For LanguageParser
+tree-sitter-languages # For LanguageParser
+# Additional utilities
 typing-extensions
+asyncio-throttle
+tenacity
+loguru