Spaces:

Phoenix21
/

Chatbot2

Sleeping

App Files Files Community

Phoenix21 committed on Jan 10

Commit

99474e2

verified ·

1 Parent(s): 1f10543

Updated pipeline.py for the history feature

Browse files

Files changed (1) hide show

pipeline.py +48 -28

pipeline.py CHANGED Viewed

@@ -2,7 +2,7 @@
 import os
 import getpass
 import pandas as pd
-from typing import Optional
 from langchain.docstore.document import Document
 from langchain.embeddings import HuggingFaceEmbeddings
@@ -12,17 +12,17 @@ from langchain.chains import RetrievalQA
 from smolagents import CodeAgent, DuckDuckGoSearchTool, ManagedAgent, LiteLLMModel
 import litellm
-# We import the chain builders from our separate files
 from classification_chain import get_classification_chain
 from refusal_chain import get_refusal_chain
 from tailor_chain import get_tailor_chain
-from cleaner_chain import get_cleaner_chain, CleanerChain
-# We also import the relevant RAG logic here or define it directly
-# (We define build_rag_chain in this file for clarity)
 ###############################################################################
-# 1) Environment: set up keys if missing
 ###############################################################################
 if not os.environ.get("GEMINI_API_KEY"):
     os.environ["GEMINI_API_KEY"] = getpass.getpass("Enter your Gemini API Key: ")
@@ -30,7 +30,7 @@ if not os.environ.get("GROQ_API_KEY"):
     os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your GROQ API Key: ")
 ###############################################################################
-# 2) build_or_load_vectorstore
 ###############################################################################
 def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
     if os.path.exists(store_dir):
@@ -43,18 +43,22 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
         df = pd.read_csv(csv_path)
         df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
         df.columns = df.columns.str.strip()
         if "Answer" in df.columns:
             df.rename(columns={"Answer": "Answers"}, inplace=True)
         if "Question" not in df.columns and "Question " in df.columns:
             df.rename(columns={"Question ": "Question"}, inplace=True)
         if "Question" not in df.columns or "Answers" not in df.columns:
             raise ValueError("CSV must have 'Question' and 'Answers' columns.")
         docs = []
         for _, row in df.iterrows():
             q = str(row["Question"])
             ans = str(row["Answers"])
             doc = Document(page_content=ans, metadata={"question": q})
             docs.append(doc)
         embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
         vectorstore = FAISS.from_documents(docs, embedding=embeddings)
         vectorstore.save_local(store_dir)
@@ -63,15 +67,17 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
 ###############################################################################
 # 3) Build RAG chain for Gemini
 ###############################################################################
-from langchain.llms.base import LLM
 def build_rag_chain(llm_model: LiteLLMModel, vectorstore: FAISS) -> RetrievalQA:
     class GeminiLangChainLLM(LLM):
         def _call(self, prompt: str, stop: Optional[list] = None, **kwargs) -> str:
             messages = [{"role": "user", "content": prompt}]
             return llm_model(messages, stop_sequences=stop)
         @property
         def _llm_type(self) -> str:
             return "custom_gemini"
     retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
     gemini_as_llm = GeminiLangChainLLM()
     rag_chain = RetrievalQA.from_chain_type(
@@ -83,35 +89,29 @@ def build_rag_chain(llm_model: LiteLLMModel, vectorstore: FAISS) -> RetrievalQA:
     return rag_chain
 ###############################################################################
-# 4) Initialize all the separate chains
 ###############################################################################
-# Classification chain
 classification_chain = get_classification_chain()
-# Refusal chain
 refusal_chain = get_refusal_chain()
-# Tailor chain
 tailor_chain = get_tailor_chain()
-# Cleaner chain
 cleaner_chain = get_cleaner_chain()
 ###############################################################################
-# 5) Build our vectorstores + RAG chains
 ###############################################################################
 wellness_csv = "AIChatbot.csv"
 brand_csv = "BrandAI.csv"
 wellness_store_dir = "faiss_wellness_store"
 brand_store_dir = "faiss_brand_store"
 wellness_vectorstore = build_or_load_vectorstore(wellness_csv, wellness_store_dir)
 brand_vectorstore = build_or_load_vectorstore(brand_csv, brand_store_dir)
-gemini_llm = LiteLLMModel(model_id="gemini/gemini-pro", api_key=os.environ.get("GEMINI_API_KEY"))
 wellness_rag_chain = build_rag_chain(gemini_llm, wellness_vectorstore)
 brand_rag_chain = build_rag_chain(gemini_llm, brand_vectorstore)
-###############################################################################
-# 6) Tools / Agents for web search
-###############################################################################
 search_tool = DuckDuckGoSearchTool()
 web_agent = CodeAgent(tools=[search_tool], model=gemini_llm)
 managed_web_agent = ManagedAgent(agent=web_agent, name="web_search", description="Runs web search for you.")
@@ -124,24 +124,40 @@ def do_web_search(query: str) -> str:
     return response
 ###############################################################################
-# 7) Orchestrator: run_with_chain
 ###############################################################################
-def run_with_chain(query: str) -> str:
-    print("DEBUG: Starting run_with_chain...")
-    # 1) Classify
     class_result = classification_chain.invoke({"query": query})
     classification = class_result.get("text", "").strip()
     print("DEBUG: Classification =>", classification)
-    # If OutOfScope => refusal => tailor => return
     if classification == "OutOfScope":
         refusal_text = refusal_chain.run({})
         final_refusal = tailor_chain.run({"response": refusal_text})
         return final_refusal.strip()
-    # If Wellness => wellness RAG => if insufficient => web => unify => tailor
     if classification == "Wellness":
-        rag_result = wellness_rag_chain({"query": query})
         csv_answer = rag_result["result"].strip()
         if not csv_answer:
             web_answer = do_web_search(query)
@@ -151,19 +167,23 @@ def run_with_chain(query: str) -> str:
                 web_answer = do_web_search(query)
             else:
                 web_answer = ""
         final_merged = cleaner_chain.merge(kb=csv_answer, web=web_answer)
         final_answer = tailor_chain.run({"response": final_merged})
         return final_answer.strip()
-    # If Brand => brand RAG => tailor => return
     if classification == "Brand":
-        rag_result = brand_rag_chain({"query": query})
         csv_answer = rag_result["result"].strip()
         final_merged = cleaner_chain.merge(kb=csv_answer, web="")
         final_answer = tailor_chain.run({"response": final_merged})
         return final_answer.strip()
-    # fallback
     refusal_text = refusal_chain.run({})
     final_refusal = tailor_chain.run({"response": refusal_text})
     return final_refusal.strip()

 import os
 import getpass
 import pandas as pd
+from typing import Optional, List
 from langchain.docstore.document import Document
 from langchain.embeddings import HuggingFaceEmbeddings
 from smolagents import CodeAgent, DuckDuckGoSearchTool, ManagedAgent, LiteLLMModel
 import litellm
+# Import your classification/refusal/tailor/cleaner chains
 from classification_chain import get_classification_chain
 from refusal_chain import get_refusal_chain
 from tailor_chain import get_tailor_chain
+from cleaner_chain import get_cleaner_chain
+# For RAG chain building
+from langchain.llms.base import LLM
 ###############################################################################
+# 1) Environment: set up keys
 ###############################################################################
 if not os.environ.get("GEMINI_API_KEY"):
     os.environ["GEMINI_API_KEY"] = getpass.getpass("Enter your Gemini API Key: ")
     os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your GROQ API Key: ")
 ###############################################################################
+# 2) Build or Load VectorStore
 ###############################################################################
 def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
     if os.path.exists(store_dir):
         df = pd.read_csv(csv_path)
         df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
         df.columns = df.columns.str.strip()
         if "Answer" in df.columns:
             df.rename(columns={"Answer": "Answers"}, inplace=True)
         if "Question" not in df.columns and "Question " in df.columns:
             df.rename(columns={"Question ": "Question"}, inplace=True)
         if "Question" not in df.columns or "Answers" not in df.columns:
             raise ValueError("CSV must have 'Question' and 'Answers' columns.")
         docs = []
         for _, row in df.iterrows():
             q = str(row["Question"])
             ans = str(row["Answers"])
             doc = Document(page_content=ans, metadata={"question": q})
             docs.append(doc)
         embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
         vectorstore = FAISS.from_documents(docs, embedding=embeddings)
         vectorstore.save_local(store_dir)
 ###############################################################################
 # 3) Build RAG chain for Gemini
 ###############################################################################
 def build_rag_chain(llm_model: LiteLLMModel, vectorstore: FAISS) -> RetrievalQA:
     class GeminiLangChainLLM(LLM):
         def _call(self, prompt: str, stop: Optional[list] = None, **kwargs) -> str:
+            # We'll treat the entire prompt as 'user' content
             messages = [{"role": "user", "content": prompt}]
             return llm_model(messages, stop_sequences=stop)
         @property
         def _llm_type(self) -> str:
             return "custom_gemini"
     retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
     gemini_as_llm = GeminiLangChainLLM()
     rag_chain = RetrievalQA.from_chain_type(
     return rag_chain
 ###############################################################################
+# 4) Initialize your sub-chains
 ###############################################################################
 classification_chain = get_classification_chain()
 refusal_chain = get_refusal_chain()
 tailor_chain = get_tailor_chain()
 cleaner_chain = get_cleaner_chain()
 ###############################################################################
+# 5) Build VectorStores & RAG Chains
 ###############################################################################
 wellness_csv = "AIChatbot.csv"
 brand_csv = "BrandAI.csv"
 wellness_store_dir = "faiss_wellness_store"
 brand_store_dir = "faiss_brand_store"
+gemini_llm = LiteLLMModel(model_id="gemini/gemini-pro", api_key=os.environ.get("GEMINI_API_KEY"))
 wellness_vectorstore = build_or_load_vectorstore(wellness_csv, wellness_store_dir)
 brand_vectorstore = build_or_load_vectorstore(brand_csv, brand_store_dir)
 wellness_rag_chain = build_rag_chain(gemini_llm, wellness_vectorstore)
 brand_rag_chain = build_rag_chain(gemini_llm, brand_vectorstore)
 search_tool = DuckDuckGoSearchTool()
 web_agent = CodeAgent(tools=[search_tool], model=gemini_llm)
 managed_web_agent = ManagedAgent(agent=web_agent, name="web_search", description="Runs web search for you.")
     return response
 ###############################################################################
+# 6) Orchestrator: run_with_chain_context
 ###############################################################################
+def run_with_chain_context(query: str, chat_history: list) -> str:
+    """
+    Like run_with_chain, but also references `chat_history`.
+    We'll do single-turn classification, but pass chat_history
+    to the RAG chain if needed.
+    Example usage:
+        chat_history = []
+        question = "What is Self-Reflection?"
+        resp1 = run_with_chain_context(question, chat_history)
+        # then chat_history.extend([...]) with HumanMessage/AIMessage
+    """
+    print("DEBUG: Starting run_with_chain_context...")
+    # 1) Classification (no multi-turn, just single-turn classification)
     class_result = classification_chain.invoke({"query": query})
     classification = class_result.get("text", "").strip()
     print("DEBUG: Classification =>", classification)
+    # 2) If OutOfScope => refusal => tailor => return
     if classification == "OutOfScope":
         refusal_text = refusal_chain.run({})
         final_refusal = tailor_chain.run({"response": refusal_text})
         return final_refusal.strip()
+    # 3) If Wellness => call wellness_rag_chain with chat_history
     if classification == "Wellness":
+        # pass the conversation to .invoke(...) so it can see it if needed
+        rag_result = wellness_rag_chain.invoke({
+            "input": query,
+            "chat_history": chat_history  # pass the entire list of prior messages
+        })
         csv_answer = rag_result["result"].strip()
         if not csv_answer:
             web_answer = do_web_search(query)
                 web_answer = do_web_search(query)
             else:
                 web_answer = ""
         final_merged = cleaner_chain.merge(kb=csv_answer, web=web_answer)
         final_answer = tailor_chain.run({"response": final_merged})
         return final_answer.strip()
+    # 4) If Brand => brand_rag_chain with chat_history
     if classification == "Brand":
+        rag_result = brand_rag_chain.invoke({
+            "input": query,
+            "chat_history": chat_history
+        })
         csv_answer = rag_result["result"].strip()
         final_merged = cleaner_chain.merge(kb=csv_answer, web="")
         final_answer = tailor_chain.run({"response": final_merged})
         return final_answer.strip()
+    # fallback => refusal
     refusal_text = refusal_chain.run({})
     final_refusal = tailor_chain.run({"response": refusal_text})
     return final_refusal.strip()