ChatBotAgenticRAG_dup

Sleeping

App Files Files Community

Phoenix21 committed on Jan 12

Commit

b0739e4

verified ·

1 Parent(s): c09fe62

Update pipeline.py

Browse files

Files changed (1) hide show

pipeline.py +26 -25

pipeline.py CHANGED Viewed

@@ -4,14 +4,13 @@ import spacy
 import pandas as pd
 from typing import Optional
 import subprocess
-import asyncio  # Needed for managing async tasks
 from langchain.llms.base import LLM
 from langchain.docstore.document import Document
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.chains import RetrievalQA
 from smolagents import CodeAgent, DuckDuckGoSearchTool, ManagedAgent, LiteLLMModel
-from pydantic_ai import Agent  # Import Pydantic AI's Agent
 from mistralai import Mistral
 from langchain.prompts import PromptTemplate
@@ -26,9 +25,6 @@ from prompts import classification_prompt, refusal_prompt, tailor_prompt
 mistral_api_key = os.environ.get("MISTRAL_API_KEY")
 client = Mistral(api_key=mistral_api_key)
-# Initialize Pydantic AI Agent (for text validation)
-pydantic_agent = Agent('mistral:mistral-large-latest', result_type=str)
 # Load spaCy model for NER and download it if not already installed
 def install_spacy_model():
     try:
@@ -67,19 +63,31 @@ def classify_query(query: str) -> str:
     classification = class_result.get("text", "").strip()
     return classification if classification != "OutOfScope" else "OutOfScope"
-# Function to moderate text using Mistral moderation API (synchronous version)
-def moderate_text(query: str) -> str:
     try:
-        # Use Pydantic AI to validate the text
-        pydantic_agent.run_sync(query)  # Use sync run for Pydantic validation
-    except Exception as e:
         print(f"Error validating text: {e}")
         return "Invalid text format."
     # Call the Mistral moderation API
     response = client.classifiers.moderate_chat(
         model="mistral-moderation-latest",
-        inputs=[{"role": "user", "content": query}]
     )
     # Assuming the response is an object of type 'ClassificationResponse',
@@ -93,7 +101,7 @@ def moderate_text(query: str) -> str:
            categories.get("selfharm", False):
             return "OutOfScope"
-    return query
 # Function to build or load the vector store from CSV data
@@ -147,7 +155,7 @@ def build_rag_chain(llm_model: LiteLLMModel, vectorstore: FAISS) -> RetrievalQA:
     return rag_chain
 # Function to perform web search using DuckDuckGo
-async def do_web_search(query: str) -> str:
     search_tool = DuckDuckGoSearchTool()
     web_agent = CodeAgent(tools=[search_tool], model=pydantic_agent)
     managed_web_agent = ManagedAgent(agent=web_agent, name="web_search", description="Runs web search for you.")
@@ -158,13 +166,13 @@ async def do_web_search(query: str) -> str:
     return response
 # Function to combine web and knowledge base responses
-async def merge_responses(kb_answer: str, web_answer: str) -> str:
     # Merge both answers with a cohesive response
     final_answer = f"Knowledge Base Answer: {kb_answer}\n\nWeb Search Result: {web_answer}"
     return final_answer.strip()
 # Orchestrate the entire workflow
-async def run_async_pipeline(query: str) -> str:
     # Moderate the query for harmful content (sync)
     moderated_query = moderate_text(query)
     if moderated_query == "OutOfScope":
@@ -183,15 +191,15 @@ async def run_async_pipeline(query: str) -> str:
         csv_answer = rag_result["result"].strip()
         web_answer = ""  # Empty if we found an answer from the knowledge base
         if not csv_answer:
-            web_answer = await do_web_search(moderated_query)
-        final_merged = await merge_responses(csv_answer, web_answer)
         final_answer = tailor_chain.run({"response": final_merged})
         return final_answer.strip()
     if classification == "Brand":
         rag_result = brand_rag_chain({"query": moderated_query})
         csv_answer = rag_result["result"].strip()
-        final_merged = await merge_responses(csv_answer, "")
         final_answer = tailor_chain.run({"response": final_merged})
         return final_answer.strip()
@@ -199,13 +207,6 @@ async def run_async_pipeline(query: str) -> str:
     final_refusal = tailor_chain.run({"response": refusal_text})
     return final_refusal.strip()
-# Run the pipeline with the event loop
-import asyncio
-def run_with_chain(query: str) -> str:
-    # Use asyncio.run to run the async pipeline, which ensures a fresh event loop
-    return asyncio.run(run_async_pipeline(query))
 # Initialize chains here
 classification_chain = get_classification_chain()
 refusal_chain = get_refusal_chain()

 import pandas as pd
 from typing import Optional
 import subprocess
 from langchain.llms.base import LLM
 from langchain.docstore.document import Document
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.chains import RetrievalQA
 from smolagents import CodeAgent, DuckDuckGoSearchTool, ManagedAgent, LiteLLMModel
+from pydantic import BaseModel, ValidationError  # Import Pydantic for text validation
 from mistralai import Mistral
 from langchain.prompts import PromptTemplate
 mistral_api_key = os.environ.get("MISTRAL_API_KEY")
 client = Mistral(api_key=mistral_api_key)
 # Load spaCy model for NER and download it if not already installed
 def install_spacy_model():
     try:
     classification = class_result.get("text", "").strip()
     return classification if classification != "OutOfScope" else "OutOfScope"
+# Pydantic model for text validation
class TextInputModel(BaseModel):
    """Pydantic schema holding a single string field, used to validate query text."""

    # The text being validated; construction fails with ValidationError
    # when the supplied value is not accepted as a string.
    text: str
+# Function to validate the text input using Pydantic
def validate_text(query: str) -> str:
    """Check that *query* is accepted as the ``text`` field of ``TextInputModel``.

    Returns:
        The query unchanged when the model accepts it. When construction
        raises ``ValidationError``, the error is printed and the sentinel
        string "Invalid text format." is returned instead.
    """
    try:
        # The model instance is discarded; only the validation matters.
        TextInputModel(text=query)
    except ValidationError as err:
        print(f"Error validating text: {err}")
        return "Invalid text format."
    else:
        return query
+# Function to moderate text using Mistral moderation API (synchronous version)
+def moderate_text(query: str) -> str:
+    # Validate the text using Pydantic
+    validated_text = validate_text(query)
+    if validated_text == "Invalid text format.":
+        return validated_text
     # Call the Mistral moderation API
     response = client.classifiers.moderate_chat(
         model="mistral-moderation-latest",
+        inputs=[{"role": "user", "content": validated_text}]
     )
     # Assuming the response is an object of type 'ClassificationResponse',
            categories.get("selfharm", False):
             return "OutOfScope"
+    return validated_text
 # Function to build or load the vector store from CSV data
     return rag_chain
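 # Function to perform web search using DuckDuckGo
 # NOTE(review): this commit deletes the module-level `pydantic_agent`, but the body below still passes model=pydantic_agent - confirm it is defined elsewhere, otherwise this call raises NameError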
+def do_web_search(query: str) -> str:
     search_tool = DuckDuckGoSearchTool()
     web_agent = CodeAgent(tools=[search_tool], model=pydantic_agent)
     managed_web_agent = ManagedAgent(agent=web_agent, name="web_search", description="Runs web search for you.")
     return response
 # Function to combine web and knowledge base responses
def merge_responses(kb_answer: str, web_answer: str) -> str:
    """Combine the knowledge-base answer and the web-search answer into one text.

    Each non-empty answer is emitted under its own label, and the sections are
    separated by a blank line. An answer that is empty or whitespace-only is
    left out together with its label, so the result never carries a dangling
    "Web Search Result:" heading (the pipeline passes ``""`` as the web answer
    whenever the knowledge base already produced a result).

    Args:
        kb_answer: Answer obtained from the knowledge base; may be empty.
        web_answer: Answer obtained from web search; may be empty.

    Returns:
        The labelled sections joined by a blank line, or ``""`` when both
        answers are empty.
    """
    labelled = (
        ("Knowledge Base Answer", kb_answer),
        ("Web Search Result", web_answer),
    )
    # Keep only sections that actually have content; an empty label would be
    # forwarded verbatim to the tailoring prompt.
    sections = [
        f"{label}: {text.strip()}"
        for label, text in labelled
        if text and text.strip()
    ]
    return "\n\n".join(sections)
 # Orchestrate the entire workflow
+def run_pipeline(query: str) -> str:
     # Moderate the query for harmful content (sync)
     moderated_query = moderate_text(query)
     if moderated_query == "OutOfScope":
         csv_answer = rag_result["result"].strip()
         web_answer = ""  # Empty if we found an answer from the knowledge base
         if not csv_answer:
+            web_answer = do_web_search(moderated_query)
+        final_merged = merge_responses(csv_answer, web_answer)
         final_answer = tailor_chain.run({"response": final_merged})
         return final_answer.strip()
     if classification == "Brand":
         rag_result = brand_rag_chain({"query": moderated_query})
         csv_answer = rag_result["result"].strip()
+        final_merged = merge_responses(csv_answer, "")
         final_answer = tailor_chain.run({"response": final_merged})
         return final_answer.strip()
     final_refusal = tailor_chain.run({"response": refusal_text})
     return final_refusal.strip()
 # Initialize chains here
 classification_chain = get_classification_chain()
 refusal_chain = get_refusal_chain()