Spaces:
Building
Building
File size: 7,642 Bytes
3f61806 24ae72d 3f61806 24ae72d 3f61806 24ae72d 3f61806 24ae72d 3f61806 24ae72d b9ccd0b 24ae72d 3f61806 b9ccd0b 3f61806 b9ccd0b 3f61806 b9ccd0b 3f61806 b9ccd0b 3f61806 b9ccd0b 3f61806 b9ccd0b 3f61806 b9ccd0b 3f61806 b9ccd0b 3f61806 24ae72d b9ccd0b 24ae72d b9ccd0b 24ae72d b9ccd0b 24ae72d b9ccd0b 3f61806 b9ccd0b 3f61806 24ae72d 3f61806 24ae72d 3f61806 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import datetime
import os
import sys
from typing import Optional, Dict, Any, Callable, List, Tuple

from langchain_core.tools import tool
from langchain_chroma import Chroma

# Add the project root to the Python path so the RAG_BOT package resolves
# when this module is executed directly (not as an installed package).
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.insert(0, project_root)

from RAG_BOT.logger import logger
# Factory function to create the tool with the vectordb instance enclosed
def create_context_retriever_tool(vectordb: Chroma, k: int = 25, search_type: str = "similarity") -> Callable:
"""
Factory function that creates and returns the murli retriever tool.
The returned tool function has the vectordb instance enclosed via closure.
Args:
vectordb: The initialized Chroma vector database instance.
k: The number of documents to retrieve (default: 25).
search_type: The type of search to perform ('similarity', 'mmr', etc. Default: 'similarity').
Returns:
A callable tool function suitable for LangChain/LangGraph.
"""
@tool(response_format="content_and_artifact")
def retrieve_context(
query: str,
date_filter: Optional[str] = None,
language: Optional[str] = None,
) -> Tuple[str, List[str]]:
"""
Retrieves relevant context snippets from indexed Brahmakumaris murlis based on a user query,
optionally filtering by date and language. Use this tool when the user asks for summaries,
details, specific points, or content related to a particular date (YYYY-MM-DD), topic,
or concept mentioned within the murlis. The tool accesses an underlying vector database
containing the murli content.
Args:
query: The core semantic query about the murli content (e.g., 'summary of teachings', 'points about effort'). The LLM should formulate this based on the user's original question.
date_filter: An optional date string in 'YYYY-MM-DD' format extracted from the user's query to filter documents by date. Provide ONLY if a specific date is mentioned.
language: The language code ('en' for English, 'hi' for Hindi) inferred from the user's original query. ALWAYS try to infer and provide this parameter based on the user's input language.
Returns:
A tuple containing:
1. A status string indicating the outcome (e.g., number of documents retrieved).
2. A list of strings, where each string is the page content of a retrieved document chunk.
Returns ("Error retrieving context.", []) if an error occurs.
"""
logger.info(f"Executing context_retriever_tool for query: '{query}', date: {date_filter}, lang: {language}")
try:
# Normalize query
normalized_query = query.strip().lower()
# Prepare search kwargs
search_kwargs: Dict[str, Any] = {"k": k}
if date_filter:
try:
# Validate and format date
filter_date = datetime.datetime.strptime(date_filter, '%Y-%m-%d')
formatted_date = filter_date.strftime('%Y-%m-%d')
# Use $and if language filter is also present, otherwise just date
date_condition = {"date": formatted_date} # Chroma uses implicit $eq
if language:
lang_condition = {"language": language.lower()}
search_kwargs["filter"] = {"$and": [date_condition, lang_condition]}
logger.info(f"Applying date filter: {formatted_date}, language filter: {language.lower()}")
else:
search_kwargs["filter"] = date_condition
except ValueError:
logger.warning(f"Invalid date format '{date_filter}'. Ignoring date filter.")
# If only language is present after invalid date
if language:
search_kwargs["filter"] = {"language": language.lower()}
elif language: # Only language filter is present
logger.info(f"Applying language filter: {language.lower()}")
search_kwargs["filter"] = {"language": language.lower()}
logger.debug(f"Using search_kwargs: {search_kwargs}")
# Create retriever using the enclosed vectordb
retriever = vectordb.as_retriever(
search_type=search_type,
search_kwargs=search_kwargs
)
# Retrieve documents
retrieved_docs = retriever.invoke(normalized_query)
# Return list of document contents
doc_contents = [doc.page_content for doc in retrieved_docs]
if not doc_contents:
logger.info("No documents found matching the query.")
content_string = "No relevant documents found matching the criteria."
else:
content_string = f"Successfully retrieved {len(retrieved_docs)} document chunks based on the query and filters."
logger.info(f"Retrieved {len(doc_contents)} chunks.")
logger.info(f"First retrieved doc content snippet: {doc_contents[0][:200]}...")
return content_string, doc_contents
except Exception as e:
logger.error(f"Error during context retrieval: {e}", exc_info=True)
return "Error during context retrieval.", [] # Return error status and empty list
# Return the decorated inner function
return retrieve_context
# Example usage (for testing purposes, requires a Chroma instance)
if __name__ == '__main__':
    # Ad-hoc smoke test. Requires an existing Chroma DB at persist_directory.
    from langchain_huggingface import HuggingFaceEmbeddings
    from RAG_BOT.config import Config
    persist_directory = '/home/bk_anupam/code/LLM_agents/RAG_BOT/chroma_db'  # Example path
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    try:
        vectordb_instance = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
        test_query = "What is soul consciousness?"
        # 1. Create the tool using the factory; k and search_type are baked
        #    into the tool via the closure, so callers only pass query/filters.
        murli_tool_instance = create_context_retriever_tool(
            vectordb=vectordb_instance,
            k=Config.K,  # Use K from config
            search_type=Config.SEARCH_TYPE  # Use search_type from config
        )
        logger.info(f"Test: Context retriever tool created with k={Config.K} and search_type='{Config.SEARCH_TYPE}'.")
        # 2. Invoke the created tool.
        # NOTE(review): with response_format="content_and_artifact", a direct
        # .invoke() with a plain dict is expected to return only the content
        # string (the artifact list is attached when invoked via a tool call)
        # — TODO confirm against the installed langchain-core version. Handle
        # both shapes instead of assuming the result is the document list.
        result = murli_tool_instance.invoke({"query": test_query})
        if isinstance(result, tuple) and len(result) == 2:
            content_status, retrieved_docs = result
        else:
            content_status, retrieved_docs = result, []
        print(f"Query: {test_query}")
        print(f"Status: {content_status}")
        print(f"Retrieved {len(retrieved_docs)} context documents (using k={Config.K}, search_type='{Config.SEARCH_TYPE}'):")
        if retrieved_docs:
            print("First document content:")
            print(retrieved_docs[0][:500] + "...")  # Print snippet of first doc
        else:
            print("No documents retrieved.")
        print("\nFactory function defined. Tool created and tested.")
    except Exception as e:
        print(f"Error during test setup or execution: {e}")
|