Spaces:
Building
Building
# /home/bk_anupam/code/LLM_agents/RAG_BOT/vector_store_cli.py | |
import sys | |
import os | |
# Add the project root to the Python path to allow imports from RAG_BOT | |
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) | |
sys.path.insert(0, project_root) | |
from RAG_BOT.logger import logger | |
from RAG_BOT.config import Config | |
from RAG_BOT.vector_store import VectorStore | |
from RAG_BOT.document_indexer import DocumentIndexer | |
from RAG_BOT.file_manager import FileManager | |
def test_query_index():
    """
    Run one hard-coded, date-filtered query against the vector store.

    Opens the store at the configured VECTOR_STORE_PATH, asks for the top 10
    matches for a fixed query restricted to a single date, and prints whatever
    ``query_index`` returns to stdout.

    Failures raised by the query itself are logged rather than propagated:
    a ``ValueError`` is logged as a plain error, anything else is logged with
    its traceback. Errors raised while constructing the store are not caught.
    """
    settings = Config()
    store = VectorStore(persist_directory=settings.VECTOR_STORE_PATH)

    # Fixed sample inputs for this manual check.
    test_date = "1992-09-24"
    query = "दूसरों की चेकिंग करने के बारे में बाबा ने मुरली में क्या बताया है?"

    logger.info(f"Testing query with date filter: {test_date}")
    try:
        hits = store.query_index(query, k=10, date_filter=test_date)
        # One print call; emits the same bytes as three separate prints.
        print(
            "\n--- Query Result ---",
            hits,
            "--- End Query Result ---\n",
            sep="\n",
        )
    except ValueError as err:
        logger.error(f"Query failed: {err}")
    except Exception as err:
        # Unknown failure: keep the traceback in the log for diagnosis.
        logger.error(f"An unexpected error occurred during query test: {err}", exc_info=True)
def index_data():
    """
    Index everything under the configured DATA_PATH into the vector store.

    Reads DATA_PATH and VECTOR_STORE_PATH from ``Config``, hands the base
    directory to ``DocumentIndexer.index_directory`` and finally logs the
    metadata of all indexed entries. Per the original description this is
    meant to cover PDFs in the 'english' subdirectory and HTM files in the
    'hindi' subdirectory; the actual file selection happens inside
    ``DocumentIndexer`` and is not visible here.

    If DATA_PATH is empty or unset, an error is logged and nothing is indexed.
    """
    settings = Config()
    base_dir = settings.DATA_PATH

    # Nothing to index without a base directory.
    if not base_dir:
        logger.error("DATA_PATH is not set in the configuration. Cannot start indexing.")
        return

    logger.info(f"Starting MANUAL indexing process for base directory: {base_dir}")

    # Wire up the store and the indexer that writes into it.
    store = VectorStore(persist_directory=settings.VECTOR_STORE_PATH)
    indexer = DocumentIndexer(store, FileManager())

    indexer.index_directory(base_dir)
    logger.info("Manual indexing process complete.")

    # Dump what ended up in the store so the run can be inspected.
    store.log_all_indexed_metadata()
if __name__ == "__main__":
    # Script entry point.
    #
    # The action is selected on the command line instead of by editing this
    # file to comment/uncomment calls:
    #   index    -> index_data()
    #   query    -> test_query_index()   (index the data first)
    #   metadata -> log all indexed metadata (index the data first)
    # With no argument the script logs metadata, which is what it did before
    # the command-line switch existed.

    # Local import: argparse is only needed when the file is run as a script.
    import argparse

    parser = argparse.ArgumentParser(
        description="Index documents, run a sample query, or log indexed metadata.",
    )
    parser.add_argument(
        "action",
        nargs="?",
        choices=("index", "query", "metadata"),
        default="metadata",
        help="what to do (default: %(default)s)",
    )
    args = parser.parse_args()

    if args.action == "index":
        index_data()
    elif args.action == "query":
        test_query_index()
    else:
        # Default action. The store is built with its default arguments,
        # exactly as the original entry point did.
        vs = VectorStore()
        vs.log_all_indexed_metadata()