File size: 2,658 Bytes
7361b6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# /home/bk_anupam/code/LLM_agents/RAG_BOT/vector_store_cli.py
import sys
import os

# Add the project root to the Python path to allow imports from RAG_BOT
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.insert(0, project_root)

from RAG_BOT.logger import logger
from RAG_BOT.config import Config
from RAG_BOT.vector_store import VectorStore
from RAG_BOT.document_indexer import DocumentIndexer
from RAG_BOT.file_manager import FileManager


def test_query_index():
    """
    Run one hard-coded sample query against the persisted vector store.

    A VectorStore is opened on the configured VECTOR_STORE_PATH and asked
    (with k=10) for a fixed Hindi question, restricted by a fixed date
    filter. Whatever the store returns is printed between two marker lines.
    Errors are logged instead of being propagated to the caller.
    """
    settings = Config()
    store = VectorStore(persist_directory=settings.VECTOR_STORE_PATH)

    date_filter = "1992-09-24"
    question = "दूसरों की चेकिंग करने के बारे में बाबा ने मुरली में क्या बताया है?"

    logger.info(f"Testing query with date filter: {date_filter}")
    try:
        answer = store.query_index(question, k=10, date_filter=date_filter)
        # Printing stays inside the try so that output failures are logged
        # by the generic handler below as well.
        for chunk in ("\n--- Query Result ---", answer, "--- End Query Result ---\n"):
            print(chunk)
    except ValueError as err:
        # ValueError is reported without a traceback.
        logger.error(f"Query failed: {err}")
    except Exception as err:
        # Anything else is unexpected, so the traceback is included.
        logger.error(f"An unexpected error occurred during query test: {err}", exc_info=True)


def index_data():
    """
    Manually index everything under the configured DATA_PATH.

    The directory is handed to DocumentIndexer.index_directory; according to
    the original note this is expected to cover PDFs in an 'english'
    subdirectory and HTM files in a 'hindi' subdirectory (behaviour lives in
    DocumentIndexer, not here). When DATA_PATH is unset or empty an error is
    logged and nothing is indexed. After indexing, the metadata of all
    indexed entries is logged.
    """
    settings = Config()
    base_dir = settings.DATA_PATH

    # Nothing to do without a source directory.
    if not base_dir:
        logger.error("DATA_PATH is not set in the configuration. Cannot start indexing.")
        return

    logger.info(f"Starting MANUAL indexing process for base directory: {base_dir}")

    # Wire up the collaborators: the store is created first, then the file
    # manager, and both are passed to the indexer.
    store = VectorStore(persist_directory=settings.VECTOR_STORE_PATH)
    indexer = DocumentIndexer(store, FileManager())
    indexer.index_directory(base_dir)

    logger.info("Manual indexing process complete.")

    # Summarise what the store now holds.
    store.log_all_indexed_metadata()


if __name__ == "__main__":
    # Script entry point. One action is active at a time; switch actions by
    # (un)commenting the calls below. A real CLI could select the action
    # with argparse instead.

    # Action 1 - build the index:
    # index_data()

    # Action 2 - run the sample query (data must already be indexed):
    # test_query_index()

    # Action 3 (active) - log the metadata of everything that is indexed
    # (data must already be indexed). The store is created with its default
    # constructor arguments here.
    metadata_store = VectorStore()
    metadata_store.log_all_indexed_metadata()