File size: 6,350 Bytes
ba11a75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
#!/usr/bin/env python3
"""
Test script to verify vector database creation functionality
"""

import sys
import os
from pathlib import Path

# Add current directory to path to import modules
sys.path.append(str(Path(__file__).parent))

# Import the project's RAG modules up front. The test functions below use
# these classes, so report the problem and exit with status 1 if any import
# fails.
try:
    from rag_tool import RAGTool
    from vector_store import VectorStore
    from document_processor import DocumentProcessor
except ImportError as e:
    print(f"❌ Failed to import RAG modules: {e}")
    sys.exit(1)
else:
    # Only reached when all three imports succeeded.
    print("✅ Successfully imported all RAG modules")

def test_document_processing():
    """Test document processing functionality.

    Runs ``DocumentProcessor.process_file`` on ``test_document.txt`` (looked
    up relative to the current working directory) and prints the number of
    chunks plus a preview of the first one.

    Returns:
        bool: True if the file was processed without raising; False if the
        test file is missing or any step raised an exception.
    """
    print("\n=== Testing Document Processing ===")

    # Test with our test document
    test_file = "test_document.txt"
    if not os.path.exists(test_file):
        print(f"❌ Test file {test_file} not found")
        return False

    try:
        # Build the processor inside the try block so that a constructor
        # failure is reported as a failed test instead of aborting the run.
        processor = DocumentProcessor(chunk_size=200, chunk_overlap=50)
        chunks = processor.process_file(test_file)
        print(f"✅ Processed {test_file} into {len(chunks)} chunks")

        # Show first chunk
        if chunks:
            first_chunk = chunks[0]
            print(f"First chunk preview: {first_chunk.text[:100]}...")
            print(f"Chunk metadata: {first_chunk.metadata}")

        return True
    except Exception as e:
        print(f"❌ Failed to process document: {e}")
        # Print the full traceback, matching the other tests in this script.
        import traceback
        traceback.print_exc()
        return False

def test_vector_store():
    """Test vector store functionality.

    Builds an index from three hard-coded chunks, runs one search against it,
    prints the ranked results, and serializes the store.

    Returns:
        bool: True if every step completed without raising; False otherwise
        (the exception and its traceback are printed).
    """
    print("\n=== Testing Vector Store ===")

    try:
        # Initialize vector store
        vector_store = VectorStore()
        print("✅ Initialized vector store")

        # Create test data
        test_chunks = [
            {
                'text': 'Vector databases are used for semantic search',
                'chunk_id': 'test1',
                'metadata': {'file_name': 'test.txt', 'chunk_index': 0},
            },
            {
                'text': 'Machine learning models convert text to embeddings',
                'chunk_id': 'test2',
                'metadata': {'file_name': 'test.txt', 'chunk_index': 1},
            },
            {
                'text': 'FAISS provides efficient similarity search capabilities',
                'chunk_id': 'test3',
                'metadata': {'file_name': 'test.txt', 'chunk_index': 2},
            },
        ]

        # Build index
        print("Building vector index...")
        vector_store.build_index(test_chunks, show_progress=True)
        print("✅ Built vector index")

        # Test search
        query = "How do vector databases work?"
        results = vector_store.search(query, top_k=2)

        print(f"Search results for '{query}':")
        for rank, result in enumerate(results, start=1):
            print(f"  {rank}. Score: {result.score:.3f} - {result.text[:50]}...")

        # Test serialization
        serialized = vector_store.serialize()
        print(f"✅ Serialized data size: {len(serialized['index_base64'])} characters")

        return True

    except Exception as e:
        print(f"❌ Failed vector store test: {e}")
        import traceback
        traceback.print_exc()
        return False

def test_rag_tool():
    """Test complete RAG tool functionality.

    Processes ``test_document.txt`` through ``RAGTool``, prints the processing
    summary, retrieves context for a sample query, and serializes the result
    for deployment.

    Returns:
        bool: True if the file was processed and the data serialized; False if
        processing reported failure, serialization produced nothing, or any
        step raised an exception. An empty context only produces a warning.
    """
    print("\n=== Testing RAG Tool ===")

    try:
        # Initialize RAG tool
        rag_tool = RAGTool()
        print("✅ Initialized RAG tool")

        # Process test document
        test_files = ["test_document.txt"]
        result = rag_tool.process_uploaded_files(test_files)

        if not result['success']:
            print(f"❌ {result['message']}")
            return False

        print(f"✅ {result['message']}")

        # Show summary
        summary = result['summary']
        print(f"Files processed: {summary['total_files']}")
        print(f"Total chunks: {summary['total_chunks']}")

        # Test context retrieval
        query = "What are the benefits of vector databases?"
        context = rag_tool.get_relevant_context(query, max_chunks=2)

        if context:
            print(f"\nContext for '{query}':")
            # Truncate long context to a 300-character preview.
            preview = (context[:300] + "...") if len(context) > 300 else context
            print(preview)
            print("✅ Successfully retrieved context")
        else:
            print("⚠️ No context retrieved")

        # Test serialization for deployment
        serialized_data = rag_tool.get_serialized_data()
        if not serialized_data:
            # A serialization failure must fail the test; previously it was
            # printed as a failure but still counted as a pass.
            print("❌ Failed to serialize RAG data")
            return False

        print("✅ Successfully serialized RAG data for deployment")
        print(f"Serialized keys: {list(serialized_data.keys())}")
        return True

    except Exception as e:
        print(f"❌ Failed RAG tool test: {e}")
        import traceback
        traceback.print_exc()
        return False

def main():
    """Run all tests and print a pass/fail summary.

    Checks that the third-party dependencies can be imported, runs each test
    function in turn, and prints one summary line per test.

    Returns:
        int: 0 if every test passed; 1 if a dependency is missing or at least
        one test failed. Intended to be used as the process exit status.
    """
    print("=== Vector Database Testing ===")
    print("Testing vector database creation and functionality...")

    # Check dependencies
    print("\n=== Checking Dependencies ===")
    try:
        # Imported only to confirm they are installed; the names are unused.
        import sentence_transformers
        import faiss
        import fitz  # PyMuPDF
        print("✅ All required dependencies available")
    except ImportError as e:
        print(f"❌ Missing dependency: {e}")
        return 1

    # Run tests
    tests = [
        ("Document Processing", test_document_processing),
        ("Vector Store", test_vector_store),
        ("RAG Tool", test_rag_tool),
    ]

    results = []
    for test_name, test_func in tests:
        print(f"\n{'='*20}")
        try:
            success = bool(test_func())
        except Exception as e:
            # Record an escaped exception as a failure so the remaining tests
            # still run and the summary is still printed.
            print(f"❌ {test_name} raised an unexpected error: {e}")
            success = False
        results.append((test_name, success))

    # Summary
    print(f"\n{'='*40}")
    print("TEST SUMMARY:")
    for test_name, success in results:
        status = "✅ PASS" if success else "❌ FAIL"
        print(f"  {test_name}: {status}")

    all_passed = all(success for _, success in results)
    if all_passed:
        print("\n🎉 All tests passed! Vector database functionality is working.")
        return 0

    print("\n⚠️ Some tests failed. Check the output above for details.")
    return 1


if __name__ == "__main__":
    # Propagate the result as the exit status so shells and CI see failures.
    sys.exit(main())