#!/usr/bin/env python3
"""
Test script to verify vector database creation functionality.

Imports the local RAG modules (rag_tool, vector_store, document_processor)
and exits early with a clear message if any of them are missing.
"""
import sys
import os
from pathlib import Path

# Add this script's directory to sys.path so the sibling modules can be
# imported when the script is run from another working directory.
sys.path.append(str(Path(__file__).parent))

try:
    from rag_tool import RAGTool
    from vector_store import VectorStore
    from document_processor import DocumentProcessor
    print("✅ Successfully imported all RAG modules")
except ImportError as e:
    # Fail fast: none of the tests below can run without these modules.
    print(f"❌ Failed to import RAG modules: {e}")
    sys.exit(1)
def test_document_processing():
    """Test document processing functionality.

    Chunks ``test_document.txt`` with a small chunk size and prints a
    preview of the first chunk.

    Returns:
        bool: True if the file was processed without error, False otherwise.
    """
    print("\n=== Testing Document Processing ===")
    processor = DocumentProcessor(chunk_size=200, chunk_overlap=50)

    # Test with our test document; skip gracefully if the fixture is absent.
    test_file = "test_document.txt"
    if not os.path.exists(test_file):
        print(f"❌ Test file {test_file} not found")
        return False

    try:
        chunks = processor.process_file(test_file)
        print(f"✅ Processed {test_file} into {len(chunks)} chunks")

        # Show first chunk so a human can eyeball the chunking quality.
        if chunks:
            first_chunk = chunks[0]
            print(f"First chunk preview: {first_chunk.text[:100]}...")
            print(f"Chunk metadata: {first_chunk.metadata}")
        return True
    except Exception as e:
        # Broad catch is deliberate: this is a diagnostic script and any
        # failure should be reported as a failed test, not a crash.
        print(f"❌ Failed to process document: {e}")
        return False
def test_vector_store():
    """Test vector store functionality.

    Builds an index from three hard-coded chunks, runs a similarity
    search, and round-trips the store through serialization.

    Returns:
        bool: True if every step completed without raising, False otherwise.
    """
    print("\n=== Testing Vector Store ===")
    try:
        # Initialize vector store
        vector_store = VectorStore()
        print("✅ Initialized vector store")

        # Create test data: minimal chunk dicts matching the processor's shape.
        test_chunks = [
            {
                'text': 'Vector databases are used for semantic search',
                'chunk_id': 'test1',
                'metadata': {'file_name': 'test.txt', 'chunk_index': 0},
            },
            {
                'text': 'Machine learning models convert text to embeddings',
                'chunk_id': 'test2',
                'metadata': {'file_name': 'test.txt', 'chunk_index': 1},
            },
            {
                'text': 'FAISS provides efficient similarity search capabilities',
                'chunk_id': 'test3',
                'metadata': {'file_name': 'test.txt', 'chunk_index': 2},
            },
        ]

        # Build index
        print("Building vector index...")
        vector_store.build_index(test_chunks, show_progress=True)
        print("✅ Built vector index")

        # Test search: top_k=2 so at least one chunk must be ranked out.
        query = "How do vector databases work?"
        results = vector_store.search(query, top_k=2)
        print(f"Search results for '{query}':")
        for i, result in enumerate(results):
            print(f"  {i+1}. Score: {result.score:.3f} - {result.text[:50]}...")

        # Test serialization (used later for deployment packaging).
        serialized = vector_store.serialize()
        print(f"✅ Serialized data size: {len(serialized['index_base64'])} characters")
        return True
    except Exception as e:
        # Diagnostic script: report any failure with a traceback, don't crash.
        print(f"❌ Failed vector store test: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_rag_tool():
    """Test complete RAG tool functionality.

    Processes ``test_document.txt`` end-to-end, retrieves context for a
    sample query, and serializes the tool's data for deployment.

    Returns:
        bool: True if processing succeeded (serialization problems are
        reported but do not fail the test), False otherwise.
    """
    print("\n=== Testing RAG Tool ===")
    try:
        # Initialize RAG tool
        rag_tool = RAGTool()
        print("✅ Initialized RAG tool")

        # Process test document
        test_files = ["test_document.txt"]
        result = rag_tool.process_uploaded_files(test_files)

        if result['success']:
            print(f"✅ {result['message']}")

            # Show summary
            summary = result['summary']
            print(f"Files processed: {summary['total_files']}")
            print(f"Total chunks: {summary['total_chunks']}")

            # Test context retrieval; truncate long context for readability.
            query = "What are the benefits of vector databases?"
            context = rag_tool.get_relevant_context(query, max_chunks=2)
            if context:
                print(f"\nContext for '{query}':")
                print(context[:300] + "..." if len(context) > 300 else context)
                print("✅ Successfully retrieved context")
            else:
                print("⚠️ No context retrieved")

            # Test serialization for deployment
            serialized_data = rag_tool.get_serialized_data()
            if serialized_data:
                print("✅ Successfully serialized RAG data for deployment")
                print(f"Serialized keys: {list(serialized_data.keys())}")
            else:
                print("❌ Failed to serialize RAG data")
            return True
        else:
            print(f"❌ {result['message']}")
            return False
    except Exception as e:
        # Diagnostic script: report any failure with a traceback, don't crash.
        print(f"❌ Failed RAG tool test: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run all tests and print a pass/fail summary.

    Checks heavyweight third-party dependencies first so missing packages
    produce one clear message instead of three failing tests.
    """
    print("=== Vector Database Testing ===")
    print("Testing vector database creation and functionality...")

    # Check dependencies
    print("\n=== Checking Dependencies ===")
    try:
        import sentence_transformers
        import faiss
        import fitz  # PyMuPDF
        print("✅ All required dependencies available")
    except ImportError as e:
        print(f"❌ Missing dependency: {e}")
        return

    # Run tests in order; each returns a bool rather than raising.
    tests = [
        ("Document Processing", test_document_processing),
        ("Vector Store", test_vector_store),
        ("RAG Tool", test_rag_tool),
    ]

    results = []
    for test_name, test_func in tests:
        print(f"\n{'='*20}")
        success = test_func()
        results.append((test_name, success))

    # Summary
    print(f"\n{'='*40}")
    print("TEST SUMMARY:")
    for test_name, success in results:
        status = "✅ PASS" if success else "❌ FAIL"
        print(f"  {test_name}: {status}")

    all_passed = all(success for _, success in results)
    if all_passed:
        print("\n🎉 All tests passed! Vector database functionality is working.")
    else:
        print("\n⚠️ Some tests failed. Check the output above for details.")
# Standard script entry guard: run the suite only when executed directly.
if __name__ == "__main__":
    main()