chatui-helper / test_vector_db.py
milwright's picture
Comprehensive improvements and modernization
ba11a75
raw
history blame
6.35 kB
#!/usr/bin/env python3
"""
Test script to verify vector database creation functionality
"""
import sys
import os
from pathlib import Path
# Add current directory to path to import modules
sys.path.append(str(Path(__file__).parent))
# Pull in the project-local RAG components under test.  A failed import
# is fatal for the whole script, so report it and exit immediately with
# a non-zero status.
try:
    from rag_tool import RAGTool
    from vector_store import VectorStore
    from document_processor import DocumentProcessor
    print("βœ… Successfully imported all RAG modules")
except ImportError as e:
    print(f"❌ Failed to import RAG modules: {e}")
    sys.exit(1)
def test_document_processing():
    """Exercise DocumentProcessor end to end on the bundled sample file.

    Returns True when the file is found and chunked without error,
    False otherwise.  Diagnostic output goes to stdout.
    """
    print("\n=== Testing Document Processing ===")
    doc_processor = DocumentProcessor(chunk_size=200, chunk_overlap=50)

    # The sample document must sit next to this script.
    test_file = "test_document.txt"
    if not os.path.exists(test_file):
        print(f"❌ Test file {test_file} not found")
        return False

    try:
        chunks = doc_processor.process_file(test_file)
        print(f"βœ… Processed {test_file} into {len(chunks)} chunks")
        # Preview the leading chunk so a human can eyeball the split.
        if chunks:
            lead = chunks[0]
            print(f"First chunk preview: {lead.text[:100]}...")
            print(f"Chunk metadata: {lead.metadata}")
        return True
    except Exception as e:
        print(f"❌ Failed to process document: {e}")
        return False
def test_vector_store():
    """Smoke-test VectorStore: build an index, search it, serialize it.

    Returns True when every stage completes without raising,
    False (with a printed traceback) otherwise.
    """
    print("\n=== Testing Vector Store ===")
    try:
        store = VectorStore()
        print("βœ… Initialized vector store")

        # Three tiny chunks are enough to verify indexing and search.
        sample_texts = [
            'Vector databases are used for semantic search',
            'Machine learning models convert text to embeddings',
            'FAISS provides efficient similarity search capabilities',
        ]
        sample_chunks = [
            {
                'text': text,
                'chunk_id': f'test{idx + 1}',
                'metadata': {'file_name': 'test.txt', 'chunk_index': idx},
            }
            for idx, text in enumerate(sample_texts)
        ]

        print("Building vector index...")
        store.build_index(sample_chunks, show_progress=True)
        print("βœ… Built vector index")

        # A semantic query should surface the relevant chunks.
        query = "How do vector databases work?"
        hits = store.search(query, top_k=2)
        print(f"Search results for '{query}':")
        for rank, hit in enumerate(hits, start=1):
            print(f" {rank}. Score: {hit.score:.3f} - {hit.text[:50]}...")

        # Serialization is what the deployment path relies on.
        payload = store.serialize()
        print(f"βœ… Serialized data size: {len(payload['index_base64'])} characters")
        return True
    except Exception as e:
        print(f"❌ Failed vector store test: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_rag_tool():
    """Test the complete RAG tool pipeline.

    Covers file ingestion, relevant-context retrieval, and
    serialization of the index for deployment.

    Returns:
        bool: True only if every required stage succeeds; False on any
        failure.  Retrieval returning no context is treated as a
        warning, not a failure, since relevance depends on the corpus.
    """
    print("\n=== Testing RAG Tool ===")
    try:
        rag_tool = RAGTool()
        print("βœ… Initialized RAG tool")

        # Ingest the same sample document the other tests use.
        test_files = ["test_document.txt"]
        result = rag_tool.process_uploaded_files(test_files)
        if not result['success']:
            print(f"❌ {result['message']}")
            return False

        print(f"βœ… {result['message']}")
        summary = result['summary']
        print(f"Files processed: {summary['total_files']}")
        print(f"Total chunks: {summary['total_chunks']}")

        # Context retrieval: warn (don't fail) when nothing comes back.
        query = "What are the benefits of vector databases?"
        context = rag_tool.get_relevant_context(query, max_chunks=2)
        if context:
            print(f"\nContext for '{query}':")
            print(context[:300] + "..." if len(context) > 300 else context)
            print("βœ… Successfully retrieved context")
        else:
            print("⚠️ No context retrieved")

        # Serialization for deployment is required to pass.
        serialized_data = rag_tool.get_serialized_data()
        if not serialized_data:
            # BUG FIX: this branch previously printed a failure marker
            # but still fell through to `return True`, so a serialization
            # failure was reported as a passing test.
            print("❌ Failed to serialize RAG data")
            return False

        print("βœ… Successfully serialized RAG data for deployment")
        print(f"Serialized keys: {list(serialized_data.keys())}")
        return True
    except Exception as e:
        print(f"❌ Failed RAG tool test: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run all vector-database tests and print a summary.

    Returns:
        bool: True if the dependency check and every test passed,
        False otherwise.  The entry-point guard maps this onto the
        process exit code so CI systems can detect failures.
    """
    print("=== Vector Database Testing ===")
    print("Testing vector database creation and functionality...")

    # Fail fast if the heavy third-party dependencies are unavailable.
    print("\n=== Checking Dependencies ===")
    try:
        import sentence_transformers
        import faiss
        import fitz  # PyMuPDF
        print("βœ… All required dependencies available")
    except ImportError as e:
        print(f"❌ Missing dependency: {e}")
        return False

    # Run each test in order, collecting (name, passed) pairs.
    tests = [
        ("Document Processing", test_document_processing),
        ("Vector Store", test_vector_store),
        ("RAG Tool", test_rag_tool),
    ]
    results = []
    for test_name, test_func in tests:
        print(f"\n{'='*20}")
        results.append((test_name, test_func()))

    # Summary
    print(f"\n{'='*40}")
    print("TEST SUMMARY:")
    for test_name, success in results:
        status = "βœ… PASS" if success else "❌ FAIL"
        print(f" {test_name}: {status}")

    all_passed = all(success for _, success in results)
    if all_passed:
        print("\nπŸŽ‰ All tests passed! Vector database functionality is working.")
    else:
        print("\n⚠️ Some tests failed. Check the output above for details.")
    return all_passed


if __name__ == "__main__":
    # BUG FIX: the script previously exited 0 even when tests failed,
    # hiding regressions from CI.  Derive the exit code from the result.
    sys.exit(0 if main() else 1)