Spaces:
Running
Running
#!/usr/bin/env python3 | |
""" | |
Test script to verify vector database creation functionality | |
""" | |
import sys | |
import os | |
from pathlib import Path | |
# Put this script's own directory on the import path so the sibling RAG
# modules (rag_tool, vector_store, document_processor) can be imported.
sys.path.append(os.fspath(Path(__file__).parent))

try:
    from rag_tool import RAGTool
    from vector_store import VectorStore
    from document_processor import DocumentProcessor
except ImportError as import_error:
    # None of the tests below can run without these modules, so stop here.
    print(f"β Failed to import RAG modules: {import_error}")
    sys.exit(1)
else:
    print("β Successfully imported all RAG modules")
def test_document_processing():
    """Check that the sample document can be split into chunks.

    Looks for ``test_document.txt`` relative to the current working
    directory, runs it through a ``DocumentProcessor`` created with
    ``chunk_size=200`` and ``chunk_overlap=50``, and prints the number of
    chunks plus a preview of the first one.

    Returns:
        bool: True when processing finished without raising; False when the
        sample file is missing or any step (including constructing the
        processor) raised an exception.
    """
    print("\n=== Testing Document Processing ===")

    test_file = "test_document.txt"
    if not os.path.exists(test_file):
        print(f"β Test file {test_file} not found")
        return False

    try:
        # Build the processor inside the try block so that a constructor
        # failure is reported as a failed test instead of escaping and
        # aborting the whole run (the sibling tests initialise the same way).
        processor = DocumentProcessor(chunk_size=200, chunk_overlap=50)
        chunks = processor.process_file(test_file)
        print(f"β Processed {test_file} into {len(chunks)} chunks")

        # Show the first chunk, if any, as a quick sanity check.
        if chunks:
            first_chunk = chunks[0]
            print(f"First chunk preview: {first_chunk.text[:100]}...")
            print(f"Chunk metadata: {first_chunk.metadata}")
        return True
    except Exception as e:
        print(f"β Failed to process document: {e}")
        # Print the traceback as the other tests do, to make failures debuggable.
        import traceback
        traceback.print_exc()
        return False
def test_vector_store():
    """Build a small in-memory index, query it, and serialize it.

    Three hand-written chunks are indexed with ``VectorStore.build_index``,
    a query is run with ``top_k=2`` and the hits are printed, then the store
    is serialized and the length of its ``'index_base64'`` entry is printed.

    Returns:
        bool: True when every step completed without raising, False
        otherwise (the exception and its traceback are printed).
    """
    print("\n=== Testing Vector Store ===")
    try:
        store = VectorStore()
        print("β Initialized vector store")

        # Sample chunks: ids are test1..test3, chunk_index is 0..2.
        sample_texts = [
            'Vector databases are used for semantic search',
            'Machine learning models convert text to embeddings',
            'FAISS provides efficient similarity search capabilities',
        ]
        sample_chunks = [
            {
                'text': text,
                'chunk_id': f'test{position + 1}',
                'metadata': {'file_name': 'test.txt', 'chunk_index': position},
            }
            for position, text in enumerate(sample_texts)
        ]

        print("Building vector index...")
        store.build_index(sample_chunks, show_progress=True)
        print("β Built vector index")

        # Run one query against the freshly built index.
        query = "How do vector databases work?"
        hits = store.search(query, top_k=2)
        print(f"Search results for '{query}':")
        for rank, hit in enumerate(hits, start=1):
            print(f" {rank}. Score: {hit.score:.3f} - {hit.text[:50]}...")

        # Serialize and report the size of the encoded index.
        payload = store.serialize()
        encoded_index = payload['index_base64']
        print(f"β Serialized data size: {len(encoded_index)} characters")
    except Exception as error:
        print(f"β Failed vector store test: {error}")
        import traceback
        traceback.print_exc()
        return False
    return True
def test_rag_tool():
    """Exercise the RAG tool end to end: ingest, retrieve context, serialize.

    Passes ``["test_document.txt"]`` to ``RAGTool.process_uploaded_files``,
    prints the processing summary, requests context for a sample query with
    ``max_chunks=2``, and finally asks the tool for its serialized data.

    Returns:
        bool: True when file processing reported success and serialized data
        was produced. False when processing reported failure, when
        serialization returned nothing, or when any step raised (the
        exception and its traceback are printed). An empty context is only
        reported as a warning and does not fail the test.
    """
    print("\n=== Testing RAG Tool ===")
    try:
        rag_tool = RAGTool()
        print("β Initialized RAG tool")

        test_files = ["test_document.txt"]
        result = rag_tool.process_uploaded_files(test_files)

        # Guard clause: nothing else can be checked if ingestion failed.
        if not result['success']:
            print(f"β {result['message']}")
            return False
        print(f"β {result['message']}")

        summary = result['summary']
        print(f"Files processed: {summary['total_files']}")
        print(f"Total chunks: {summary['total_chunks']}")

        # Context retrieval: print at most the first 300 characters.
        query = "What are the benefits of vector databases?"
        context = rag_tool.get_relevant_context(query, max_chunks=2)
        if context:
            print(f"\nContext for '{query}':")
            preview = context[:300] + "..." if len(context) > 300 else context
            print(preview)
            print("β Successfully retrieved context")
        else:
            print("β οΈ No context retrieved")

        # Serialization for deployment. A missing payload is a real failure,
        # so report it as one instead of printing "Failed" and still passing.
        serialized_data = rag_tool.get_serialized_data()
        if not serialized_data:
            print("β Failed to serialize RAG data")
            return False
        print("β Successfully serialized RAG data for deployment")
        print(f"Serialized keys: {list(serialized_data.keys())}")
        return True
    except Exception as e:
        print(f"β Failed RAG tool test: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run the dependency check and every test, then print a summary.

    The third-party packages ``sentence_transformers``, ``faiss`` and
    ``fitz`` (PyMuPDF) are imported first; if any is missing, no tests run.
    Each test function is then called in turn and its truthiness recorded.

    Returns:
        int: Process exit status. 0 when every test passed; 1 when a
        dependency is missing, a test reported failure, or a test raised.
    """
    import traceback

    print("=== Vector Database Testing ===")
    print("Testing vector database creation and functionality...")

    # Check dependencies
    print("\n=== Checking Dependencies ===")
    try:
        import sentence_transformers
        import faiss
        import fitz  # PyMuPDF
        print("β All required dependencies available")
    except ImportError as e:
        print(f"β Missing dependency: {e}")
        # Signal failure to the caller/shell instead of returning silently.
        return 1

    # Run tests
    tests = [
        ("Document Processing", test_document_processing),
        ("Vector Store", test_vector_store),
        ("RAG Tool", test_rag_tool),
    ]
    results = []
    for test_name, test_func in tests:
        print(f"\n{'='*20}")
        try:
            success = bool(test_func())
        except Exception as exc:
            # A test that raises must not prevent the remaining tests and
            # the summary from running; record it as a failure.
            print(f"{test_name} raised an unexpected error: {exc}")
            traceback.print_exc()
            success = False
        results.append((test_name, success))

    # Summary
    print(f"\n{'='*40}")
    print("TEST SUMMARY:")
    for test_name, success in results:
        status = "β PASS" if success else "β FAIL"
        print(f" {test_name}: {status}")

    all_passed = all(success for _, success in results)
    if all_passed:
        print("\nπ All tests passed! Vector database functionality is working.")
    else:
        print("\nβ οΈ Some tests failed. Check the output above for details.")
    return 0 if all_passed else 1


if __name__ == "__main__":
    # Propagate the result as the process exit status so shells and CI can
    # detect failures.
    sys.exit(main())