chatui-helper / test_rag_fix.py
milwright's picture
Fix RAG processing crashes with multiprocessing and memory optimizations
525ef5c
raw
history blame
6.03 kB
#!/usr/bin/env python3
"""
Test script to verify RAG functionality fixes
"""
import os
import tempfile
import warnings
from pathlib import Path
# Message patterns (regexes) of warnings that are known and deliberately
# silenced while these checks run.
_IGNORED_WARNING_PATTERNS = (
    ".*use_auth_token.*",
    ".*urllib3.*",
    ".*resource_tracker.*",
)
for _pattern in _IGNORED_WARNING_PATTERNS:
    warnings.filterwarnings("ignore", message=_pattern)

# Turn off tokenizers parallelism up front; per the original note this is
# done to prevent multiprocessing issues.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
def test_rag_dependencies():
    """Check that the packages used for RAG can be imported.

    ``sentence_transformers`` and ``faiss`` are required: the check stops and
    returns ``False`` as soon as one of them fails to import. ``fitz``
    (PyMuPDF) and ``docx`` (python-docx) are optional: a failed import only
    prints a warning and does not affect the result.

    Returns:
        bool: ``True`` when both required packages import, ``False`` otherwise.
    """
    print("Testing RAG dependencies...")

    # Required packages: give up on the first one that cannot be imported.
    try:
        import sentence_transformers  # noqa: F401 - imported only to probe availability
    except ImportError:
        print("❌ sentence-transformers not available")
        return False
    print("✅ sentence-transformers available")

    try:
        import faiss  # noqa: F401 - imported only to probe availability
    except ImportError:
        print("❌ faiss-cpu not available")
        return False
    print("✅ faiss-cpu available")

    # Optional packages: report what is missing but keep going.
    try:
        import fitz  # noqa: F401 - PyMuPDF
    except ImportError:
        print("⚠️ PyMuPDF not available (PDF processing disabled)")
    else:
        print("✅ PyMuPDF available")

    try:
        from docx import Document  # noqa: F401 - python-docx
    except ImportError:
        print("⚠️ python-docx not available (DOCX processing disabled)")
    else:
        print("✅ python-docx available")

    return True
def test_vector_store_initialization():
    """Check that a ``VectorStore`` can be built and can embed text.

    Imports ``VectorStore`` from the project's ``vector_store`` module,
    constructs it with the ``all-MiniLM-L6-v2`` embedding model and embeds two
    short sentences, printing the ``shape`` of the result.

    Returns:
        bool: ``True`` when every step completes; ``False`` when any step
        raises (the exception is printed rather than propagated).
    """
    print("\nTesting vector store initialization...")
    try:
        from vector_store import VectorStore

        # NOTE(review): the original comment describes this as "CPU-only
        # settings"; only the model name is passed here, so any device
        # selection must happen inside VectorStore - confirm.
        store = VectorStore(embedding_model="all-MiniLM-L6-v2")
        print("✅ VectorStore created successfully")

        # Exercise a small embedding operation end to end.
        test_texts = ["This is a test sentence.", "Another test sentence."]
        embeddings = store.create_embeddings(test_texts)
        print(f"✅ Created embeddings: shape {embeddings.shape}")
        return True
    except Exception as e:
        # Top-level check boundary: report any failure instead of crashing.
        print(f"❌ VectorStore initialization failed: {e}")
        return False
def test_document_processing():
    """Check that ``DocumentProcessor`` can chunk a small text file.

    Writes a three-sentence document to a temporary ``.txt`` file, runs it
    through ``DocumentProcessor(chunk_size=50, chunk_overlap=10)`` and prints
    the number of chunks plus, when there are any, a preview of the first
    chunk's text and its metadata keys. The temporary file is always removed.

    Returns:
        bool: ``True`` when processing completes without raising (even if no
        chunks were produced); ``False`` when any step raises (the exception
        is printed rather than propagated).
    """
    print("\nTesting document processing...")
    try:
        from document_processor import DocumentProcessor

        test_file = None
        try:
            # delete=False so the file survives the ``with`` block and can be
            # re-opened by path; it is removed in the ``finally`` below. The
            # file is created inside the ``try`` so a failed write cannot
            # leave it behind.
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".txt", delete=False, encoding="utf-8"
            ) as f:
                test_file = f.name
                f.write("This is a test document for RAG processing. ")
                f.write("It contains multiple sentences that should be processed into chunks. ")
                f.write("Each chunk should have proper metadata and be ready for embedding.")

            processor = DocumentProcessor(chunk_size=50, chunk_overlap=10)
            chunks = processor.process_file(test_file)
            print(f"✅ Created {len(chunks)} chunks from test document")
            if chunks:
                print(f" First chunk: {chunks[0].text[:50]}...")
                print(f" Metadata keys: {list(chunks[0].metadata.keys())}")
            return True
        finally:
            # Clean up the test file if it was created.
            if test_file is not None:
                os.unlink(test_file)
    except Exception as e:
        # Top-level check boundary: report any failure instead of crashing.
        print(f"❌ Document processing failed: {e}")
        return False
def test_rag_tool_integration():
    """Check the RAG pipeline end to end through ``RAGTool``.

    Writes a short document to a temporary ``.txt`` file, passes it to
    ``RAGTool().process_uploaded_files`` and, when the returned dict reports
    ``success``, prints the summary and runs one
    ``get_relevant_context("test document")`` query. An empty search result
    only produces a warning. The temporary file is always removed.

    Returns:
        bool: ``True`` when processing reports success and the search call
        does not raise; ``False`` when processing reports failure or any step
        raises (the exception is printed rather than propagated).
    """
    print("\nTesting complete RAG tool integration...")
    try:
        from rag_tool import RAGTool

        test_file = None
        try:
            # delete=False so the file survives the ``with`` block and can be
            # re-opened by path; it is removed in the ``finally`` below. The
            # file is created inside the ``try`` so a failed write cannot
            # leave it behind.
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".txt", delete=False, encoding="utf-8"
            ) as f:
                test_file = f.name
                f.write("RAG integration test document. ")
                f.write("This document tests the complete RAG pipeline from file processing to vector search. ")
                f.write("The system should handle this without crashing the server.")

            rag_tool = RAGTool()
            result = rag_tool.process_uploaded_files([test_file])
            if not result['success']:
                print(f"❌ RAG processing failed: {result['message']}")
                return False

            print(f"✅ RAG processing succeeded: {result['message']}")
            print(f" Files processed: {len(result['summary']['files_processed'])}")
            print(f" Total chunks: {result['summary']['total_chunks']}")

            # Exercise the search path; no hits is reported but not fatal.
            context = rag_tool.get_relevant_context("test document")
            if context:
                print(f"✅ Search functionality working: {context[:100]}...")
            else:
                print("⚠️ Search returned no results")
            return True
        finally:
            # Clean up the test file if it was created.
            if test_file is not None:
                os.unlink(test_file)
    except Exception as e:
        # Top-level check boundary: report any failure instead of crashing.
        print(f"❌ RAG tool integration failed: {e}")
        return False
def main(tests=None):
    """Run the RAG checks in order and print a pass/fail summary.

    Each check is called with no arguments and counts as passed when it
    returns a truthy value. A check that raises is reported and counted as
    failed; the remaining checks still run.

    Args:
        tests: Optional iterable of zero-argument callables to run instead of
            the default set. When omitted, the four RAG checks defined in
            this module are run.

    Returns:
        bool: ``True`` when every check passed, ``False`` otherwise.
    """
    print("🚀 Testing RAG functionality fixes...")
    print("=" * 50)

    if tests is None:
        tests = [
            test_rag_dependencies,
            test_vector_store_initialization,
            test_document_processing,
            test_rag_tool_integration,
        ]
    else:
        # Materialize so any iterable can be both iterated and counted.
        tests = list(tests)

    passed = 0
    total = len(tests)
    for test in tests:
        try:
            if test():
                passed += 1
        except Exception as e:
            # Keep going so one crashing check does not hide the others.
            print(f"❌ Test failed with exception: {e}")

    print("\n" + "=" * 50)
    print(f"📊 Test Results: {passed}/{total} tests passed")
    if passed == total:
        print("🎉 All tests passed! RAG functionality should work correctly.")
        return True

    print("⚠️ Some tests failed. Check error messages above.")
    return False
if __name__ == "__main__":
    # Turn main()'s boolean result into the process exit status (0 when every
    # check passed, 1 otherwise) so shells and CI can detect a failed run.
    raise SystemExit(0 if main() else 1)