Spaces:
Running
Running
#!/usr/bin/env python3 | |
""" | |
Test script to verify RAG functionality fixes | |
""" | |
import os | |
import tempfile | |
import warnings | |
from pathlib import Path | |
# Suppress known warnings: ignore any warning whose message matches one of
# these patterns.
for _pattern in (".*use_auth_token.*", ".*urllib3.*", ".*resource_tracker.*"):
    warnings.filterwarnings("ignore", message=_pattern)
del _pattern  # keep the loop variable out of the module namespace

# Turn off tokenizer parallelism to prevent multiprocessing issues.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
def test_rag_dependencies():
    """Check that the packages used by the RAG pipeline can be imported.

    ``sentence_transformers`` and ``faiss`` are treated as required: the
    first failed import prints an error and ends the check. ``fitz``
    (PyMuPDF) and ``docx`` (python-docx) are treated as optional: a failed
    import only prints a warning.

    Returns:
        bool: True when both required packages import, otherwise False.
    """
    print("Testing RAG dependencies...")

    # Required packages: bail out on the first one that is missing.
    try:
        import sentence_transformers  # noqa: F401 - the import is the check
        print("✅ sentence-transformers available")
    except ImportError:
        print("❌ sentence-transformers not available")
        return False

    try:
        import faiss  # noqa: F401 - the import is the check
        print("✅ faiss-cpu available")
    except ImportError:
        print("❌ faiss-cpu not available")
        return False

    # Optional packages: a missing one is reported but does not fail the check.
    try:
        import fitz  # noqa: F401 - PyMuPDF
        print("✅ PyMuPDF available")
    except ImportError:
        print("⚠️ PyMuPDF not available (PDF processing disabled)")

    try:
        from docx import Document  # noqa: F401 - the import is the check
        print("✅ python-docx available")
    except ImportError:
        print("⚠️ python-docx not available (DOCX processing disabled)")

    return True
def test_vector_store_initialization():
    """Create a VectorStore and embed two short sentences.

    The ``vector_store`` module is imported inside the function so that a
    missing or broken module is reported as a failed check rather than
    aborting the whole script at import time.

    Returns:
        bool: True when the store is created and ``create_embeddings``
        returns; False when any step raises.
    """
    print("\nTesting vector store initialization...")

    try:
        from vector_store import VectorStore

        store = VectorStore(embedding_model="all-MiniLM-L6-v2")
        print("✅ VectorStore created successfully")

        # Embed a tiny batch to confirm the model actually runs.
        test_texts = ["This is a test sentence.", "Another test sentence."]
        embeddings = store.create_embeddings(test_texts)
        print(f"✅ Created embeddings: shape {embeddings.shape}")

        return True
    except Exception as e:
        # Broad catch is deliberate: any failure here is a failed check.
        print(f"❌ VectorStore initialization failed: {e}")
        return False
def test_document_processing():
    """Run DocumentProcessor over a small temporary text file.

    Writes three sentences to a temporary ``.txt`` file, processes it with
    ``chunk_size=50`` and ``chunk_overlap=10``, prints a short summary of the
    first chunk, and removes the temporary file afterwards whether or not
    processing succeeded.

    Returns:
        bool: True when processing yields at least one chunk; False when no
        chunks are produced or any step raises.
    """
    print("\nTesting document processing...")

    try:
        from document_processor import DocumentProcessor

        # delete=False keeps the file on disk after the ``with`` block so its
        # path can be handed to the processor; it is removed in ``finally``.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.txt', delete=False, encoding='utf-8'
        ) as f:
            f.write("This is a test document for RAG processing. ")
            f.write("It contains multiple sentences that should be processed into chunks. ")
            f.write("Each chunk should have proper metadata and be ready for embedding.")
            test_file = f.name

        try:
            processor = DocumentProcessor(chunk_size=50, chunk_overlap=10)
            chunks = processor.process_file(test_file)

            # A non-empty document that yields nothing means chunking is broken.
            if not chunks:
                print("❌ Document processing produced no chunks")
                return False

            print(f"✅ Created {len(chunks)} chunks from test document")
            print(f"   First chunk: {chunks[0].text[:50]}...")
            print(f"   Metadata keys: {list(chunks[0].metadata.keys())}")
            return True
        finally:
            # Clean up the test file.
            os.unlink(test_file)

    except Exception as e:
        # Broad catch is deliberate: any failure here is a failed check.
        print(f"❌ Document processing failed: {e}")
        return False
def test_rag_tool_integration():
    """Exercise RAGTool end to end on a small temporary text file.

    Writes a short document to a temporary ``.txt`` file, passes it to
    ``RAGTool.process_uploaded_files``, prints the reported summary, then
    queries ``get_relevant_context("test document")``. The temporary file is
    removed afterwards whether or not processing succeeded.

    Returns:
        bool: True when ``process_uploaded_files`` reports success (an empty
        search result only prints a warning); False when it reports failure
        or any step raises.
    """
    print("\nTesting complete RAG tool integration...")

    try:
        from rag_tool import RAGTool

        # delete=False keeps the file on disk after the ``with`` block so its
        # path can be handed to the tool; it is removed in ``finally``.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.txt', delete=False, encoding='utf-8'
        ) as f:
            f.write("RAG integration test document. ")
            f.write("This document tests the complete RAG pipeline from file processing to vector search. ")
            f.write("The system should handle this without crashing the server.")
            test_file = f.name

        try:
            rag_tool = RAGTool()
            result = rag_tool.process_uploaded_files([test_file])

            if not result['success']:
                print(f"❌ RAG processing failed: {result['message']}")
                return False

            print(f"✅ RAG processing succeeded: {result['message']}")
            print(f"   Files processed: {len(result['summary']['files_processed'])}")
            print(f"   Total chunks: {result['summary']['total_chunks']}")

            # Test search functionality.
            context = rag_tool.get_relevant_context("test document")
            if context:
                print(f"✅ Search functionality working: {context[:100]}...")
            else:
                # Reported, but not treated as a failure of the check.
                print("⚠️ Search returned no results")

            return True
        finally:
            # Clean up the test file.
            os.unlink(test_file)

    except Exception as e:
        # Broad catch is deliberate: any failure here is a failed check.
        print(f"❌ RAG tool integration failed: {e}")
        return False
def main(tests=None):
    """Run the RAG checks and print a pass/fail summary.

    Args:
        tests: Optional iterable of zero-argument callables. A callable
            counts as passed when it returns a truthy value; one that raises
            is reported and counted as failed. When omitted, the four checks
            defined in this module are run.

    Returns:
        bool: True when every check passed, otherwise False.
    """
    print("🔍 Testing RAG functionality fixes...")
    print("=" * 50)

    if tests is None:
        tests = [
            test_rag_dependencies,
            test_vector_store_initialization,
            test_document_processing,
            test_rag_tool_integration,
        ]
    else:
        # Materialise once so generators can be both iterated and counted.
        tests = list(tests)

    passed = 0
    total = len(tests)

    for test in tests:
        try:
            if test():
                passed += 1
        except Exception as e:
            # One crashing check must not stop the remaining checks.
            print(f"❌ Test failed with exception: {e}")

    print("\n" + "=" * 50)
    print(f"📊 Test Results: {passed}/{total} tests passed")

    if passed == total:
        print("🎉 All tests passed! RAG functionality should work correctly.")
        return True

    print("⚠️ Some tests failed. Check error messages above.")
    return False
if __name__ == "__main__":
    # Propagate the overall result as the process exit status so shells and
    # CI can detect failures: 0 when every check passed, 1 otherwise.
    raise SystemExit(0 if main() else 1)