Spaces:

milwright
/

chatui-helper

Running

App Files Files Community

chatui-helper / test_rag_fix.py

milwright

Fix RAG processing crashes with multiprocessing and memory optimizations

525ef5c 17 days ago

raw

history blame

6.03 kB

	#!/usr/bin/env python3
	"""
	Test script to verify RAG functionality fixes
	"""

	import os
	import tempfile
	import warnings
	from pathlib import Path

	# Suppress known warnings.  The ``message`` argument is a regular expression
	# that is matched against the *start* of the warning text, so each pattern
	# needs a leading ".*" to find its keyword anywhere in the message.
	warnings.filterwarnings("ignore", message=".*use_auth_token.*")
	warnings.filterwarnings("ignore", message=".*urllib3.*")
	warnings.filterwarnings("ignore", message=".*resource_tracker.*")

	# Set environment variables to prevent multiprocessing issues
	os.environ['TOKENIZERS_PARALLELISM'] = 'false'

	def test_rag_dependencies():
	    """Report which RAG-related packages can be imported.

	    ``sentence_transformers`` and ``faiss`` are treated as mandatory: the
	    function prints a failure line and returns ``False`` as soon as one of
	    them raises ``ImportError``.  ``fitz`` (PyMuPDF) and ``docx``
	    (python-docx) are optional: a missing one only prints a warning line.

	    Returns:
	        bool: ``True`` when both mandatory packages import, else ``False``.
	    """
	    print("Testing RAG dependencies...")

	    # Mandatory packages: stop at the first one that is missing.
	    try:
	        import sentence_transformers  # noqa: F401
	    except ImportError:
	        print("❌ sentence-transformers not available")
	        return False
	    else:
	        print("✅ sentence-transformers available")

	    try:
	        import faiss  # noqa: F401
	    except ImportError:
	        print("❌ faiss-cpu not available")
	        return False
	    else:
	        print("✅ faiss-cpu available")

	    # Optional packages: absence is reported but does not fail the check.
	    try:
	        import fitz  # PyMuPDF  # noqa: F401
	    except ImportError:
	        print("⚠️ PyMuPDF not available (PDF processing disabled)")
	    else:
	        print("✅ PyMuPDF available")

	    try:
	        from docx import Document  # noqa: F401
	    except ImportError:
	        print("⚠️ python-docx not available (DOCX processing disabled)")
	    else:
	        print("✅ python-docx available")

	    return True

	def test_vector_store_initialization():
	    """Construct a ``VectorStore`` and embed two short sentences.

	    The store is created with ``embedding_model="all-MiniLM-L6-v2"`` and
	    ``create_embeddings`` is called on two sample sentences; the ``shape``
	    attribute of the result is printed.  Any exception raised along the way
	    (including a failed import of ``vector_store``) is reported and turned
	    into a ``False`` result.

	    Returns:
	        bool: ``True`` when every step completes, ``False`` on any exception.
	    """
	    print("\nTesting vector store initialization...")

	    try:
	        from vector_store import VectorStore

	        # Only the model name is passed; everything else is left to the
	        # store's own defaults.
	        vector_db = VectorStore(embedding_model="all-MiniLM-L6-v2")
	        print("✅ VectorStore created successfully")

	        # A tiny embedding call is enough to show the model actually runs.
	        sample_sentences = ["This is a test sentence.", "Another test sentence."]
	        vectors = vector_db.create_embeddings(sample_sentences)
	        print(f"✅ Created embeddings: shape {vectors.shape}")
	    except Exception as exc:
	        print(f"❌ VectorStore initialization failed: {exc}")
	        return False

	    return True

	def test_document_processing():
	    """Run ``DocumentProcessor`` over a small temporary ``.txt`` file.

	    Three sentences are written to a temporary file, which is then passed to
	    ``DocumentProcessor(chunk_size=50, chunk_overlap=10).process_file``.  The
	    number of chunks and a preview of the first chunk's ``text`` and
	    ``metadata`` keys are printed.  The temporary file is removed whether or
	    not processing succeeds.

	    Returns:
	        bool: ``True`` when at least one chunk is produced; ``False`` when
	        processing raises or yields no chunks.
	    """
	    print("\nTesting document processing...")

	    try:
	        from document_processor import DocumentProcessor

	        test_file = None
	        try:
	            # Create a temporary test file
	            with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
	                # Record the path before writing so the cleanup below still
	                # removes the file if one of the writes fails.
	                test_file = f.name
	                f.write("This is a test document for RAG processing. ")
	                f.write("It contains multiple sentences that should be processed into chunks. ")
	                f.write("Each chunk should have proper metadata and be ready for embedding.")

	            processor = DocumentProcessor(chunk_size=50, chunk_overlap=10)
	            chunks = processor.process_file(test_file)

	            # An empty result means nothing was extracted from a non-empty
	            # document; report that as a failure instead of a pass.
	            if not chunks:
	                print("❌ Document processing produced no chunks")
	                return False

	            print(f"✅ Created {len(chunks)} chunks from test document")
	            print(f" First chunk: {chunks[0].text[:50]}...")
	            print(f" Metadata keys: {list(chunks[0].metadata.keys())}")

	            return True

	        finally:
	            # Clean up test file
	            if test_file is not None:
	                os.unlink(test_file)

	    except Exception as e:
	        print(f"❌ Document processing failed: {e}")
	        return False

	def test_rag_tool_integration():
	    """Exercise ``RAGTool`` end to end on a small temporary ``.txt`` file.

	    A three-sentence document is written to a temporary file and handed to
	    ``RAGTool().process_uploaded_files``.  When the returned mapping reports
	    success, its message and summary counts are printed and
	    ``get_relevant_context("test document")`` is queried; an empty search
	    result is only a warning.  The temporary file is removed once the tool
	    has been run, and any exception is reported as a failure.

	    Returns:
	        bool: ``True`` when processing reports success, ``False`` when it
	        reports failure or anything raises.
	    """
	    print("\nTesting complete RAG tool integration...")

	    try:
	        from rag_tool import RAGTool

	        # Build the throwaway input document.
	        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
	            tmp.writelines((
	                "RAG integration test document. ",
	                "This document tests the complete RAG pipeline from file processing to vector search. ",
	                "The system should handle this without crashing the server.",
	            ))
	        sample_path = tmp.name

	        try:
	            tool = RAGTool()
	            outcome = tool.process_uploaded_files([sample_path])

	            # Failure path first, so the success path reads straight down.
	            if not outcome['success']:
	                print(f"❌ RAG processing failed: {outcome['message']}")
	                return False

	            print(f"✅ RAG processing succeeded: {outcome['message']}")
	            summary = outcome['summary']
	            print(f" Files processed: {len(summary['files_processed'])}")
	            print(f" Total chunks: {summary['total_chunks']}")

	            # Search is checked too, but an empty hit list is not fatal.
	            snippet = tool.get_relevant_context("test document")
	            if not snippet:
	                print("⚠️ Search returned no results")
	            else:
	                print(f"✅ Search functionality working: {snippet[:100]}...")

	            return True

	        finally:
	            # Remove the temporary document whatever happened above.
	            os.unlink(sample_path)

	    except Exception as exc:
	        print(f"❌ RAG tool integration failed: {exc}")
	        return False

	def main():
	    """Run every RAG check in order and print an overall summary.

	    Each check is called once; a truthy return counts as a pass, while a
	    falsy return or a raised exception counts as a failure (exceptions are
	    printed, not propagated).

	    Returns:
	        bool: ``True`` when every check passed, ``False`` otherwise.
	    """
	    separator = "=" * 50
	    print("🚀 Testing RAG functionality fixes...")
	    print(separator)

	    tests = (
	        test_rag_dependencies,
	        test_vector_store_initialization,
	        test_document_processing,
	        test_rag_tool_integration,
	    )

	    passed = 0
	    for check in tests:
	        try:
	            succeeded = bool(check())
	        except Exception as exc:
	            print(f"❌ Test failed with exception: {exc}")
	            continue
	        if succeeded:
	            passed += 1

	    total = len(tests)
	    print("\n" + separator)
	    print(f"📊 Test Results: {passed}/{total} tests passed")

	    if passed != total:
	        print("⚠️ Some tests failed. Check error messages above.")
	        return False

	    print("🎉 All tests passed! RAG functionality should work correctly.")
	    return True

	if __name__ == "__main__":
	    # Turn the overall result into the process exit status (0 = all checks
	    # passed, 1 = at least one failed) so shells and CI can detect failures
	    # instead of always seeing a successful exit.
	    raise SystemExit(0 if main() else 1)