SpiritualChatBot / RAG_BOT /tests /unit /test_document_processor.py
bk-anupam
Refactor unit tests for PDF processor, Telegram bot, and vector store
4623a33
import unittest
from RAG_BOT.document_processor import DocumentProcessor
from langchain_core.documents import Document
from datetime import datetime
class TestDocumentProcessor(unittest.TestCase):
def setUp(self):
"""Setup a DocumentProcessor instance for use in tests."""
self.processor = DocumentProcessor()
# Tests for extract_date_from_text (English formats)
def test_extract_date_from_text_yyyy_mm_dd(self):
text = "Document Date: 2023-10-27"
self.assertEqual(self.processor.extract_date_from_text(text), "2023-10-27")
def test_extract_date_from_text_dd_mm_yyyy_slash(self):
text = "Date: 15/04/2023"
self.assertEqual(self.processor.extract_date_from_text(text), "2023-04-15")
def test_extract_date_from_text_dd_mm_yyyy_dot(self):
text = "Date: 01.01.2024"
self.assertEqual(self.processor.extract_date_from_text(text), "2024-01-01")
def test_extract_date_from_text_dd_mm_yy_dot(self):
text = "Date: 10.12.23"
self.assertEqual(self.processor.extract_date_from_text(text), "2023-12-10")
def test_extract_date_from_text_dd_mm_yy_dash(self):
text = "Date: 05-06-22"
self.assertEqual(self.processor.extract_date_from_text(text), "2022-06-05")
def test_extract_date_from_text_d_m_yyyy_dot(self):
text = "Date: 1.1.2024"
self.assertEqual(self.processor.extract_date_from_text(text), "2024-01-01")
def test_extract_date_from_text_dd_m_yyyy_dot(self):
text = "Date: 10.1.2024"
self.assertEqual(self.processor.extract_date_from_text(text), "2024-01-10")
def test_extract_date_from_text_d_mm_yyyy_dot(self):
text = "Date: 1.12.2024"
self.assertEqual(self.processor.extract_date_from_text(text), "2024-12-01")
def test_extract_date_from_text_no_date(self):
text = "This text has no date."
self.assertIsNone(self.processor.extract_date_from_text(text))
# Tests for extract_date_from_text (Hindi formats)
def test_extract_date_from_text_hindi_yyyy_mm_dd_dash(self):
text = "दिनांक: २०२३-१०-२७"
self.assertEqual(self.processor.extract_date_from_text(text), "2023-10-27")
def test_extract_date_from_text_hindi_dd_mm_yyyy_slash(self):
text = "दिनांक: १५/०४/२०२३"
self.assertEqual(self.processor.extract_date_from_text(text), "2023-04-15")
def test_extract_date_from_text_hindi_dd_mm_yyyy_dot(self):
text = "दिनांक: ०१.०१.२०२४"
self.assertEqual(self.processor.extract_date_from_text(text), "2024-01-01")
def test_extract_date_from_text_hindi_dd_mm_yy_dot(self):
text = "दिनांक: १०.१२.२३"
self.assertEqual(self.processor.extract_date_from_text(text), "2023-12-10")
def test_extract_date_from_text_hindi_dd_mm_yy_dash(self):
text = "दिनांक: ०५-०६-२२"
self.assertEqual(self.processor.extract_date_from_text(text), "2022-06-05")
def test_extract_date_from_text_hindi_d_m_yyyy_dot(self):
text = "दिनांक: १.१.२०२४"
self.assertEqual(self.processor.extract_date_from_text(text), "2024-01-01")
def test_extract_date_from_text_hindi_dd_m_yyyy_dot(self):
text = "दिनांक: १०.१.२०२४"
self.assertEqual(self.processor.extract_date_from_text(text), "2024-01-10")
def test_extract_date_from_text_hindi_d_mm_yyyy_dot(self):
text = "दिनांक: १.१२.२०२४"
self.assertEqual(self.processor.extract_date_from_text(text), "2024-12-01")
# Tests for get_murli_type
def test_get_murli_type_avyakt_english(self):
text = "This is an Avyakt Murli."
self.assertTrue(self.processor.get_murli_type(text))
def test_get_murli_type_avyakt_english_case_insensitive(self):
text = "This is an AVYAKT Murli."
self.assertTrue(self.processor.get_murli_type(text))
def test_get_murli_type_avyakt_hindi(self):
text = "यह एक अव्यक्त मुरली है।"
self.assertTrue(self.processor.get_murli_type(text))
def test_get_murli_type_sakar_english(self):
text = "This is a Sakar Murli."
self.assertFalse(self.processor.get_murli_type(text))
def test_get_murli_type_sakar_hindi(self):
text = "यह एक साकार मुरली है।"
self.assertFalse(self.processor.get_murli_type(text))
if __name__ == '__main__':
unittest.main()