File size: 820 Bytes
8d27db4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
"""Data loading and PDF processing."""

from .loaders import load_annotations, filter_pdf_files

# The PDF helpers live in the sibling ``pdf_processing`` module. If importing
# them raises ImportError (e.g. a missing dependency), the package still
# imports: a notice is printed, PDF_PROCESSING_AVAILABLE is set to False and
# only the loader helpers are listed in __all__.
try:
    from .pdf_processing import (
        extract_pdf_text,
        extract_tables_from_pdf,
        extract_images_ocr_from_pdf,
        extract_pdf_content_enhanced,
    )
except ImportError as import_error:
    # Degraded mode: report why the PDF helpers are unavailable and export
    # only the loader functions.
    print(f"⚠️ PDF processing not available: {import_error}")
    print("📝 Only working with existing embeddings")
    PDF_PROCESSING_AVAILABLE = False
    __all__ = ['load_annotations', 'filter_pdf_files']
else:
    # Import succeeded: expose the loaders plus every PDF helper.
    PDF_PROCESSING_AVAILABLE = True
    __all__ = [
        'load_annotations',
        'filter_pdf_files',
        'extract_pdf_text',
        'extract_tables_from_pdf',
        'extract_images_ocr_from_pdf',
        'extract_pdf_content_enhanced',
    ]