First_RAG_System / day3 /pdf_loader.py
Hamid Omarov
HF Space app + minimal pipeline code (no secrets)
e7e9247
raw
history blame
384 Bytes
from langchain_community.document_loaders import PyPDFLoader
def load_pdf(file_path):
    """Load the PDF at *file_path* with ``PyPDFLoader``.

    Args:
        file_path: Location of the PDF; handed unchanged to ``PyPDFLoader``.

    Returns:
        Whatever ``PyPDFLoader(file_path).load()`` returns. The script
        section of this file takes its ``len()``, iterates over it, and
        reads ``page_content`` from each item.
    """
    return PyPDFLoader(file_path).load()
if __name__ == "__main__":
    # Manual smoke test: load the sample PDF and dump the text of each page.
    docs = load_pdf("sample.pdf")
    # Status line uses the real check-mark emoji; the previous literal was
    # the emoji's UTF-8 bytes decoded with the wrong codec (mojibake).
    print(f"✅ Loaded {len(docs)} pages")
    # Number pages from 1 so the headers match human page numbering.
    for i, page in enumerate(docs, start=1):
        print(f"--- Page {i} ---")
        print(page.page_content)