import os

import pandas as pd
from lingtrain_aligner import (
    preprocessor,
    splitter,
    aligner,
    resolver,
    reader,
    vis_helper,
)
from PyPDF2 import PdfReader


def pdf_to_text(pdf_path: str) -> str:
    """Extract the text of every page of a PDF file into one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text of all pages in order, with a newline appended
        after each page. An empty string if the document has no pages.
    """
    with open(pdf_path, "rb") as pdf_file:
        # Named ``pdf_reader`` so the ``reader`` module imported from
        # lingtrain_aligner is not shadowed inside this function.
        pdf_reader = PdfReader(pdf_file)
        # Defensive: a page whose extraction yields nothing is treated as
        # an empty page instead of failing on string concatenation.
        return "".join(
            (page.extract_text() or "") + "\n" for page in pdf_reader.pages
        )


def save_to_excel(df: pd.DataFrame, file_name: str) -> None:
    """Write *df* to an Excel file without the index column.

    Args:
        df: Data frame to save.
        file_name: Destination path passed to ``DataFrame.to_excel``.
    """
    df.to_excel(file_name, index=False)