import os | |
import pandas as pd | |
from lingtrain_aligner import preprocessor, splitter, aligner, resolver, reader, vis_helper | |
from PyPDF2 import PdfReader | |
def pdf_to_text(pdf_path: str) -> str:
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text of all pages, in page order, with a newline
        appended after each page's text. An empty string when the
        document has no pages.
    """
    with open(pdf_path, "rb") as pdf_file:
        # Named ``pdf_reader`` so the local does not shadow the ``reader``
        # module imported from lingtrain_aligner at the top of the file.
        pdf_reader = PdfReader(pdf_file)
        # Extract inside the ``with`` block so the underlying file is still
        # open; a single join avoids repeated string concatenation.
        return "".join(
            page.extract_text() + "\n" for page in pdf_reader.pages
        )
def save_to_excel(df, file_name: str) -> None:
    """Write ``df`` to an Excel file via its ``to_excel`` method.

    Args:
        df: Object exposing ``to_excel`` (presumably a pandas DataFrame;
            confirm against callers).
        file_name: Destination passed to ``to_excel`` as its first argument.
    """
    # index=False is forwarded unchanged; in pandas it omits the index
    # from the written sheet.
    df.to_excel(file_name, index=False)