File size: 458 Bytes
38a37b1
 
 
 
a9d9c73
38a37b1
 
a9d9c73
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import os
import pandas as pd
from lingtrain_aligner import preprocessor, splitter, aligner, resolver, reader, vis_helper
from PyPDF2 import PdfReader

def pdf_to_text(pdf_path: str) -> str:
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages in document order, each page followed by a
        newline. A PDF without pages yields an empty string.
    """
    with open(pdf_path, "rb") as pdf_file:
        # Named ``pdf`` so the ``reader`` module imported at file level is
        # not shadowed inside this function.
        pdf = PdfReader(pdf_file)
        # Join once instead of growing a string with ``+=`` per page.
        # ``or ''`` is defensive: a page that yields no text contributes only
        # its newline instead of raising TypeError on concatenation.
        # Pages are consumed while the file is still open.
        return "".join(f"{page.extract_text() or ''}\n" for page in pdf.pages)

def save_to_excel(df, file_name: str):
    """Write ``df`` to an Excel file via its ``to_excel`` method.

    Args:
        df: Object exposing ``to_excel`` (presumably a pandas DataFrame;
            confirm against callers).
        file_name: Destination passed straight through to ``to_excel``.
    """
    # Passed as ``index=False`` so the index is not written to the sheet.
    export_options = {"index": False}
    df.to_excel(file_name, **export_options)