File size: 892 Bytes
c737346
 
7d0890e
e08b826
6b3d009
e08b826
1b11586
6b3d009
1b11586
e08b826
6b3d009
1b11586
9b952ec
 
 
 
 
 
 
59dc3f5
c6bafd5
 
 
59dc3f5
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# https://huggingface.co/spaces/Mishmosh/MichelleAssessment3

# Build-time setup commands -- Dockerfile / shell syntax, NOT Python.
#
# These lines were pasted into this module as live statements. Python cannot
# parse `RUN ...` or a bare `python -m pip ...` command, so the file failed
# with a SyntaxError before any of the imports below could run. They are kept
# here as comments for reference only; run them from the image build
# (Dockerfile) or a shell, not from this module.

# Install Rust
#RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

#RUN python -m pip install --upgrade pip
#python -m pip install --upgrade pip

#pip install --upgrade pip
#RUN pip install --no-cache-dir -r requirements.txt
# NOTE(review): `--use-feature=in-tree-build` looks unnecessary on recent pip
# releases -- confirm against the pip version used by the build.
#RUN pip install --use-feature=in-tree-build tokenizers

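# Python dependencies (notebook-style installs, kept for reference;
# presumably these belong in requirements.txt -- verify):
#!pip install PyPDF2
#!pip install sentencepiece
#!pip install pdfminer.six
#!pip install pdfplumber
#!pip install pdf2image
#!pip install Pillow
#!pip install pytesseract
# System packages required by pdf2image (poppler) and pytesseract (tesseract):
#!apt-get install poppler-utils
#!apt install tesseract-ocr
#!apt install libtesseract-dev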
import PyPDF2
from pdfminer.high_level import extract_pages, extract_text
from pdfminer.layout import LTTextContainer, LTChar, LTRect, LTFigure
import pdfplumber
from PIL import Image
from pdf2image import convert_from_path
import pytesseract
import os