# Hugging Face Spaces status at time of capture: Runtime error
# https://huggingface.co/spaces/Mishmosh/MichelleAssessment3 | |
# Install the Rust toolchain with rustup (-y = non-interactive).
# NOTE(review): presumably needed so pip can build `tokenizers` from source
# when no prebuilt wheel is available -- confirm.
# The installer is downloaded to a file first instead of being piped into sh:
# with the default /bin/sh a failed `curl` inside a pipe would be masked,
# because only the exit status of the last command in the pipe is checked.
# NOTE(review): the script is still fetched without a checksum or pinned
# version; consider pinning for reproducible builds.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
    && sh /tmp/rustup-init.sh -y \
    && rm /tmp/rustup-init.sh

# rustup installs cargo/rustc under $HOME/.cargo/bin and only edits shell
# profile files, which later RUN steps never source, so PATH is set explicitly.
# Assumes the build runs as root (HOME=/root) -- TODO confirm against the base image.
ENV PATH="/root/.cargo/bin:${PATH}"

# Upgrade pip before installing dependencies. This line previously had no RUN
# keyword, which Docker rejects as an unknown instruction.
RUN python -m pip install --no-cache-dir --upgrade pip

# Install the application's Python dependencies without keeping pip's cache.
RUN pip install --no-cache-dir -r requirements.txt

# NOTE(review): recent pip releases build in-tree by default, so the
# --use-feature=in-tree-build flag may be redundant or rejected by the pip
# just installed above -- verify and drop it if so.
RUN pip install --no-cache-dir --use-feature=in-tree-build tokenizers
#!pip install PyPDF2 | |
#!pip install sentencepiece | |
#!pip install pdfminer.six | |
#!pip install pdfplumber | |
#!pip install pdf2image | |
#!pip install Pillow | |
#!pip install pytesseract | |
# @title | |
#!apt-get install poppler-utils | |
#!apt install tesseract-ocr | |
#!apt install libtesseract-dev | |
import PyPDF2 | |
from pdfminer.high_level import extract_pages, extract_text | |
from pdfminer.layout import LTTextContainer, LTChar, LTRect, LTFigure | |
import pdfplumber | |
from PIL import Image | |
from pdf2image import convert_from_path | |
import pytesseract | |
import os | |