# Spaces: Build error  (page-status header captured with this source; not part of the script)
# Streamlit app: upload a PDF, render each page to an image and preview it.
# Table detection with PDF-Extract-Kit is not wired in yet; a placeholder
# notice is shown instead.

import os
import tempfile

import fitz  # PyMuPDF
import streamlit as st
from huggingface_hub import snapshot_download
from pdf2image import convert_from_path
from PIL import Image

# Local directory that holds the downloaded model snapshot.
model_dir = "./pdf-extract-kit"

# Download the model if not already downloaded.
if not os.path.exists(model_dir):
    snapshot_download(
        repo_id="opendatalab/pdf-extract-kit-1.0",
        local_dir=model_dir,
        max_workers=20,
    )

st.title("PDF Table Extractor with PDF-Extract-Kit-1.0")

uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    st.write("Converting PDF to images...")

    # convert_from_path takes a filesystem path, so the uploaded bytes are
    # written to a temporary file first.  delete=False keeps the file around
    # after the handle is closed so it can be reopened by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
        tmp_pdf.write(uploaded_file.read())
        tmp_pdf_path = tmp_pdf.name

    try:
        images = convert_from_path(tmp_pdf_path)
    finally:
        # Because of delete=False nothing else removes the temporary PDF;
        # delete it here, whether or not the conversion succeeded, so that
        # uploads do not pile up in the temp directory.
        os.remove(tmp_pdf_path)

    for i, img in enumerate(images):
        st.image(img, caption=f"Page {i+1}", use_column_width=True)
        # Here you would call the table detection model on each image
        # NOTE(review): the original indentation was lost; the placeholder is
        # assumed to be shown once per page - confirm.
        st.info("🛠 Table detection model would run here... (to be implemented)")

    st.success("Done processing PDF!")