Spaces:
Build error
Build error
File size: 1,086 Bytes
005a185 d29af94 e205139 b39b068 d29af94 b39b068 d29af94 005a185 d29af94 005a185 d29af94 005a185 d29af94 005a185 d29af94 005a185 d29af94 005a185 d29af94 005a185 d29af94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
import streamlit as st
import fitz # PyMuPDF
from huggingface_hub import snapshot_download
import os
from pdf2image import convert_from_path
from PIL import Image
import tempfile
# Fetch the PDF-Extract-Kit weights once. The directory's existence is used
# as the "already downloaded" marker, so later runs of this script skip the
# download call entirely.
model_dir = "./pdf-extract-kit"
if not os.path.exists(model_dir):
    snapshot_download(
        repo_id="opendatalab/pdf-extract-kit-1.0",
        local_dir=model_dir,
        max_workers=20,
    )
# --- Streamlit UI: upload a PDF, render each page as an image -------------
st.title("PDF Table Extractor with PDF-Extract-Kit-1.0")
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    st.write("Converting PDF to images...")

    # convert_from_path needs a filesystem path, so spill the upload to a
    # temporary file. delete=False lets the handle be closed (flushing the
    # data) before conversion; the file is removed explicitly below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
        # getvalue() returns the whole buffer regardless of the current
        # stream position, unlike read().
        tmp_pdf.write(uploaded_file.getvalue())
        tmp_pdf_path = tmp_pdf.name

    try:
        images = convert_from_path(tmp_pdf_path)
    finally:
        # Always remove the temporary PDF, even if conversion fails, so that
        # uploads do not accumulate on disk.
        try:
            os.remove(tmp_pdf_path)
        except OSError:
            # Best-effort cleanup: a failed delete must not mask a
            # conversion error or break the page.
            pass

    for i, img in enumerate(images):
        # NOTE(review): newer Streamlit releases may prefer
        # use_container_width over use_column_width; confirm against the
        # pinned Streamlit version before changing.
        st.image(img, caption=f"Page {i+1}", use_column_width=True)

        # Here you would call the table detection model on each image
        st.info("🛠 Table detection model would run here... (to be implemented)")

    st.success("Done processing PDF!")
|