|
import io |
|
import streamlit as st |
|
import fitz |
|
from PIL import Image |
|
from datasets import Dataset |
|
from streamlit_cropper import st_cropper |
|
|
|
|
|
def get_padded_image(image, cropped_image):
    """Return the crop centred on a white canvas the size of ``image``.

    Args:
        image: The full source image; its ``size`` and ``mode`` define the
            canvas that is created.
        cropped_image: The region cut out of ``image``. It is converted to
            the canvas mode first when the two modes differ.

    Returns:
        A new image with the same size and mode as ``image``, filled with
        white, with the crop pasted in the middle. The paste offset is
        clamped at zero on each axis, so a crop larger than the canvas is
        anchored at the top-left corner.
    """
    target_mode = image.mode
    canvas_size = image.size

    # Only pay for a mode conversion when the modes actually differ.
    if cropped_image.mode == target_mode:
        patch = cropped_image
    else:
        patch = cropped_image.convert(target_mode)

    canvas = Image.new(target_mode, canvas_size, color="white")

    # Centre the crop on each axis; never let the offset go negative.
    offset = tuple(
        max(0, (full - part) // 2)
        for full, part in zip(canvas_size, cropped_image.size)
    )
    canvas.paste(patch, offset)
    return canvas
|
|
|
|
|
def _ensure_session_defaults():
    """Create the page index and the annotation lists if they are missing.

    Done unconditionally (not only after an upload) so that every later
    read of ``st.session_state`` is safe, including the export sidebar.
    """
    defaults = {"page_idx": 0, "cropped_images": [], "captions": []}
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _render_annotation_ui(doc):
    """Render page navigation, the cropper and the save control for ``doc``.

    Args:
        doc: An open ``fitz`` document. The caller owns it and is
            responsible for closing it.
    """
    total_pages = doc.page_count
    if total_pages == 0:
        # The modulo arithmetic below would divide by zero otherwise.
        st.error("The uploaded PDF contains no pages.")
        return

    # Wrap the stored index so it stays valid if a shorter PDF is uploaded.
    page_idx = st.session_state.page_idx % total_pages

    col_prev, col_caption, col_next = st.columns([1, 8, 1])
    with col_prev:
        if st.button("<"):
            st.session_state.page_idx = (page_idx - 1) % total_pages
            st.rerun()
    with col_caption:
        st.markdown(
            f"<center>Page {page_idx}/{total_pages - 1}</center>",
            unsafe_allow_html=True,
        )
    with col_next:
        # Raw string: the label is literally backslash + '>' (presumably to
        # keep the '>' from being parsed as Markdown). Written as "\>" it is
        # an invalid escape sequence and warns on recent Python versions.
        if st.button(r"\>"):
            st.session_state.page_idx = (page_idx + 1) % total_pages
            st.rerun()

    # Rasterise the current page to PNG bytes and hand it to the cropper.
    page = doc.load_page(page_idx)
    pix = page.get_pixmap(dpi=200)
    image = Image.open(io.BytesIO(pix.tobytes("png")))
    cropped_image = st_cropper(image, realtime_update=True)
    st.image(cropped_image)

    caption = st.text_input("Caption", key="caption")
    if st.button("Save"):
        padded_image = get_padded_image(image, cropped_image)
        st.session_state.cropped_images.append(padded_image)
        st.session_state.captions.append(caption)
        # Debug output on the server console.
        print(f"{len(st.session_state.cropped_images)=}")
        print(f"{st.session_state.cropped_images[-1].size=}")
        print(f"{st.session_state.captions[-1]=}")


def _render_export_sidebar():
    """Render the sidebar controls that push saved crops to the Hub."""
    huggingface_dataset_address = st.sidebar.text_input("Hugging Face Dataset Address")
    if not st.sidebar.button("Save to HuggingFace"):
        return

    repo_id = huggingface_dataset_address.strip()
    if not repo_id:
        st.sidebar.error("Enter a Hugging Face dataset address first.")
        return
    if not st.session_state.cropped_images:
        st.sidebar.warning("There are no saved crops to upload yet.")
        return

    dataset = Dataset.from_dict(
        {
            "image": st.session_state.cropped_images,
            "caption": st.session_state.captions,
        },
    )
    dataset.push_to_hub(repo_id)
    st.success("Dataset saved to Hugging Face")


st.header("Line Art Data Annotation App")
uploaded_pdf = st.sidebar.file_uploader("Upload a PDF", type=["pdf"])

_ensure_session_defaults()

if uploaded_pdf:
    # getvalue() returns the whole upload regardless of the stream position,
    # and the context manager closes the document on every script run
    # (including when st.rerun() interrupts it).
    with fitz.open(stream=uploaded_pdf.getvalue(), filetype="pdf") as doc:
        _render_annotation_ui(doc)

_render_export_sidebar()
|
|