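# NOTE: the lines below appear to be page-header residue from a web file
# viewer (not code); they are kept as comments so the module still parses.
# Uddipan Basu Bir
# Download checkpoint from HF hub in OcrReorderPipeline
# 2ebc710
# raw
# history blame
# 1.67 kB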
import os, json, base64
from io import BytesIO
from PIL import Image
import gradio as gr
from inference import OcrReorderPipeline
from transformers import AutoProcessor, LayoutLMv3Model, AutoTokenizer
# Load the model, tokenizer and processor once at import time and wrap them in
# a single OcrReorderPipeline instance (`pipe`) that infer() reuses per request.
repo = "Uddipan107/ocr-layoutlmv3-base-t5-small"
model = LayoutLMv3Model.from_pretrained(repo)
# Tokenizer and processor are both loaded from the repo's "preprocessor" subfolder.
tokenizer = AutoTokenizer.from_pretrained(repo, subfolder="preprocessor")
# apply_ocr=False: infer() passes words and boxes taken from the uploaded
# NDJSON, so the processor is asked not to run its own OCR.
processor = AutoProcessor.from_pretrained(repo, subfolder="preprocessor", apply_ocr=False)
# NOTE(review): device=0 presumably selects the first CUDA GPU — confirm how
# OcrReorderPipeline behaves on a host without one.
pipe = OcrReorderPipeline(model, tokenizer, processor, device=0)
def _find_entry(ndjson_path, img_name):
    """Return the first NDJSON record whose "img_name" equals *img_name*.

    The file is read line by line as UTF-8. Blank lines are skipped, as are
    records that are not JSON objects or that lack an "img_name" key. Reading
    stops at the first match, so later lines are never parsed.

    Returns:
        The matching record (a dict), or None when no record matches.

    Raises:
        ValueError: a non-blank line is not valid JSON; the message carries
            the 1-based line number.
    """
    with open(ndjson_path, "r", encoding="utf-8") as f:
        for lineno, raw in enumerate(f, start=1):
            raw = raw.strip()
            if not raw:
                continue
            try:
                record = json.loads(raw)
            except json.JSONDecodeError as err:
                raise ValueError(f"line {lineno}: {err.msg}") from err
            if isinstance(record, dict) and record.get("img_name") == img_name:
                return record
    return None


def infer(image_path, json_file):
    """Run the OCR-reorder pipeline for an uploaded image and its NDJSON metadata.

    Args:
        image_path: Filesystem path of the uploaded image. Its basename is the
            key looked up in the NDJSON file.
        json_file: The uploaded NDJSON file, given either as a path
            (str / os.PathLike) or as an object exposing the path via ``.name``.
            Each non-blank line is one JSON record; the record used is the
            first whose "img_name" equals the image's basename, and it must
            provide "src_word_list" and "src_wordbox_list".

    Returns:
        The first element of the pipeline's result, or a human-readable
        message starting with "❌" when an input is missing, the NDJSON
        cannot be parsed, no record matches, or the record lacks a field.
    """
    # Either upload can be absent when the form is submitted incomplete.
    if not image_path or json_file is None:
        return "❌ Please upload both an image and an NDJSON file"

    img_name = os.path.basename(image_path)

    # Accept a plain path as well as a file-like wrapper that carries `.name`.
    if isinstance(json_file, (str, os.PathLike)):
        ndjson_path = json_file
    else:
        ndjson_path = json_file.name

    try:
        entry = _find_entry(ndjson_path, img_name)
    except ValueError as err:  # also covers undecodable (non-UTF-8) content
        return f"❌ Could not parse the uploaded NDJSON file ({err})"
    if entry is None:
        return f"❌ No JSON entry found for image '{img_name}'"

    try:
        words = entry["src_word_list"]
        boxes = entry["src_wordbox_list"]
    except KeyError as err:
        return f"❌ JSON entry for image '{img_name}' is missing field {err}"

    # Re-encode the image as base64 PNG, which is the form handed to the
    # pipeline. The context manager closes the source file once the RGB copy
    # has been made.
    with Image.open(image_path) as src:
        rgb = src.convert("RGB")
    buf = BytesIO()
    rgb.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()

    # Run pipeline
    return pipe(b64, words, boxes)[0]
# Gradio front end: two upload widgets (an image delivered to infer() as a
# file path, and the NDJSON metadata file) feeding a plain-text output.
demo = gr.Interface(
    title="OCR Reorder (Image + NDJSON upload)",
    fn=infer,
    inputs=[
        gr.Image(label="Upload Image", type="filepath"),
        gr.File(label="Upload JSON (NDJSON format)"),
    ],
    outputs="text",
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()