Spaces:

Uddipan107
/

ocr-reorder-space

Running

File size: 1,853 Bytes

fabf362
 
 
5b9baff
 
 
fabf362
5b9baff
fabf362
5b9baff
fabf362
 
5b9baff
ab9088f
 
fabf362
 
 
 
 
 
 
 
 
ab9088f
fabf362
 
 
 
5b9baff
fabf362
 
ab9088f
fabf362
 
 
5b9baff
ab9088f
fabf362
5b9baff
 
 
 
 
fabf362
 
 
 
5b9baff
 
fabf362
5b9baff

import os
import json
import base64
from io import BytesIO
from PIL import Image
import gradio as gr

from inference import OcrReorderPipeline
from transformers import AutoProcessor, LayoutLMv3Model, AutoTokenizer

# Checkpoint identifier shared by the model, tokenizer and processor loads.
repo = "Uddipan107/ocr-layoutlmv3-base-t5-small"

# The model is loaded from the checkpoint root ...
model = LayoutLMv3Model.from_pretrained(repo)

# ... while the tokenizer and processor are loaded from its "preprocessor"
# subfolder. `apply_ocr=False` is forwarded to the processor; presumably this
# is because words and boxes are supplied by the caller — TODO confirm.
tokenizer = AutoTokenizer.from_pretrained(repo, subfolder="preprocessor")
processor = AutoProcessor.from_pretrained(
    repo,
    subfolder="preprocessor",
    apply_ocr=False,
)

# Single shared pipeline instance, built once at import time and called by
# `infer` for every request.
# NOTE(review): `device=0` presumably selects the first GPU; confirm how
# OcrReorderPipeline behaves on CPU-only hardware.
pipe = OcrReorderPipeline(model, tokenizer, processor, device=0)

def infer(image_path, json_file):
    """Return the reordered OCR text for an uploaded image.

    The image's base filename is looked up in the uploaded JSON annotations.
    The matching entry's words and boxes are then passed, together with a
    base64-encoded PNG rendering of the image, to the module-level ``pipe``.

    Args:
        image_path: Filesystem path of the uploaded image, or ``None`` when
            no image was uploaded.
        json_file: The uploaded JSON file, given either as a path string or
            as an object exposing the path through ``.name``; ``None`` when
            no file was uploaded. The file is expected to contain a list of
            entries (a single entry object is also accepted), each carrying
            ``img_name``, ``src_word_list`` and ``src_wordbox_list``.

    Returns:
        The first element of the pipeline output, or a message starting
        with "❌" when an input is missing or no entry matches the image.

    Raises:
        OSError: If the JSON file or the image cannot be opened.
        json.JSONDecodeError: If the uploaded file is not valid JSON.
        KeyError: If the matching entry lacks the word or box list.
    """
    # Gradio passes None for inputs the user left empty; answer with a
    # message instead of crashing inside os.path.basename / open.
    if not image_path or json_file is None:
        return "❌ Please upload both an image and a JSON file"

    # The uploaded filename is the lookup key into the JSON entries.
    img_name = os.path.basename(image_path)

    # Uploads may arrive as a plain path string or as a tempfile-like wrapper
    # whose path lives in `.name` (this differs between Gradio versions), so
    # accept both forms.
    json_path = getattr(json_file, "name", json_file)
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Tolerate a single entry object at the JSON root, and skip entries that
    # are not objects or carry no "img_name" instead of raising on them.
    entries = data if isinstance(data, list) else [data]
    entry = next(
        (
            e
            for e in entries
            if isinstance(e, dict) and e.get("img_name") == img_name
        ),
        None,
    )
    if entry is None:
        return f"❌ No JSON entry found for image '{img_name}'"

    words = entry["src_word_list"]
    boxes = entry["src_wordbox_list"]

    # Re-encode the image as an RGB PNG held in memory; the context manager
    # guarantees the source file handle is released.
    with Image.open(image_path) as img:
        buf = BytesIO()
        img.convert("RGB").save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()

    # The pipeline result is indexable; only its first element is returned.
    return pipe(b64, words, boxes)[0]

# Input widgets, listed in the positional order `infer` receives them.
# The image is delivered as a file path so its filename can be matched
# against the JSON entries.
_image_input = gr.Image(type="filepath", label="Upload Image")
# The JSON upload holds the list of per-image entries.
_json_input = gr.File(label="Upload JSON file")

# Web UI wiring: two uploads in, plain text out.
demo = gr.Interface(
    fn=infer,
    inputs=[_image_input, _json_input],
    outputs="text",
    title="OCR Reorder (match image → JSON entry)",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()