File size: 1,739 Bytes
b701d44
 
 
5b9baff
 
 
b701d44
5b9baff
fabf362
5b9baff
a0040a5
fabf362
5b9baff
ab9088f
 
a0040a5
fabf362
 
 
 
a0040a5
2ebc710
fabf362
2ebc710
 
 
 
 
ab9088f
fabf362
 
 
5b9baff
fabf362
 
ab9088f
a0040a5
fabf362
a0040a5
5b9baff
ab9088f
a0040a5
 
b701d44
5b9baff
 
 
 
b701d44
 
5b9baff
 
b701d44
5b9baff
 
 
a0040a5
5b9baff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import json
import base64
from io import BytesIO
from PIL import Image
import gradio as gr

from inference import OcrReorderPipeline
from transformers import AutoProcessor, LayoutLMv3Model, AutoTokenizer

# 1) Load model/tokenizer/processor
repo      = "Uddipan107/ocr-layoutlmv3-base-t5-small"
model     = LayoutLMv3Model.from_pretrained(repo)
tokenizer = AutoTokenizer.from_pretrained(repo, subfolder="preprocessor")
processor = AutoProcessor.from_pretrained(repo, subfolder="preprocessor", apply_ocr=False)
pipe      = OcrReorderPipeline(model, tokenizer, processor, device=0)

def infer(image_path, json_file):
    img_name = os.path.basename(image_path)

    # Parse NDJSON
    data = []
    with open(json_file.name, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            data.append(json.loads(line))

    entry = next((e for e in data if e["img_name"] == img_name), None)
    if entry is None:
        return f"❌ No JSON entry found for image '{img_name}'"

    words = entry["src_word_list"]
    boxes = entry["src_wordbox_list"]

    # Read & encode image
    img = Image.open(image_path).convert("RGB")
    buf = BytesIO(); img.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()

    # ⚠️ Pass as `inputs`
    reordered = pipe(inputs=b64, words=words, boxes=boxes)[0]
    return reordered

demo = gr.Interface(
    fn=infer,
    inputs=[
        gr.Image(type="filepath", label="Upload Image"),
        gr.File(label="Upload JSON (NDJSON)")
    ],
    outputs="text",
    title="OCR Reorder Pipeline"
)

if __name__ == "__main__":
    # set share=True if you want a public link
    demo.launch()