File size: 1,853 Bytes
fabf362
 
 
5b9baff
 
 
fabf362
5b9baff
fabf362
5b9baff
fabf362
 
5b9baff
ab9088f
 
fabf362
 
 
 
 
 
 
 
 
ab9088f
fabf362
 
 
 
5b9baff
fabf362
 
ab9088f
fabf362
 
 
5b9baff
ab9088f
fabf362
5b9baff
 
 
 
 
fabf362
 
 
 
5b9baff
 
fabf362
5b9baff
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import json
import base64
from io import BytesIO
from PIL import Image
import gradio as gr

from inference import OcrReorderPipeline
from transformers import AutoProcessor, LayoutLMv3Model, AutoTokenizer

# 1) Build the reorder pipeline once at import time so every request reuses it.
# The tokenizer and processor are read from the repo's "preprocessor" subfolder.
repo = "Uddipan107/ocr-layoutlmv3-base-t5-small"
model = LayoutLMv3Model.from_pretrained(repo)
tokenizer = AutoTokenizer.from_pretrained(repo, subfolder="preprocessor")
# apply_ocr=False: words and boxes are supplied by the caller (see `infer`).
processor = AutoProcessor.from_pretrained(
    repo,
    subfolder="preprocessor",
    apply_ocr=False,
)
# NOTE(review): device=0 presumably selects the first GPU — confirm against
# OcrReorderPipeline before running on a CPU-only host.
pipe = OcrReorderPipeline(model, tokenizer, processor, device=0)

def infer(image_path, json_file):
    """Reorder the OCR words of an uploaded image using its JSON entry.

    The uploaded JSON is expected to hold a list of entries, each a dict with
    ``img_name``, ``src_word_list`` and ``src_wordbox_list`` keys. The entry
    whose ``img_name`` equals the file name of the uploaded image supplies the
    words and boxes that are handed to the module-level ``pipe``.

    Args:
        image_path: Filesystem path of the uploaded image.
        json_file: The uploaded JSON file, either as a path string or as an
            object exposing its path through a ``name`` attribute.

    Returns:
        The first element of the pipeline's result, or a "❌ ..." message when
        an upload is missing or no JSON entry matches the image name.
    """
    # Guard against the form being submitted with an upload missing.
    if not image_path or json_file is None:
        return "❌ Please upload both an image and a JSON file"

    # 2) Extract the filename the user uploaded
    img_name = os.path.basename(image_path)

    # 3) Load the entire JSON; assume it's a list of entries.
    # The upload may arrive as a plain path or as a file-like wrapper whose
    # ``name`` attribute is the path, so accept both.
    if isinstance(json_file, (str, os.PathLike)):
        json_path = json_file
    else:
        json_path = json_file.name
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # 4) Find the entry matching this image; entries that are not dicts or
    # that lack "img_name" are skipped instead of raising.
    entry = next(
        (
            e
            for e in data
            if isinstance(e, dict) and e.get("img_name") == img_name
        ),
        None,
    )
    if entry is None:
        return f"❌ No JSON entry found for image '{img_name}'"

    words = entry["src_word_list"]
    boxes = entry["src_wordbox_list"]

    # 5) Read the image and encode it as base64 PNG for the pipeline.
    # The context manager closes the source file once the RGB copy exists.
    with Image.open(image_path) as src:
        rgb = src.convert("RGB")
    buf = BytesIO()
    rgb.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()

    # 6) Call the pipeline and return the reordered text
    return pipe(b64, words, boxes)[0]

# Gradio UI: one image upload plus one JSON upload in, plain text out.
demo = gr.Interface(
    fn=infer,
    inputs=[
        # type="filepath" hands `infer` a path, so it can match on the filename.
        gr.Image(type="filepath", label="Upload Image"),
        # The JSON file holding the list of per-image entries.
        gr.File(label="Upload JSON file"),
    ],
    outputs="text",
    title="OCR Reorder (match image → JSON entry)",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()