|
import gradio as gr |
|
from transformers import TrOCRProcessor, VisionEncoderDecoderModel |
|
from PIL import Image, ImageOps |
|
import numpy as np |
|
import io |
|
import base64 |
|
|
|
|
|
name = "chanelcolgate/trocr-base-printed_captcha_ocr" |
|
model = VisionEncoderDecoderModel.from_pretrained(name) |
|
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") |
|
|
|
|
|
def prepare_image(pil_image): |
|
"""Xử lý nền trắng nếu ảnh có nền trong suốt""" |
|
if pil_image.mode in ("RGBA", "LA"): |
|
background = Image.new("RGB", pil_image.size, (255, 255, 255)) |
|
background.paste(pil_image, mask=pil_image.split()[-1]) |
|
return background |
|
return pil_image.convert("RGB") |
|
|
|
|
|
def process_image(image): |
|
pil_image = Image.fromarray(image) |
|
image_clean = prepare_image(pil_image) |
|
|
|
pixel_values = processor(image_clean, return_tensors="pt").pixel_values |
|
generated_ids = model.generate(pixel_values) |
|
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] |
|
|
|
return image_clean, generated_text |
|
|
|
|
|
def process_base64(base64_str): |
|
|
|
if ',' in base64_str: |
|
base64_str = base64_str.split(',')[1] |
|
image_data = base64.b64decode(base64_str) |
|
image = Image.open(io.BytesIO(image_data)) |
|
image_clean = prepare_image(image) |
|
|
|
pixel_values = processor(image_clean, return_tensors="pt").pixel_values |
|
generated_ids = model.generate(pixel_values) |
|
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] |
|
|
|
return image_clean, generated_text |
|
|
|
|
|
with gr.Blocks() as demo: |
|
gr.Markdown("## Captcha OCR Demo") |
|
|
|
with gr.Tab("Upload image"): |
|
with gr.Row(): |
|
image_input = gr.Image(type="numpy", label="Upload Image") |
|
image_output = gr.Image(type="pil", label="Processed Image") |
|
text_output = gr.Textbox(label="OCR Output") |
|
image_button = gr.Button("Submit") |
|
image_button.click(fn=process_image, inputs=image_input, outputs=[image_output, text_output]) |
|
|
|
with gr.Tab("Paste base64"): |
|
with gr.Row(): |
|
base64_input = gr.Textbox(label="Paste base64 here", lines=5, placeholder="data:image/png;base64,...") |
|
with gr.Row(): |
|
base64_output_img = gr.Image(type="pil", label="Processed Image") |
|
base64_output_txt = gr.Textbox(label="OCR Output") |
|
base64_button = gr.Button("Submit") |
|
base64_button.click(fn=process_base64, inputs=base64_input, outputs=[base64_output_img, base64_output_txt]) |
|
|
|
gr.Examples( |
|
examples=[f"examples/captcha-{i}.png" for i in range(10)], |
|
inputs=image_input |
|
) |
|
|
|
if __name__ == "__main__": |
|
demo.launch() |
|
|