captcha / app.py
cubuvl's picture
Update app.py
cbe0e5f verified
raw
history blame
2.76 kB
import gradio as gr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image, ImageOps
import numpy as np
import io
import base64
# Load the model and the processor once, at import time; the request handlers
# in this file reuse these module-level objects on every call.
name = "chanelcolgate/trocr-base-printed_captcha_ocr"
model = VisionEncoderDecoderModel.from_pretrained(name)
# The processor is loaded from the "microsoft/trocr-base-printed" repo, not
# from `name`.
# NOTE(review): presumably the captcha checkpoint ships no processor config of
# its own — confirm before pointing both loads at the same repo.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
def prepare_image(pil_image):
    """Return an RGB version of ``pil_image`` with transparency flattened onto white.

    Args:
        pil_image: A ``PIL.Image.Image`` in any mode.

    Returns:
        A ``PIL.Image.Image`` in ``"RGB"`` mode. Images that carry
        transparency are composited over a white background; all other
        images are simply converted to RGB.
    """
    # Transparency is stored either as a dedicated alpha band (RGBA/LA/PA) or
    # as a palette / tRNS entry that Pillow records in info["transparency"]
    # (typical for indexed PNGs and GIFs). A plain convert("RGB") ignores the
    # latter, so transparent pixels would keep their arbitrary palette colour.
    has_transparency = (
        pil_image.mode in ("RGBA", "LA", "PA")
        or "transparency" in pil_image.info
    )
    if not has_transparency:
        return pil_image.convert("RGB")

    # Normalising to RGBA gives a single alpha band to use as the paste mask,
    # whichever way the source image encoded its transparency.
    rgba = pil_image.convert("RGBA")
    background = Image.new("RGB", rgba.size, (255, 255, 255))
    background.paste(rgba, mask=rgba.getchannel("A"))
    return background
def _run_ocr(image_clean):
    """Run the module-level TrOCR model on a PIL image and return the decoded text."""
    pixel_values = processor(image_clean, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    # A single image goes in, so the batch holds exactly one decoded string.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def process_image(image):
    """Recognise the text in an uploaded image.

    Args:
        image: The uploaded picture as a numpy array, or ``None`` when the
            user pressed Submit without providing an image.

    Returns:
        A ``(image_clean, generated_text)`` tuple: the RGB image that was fed
        to the model and the recognised text.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Without this guard Image.fromarray(None) fails with an opaque error.
        raise gr.Error("Please upload an image first.")

    image_clean = prepare_image(Image.fromarray(image))
    return image_clean, _run_ocr(image_clean)


def process_base64(base64_str):
    """Recognise the text in a base64-encoded image.

    Args:
        base64_str: The encoded image, either raw base64 or a full data URI
            such as ``data:image/png;base64,...``.

    Returns:
        A ``(image_clean, generated_text)`` tuple: the RGB image that was fed
        to the model and the recognised text.

    Raises:
        gr.Error: If the input is empty, is not valid base64, or does not
            decode to an image Pillow can read.
    """
    # Pasted text frequently contains line breaks or stray spaces; drop them
    # so the padding computation below sees the real payload length.
    payload = "".join((base64_str or "").split())

    # Strip the data-URI prefix (data:image/png;base64,...) if present.
    _, comma, tail = payload.partition(",")
    if comma:
        payload = tail

    if not payload:
        raise gr.Error("Please paste a base64-encoded image first.")

    # Copy/paste often loses the trailing '=' padding; restore it.
    payload += "=" * (-len(payload) % 4)

    try:
        image_data = base64.b64decode(payload)
        image = Image.open(io.BytesIO(image_data))
        # Image.open is lazy; force decoding here so that corrupt or
        # truncated data is reported by this handler.
        image.load()
    except (ValueError, OSError) as err:
        # binascii.Error is a ValueError; Pillow's "cannot identify image" and
        # truncated-file errors are OSErrors.
        raise gr.Error(
            "The pasted text is not a valid base64-encoded image."
        ) from err

    image_clean = prepare_image(image)
    return image_clean, _run_ocr(image_clean)
# Build the two-tab UI. Components are laid out in creation order inside their
# enclosing context manager, so statement order here defines the layout.
# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from the statement order — confirm against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("## Captcha OCR Demo")

    # Tab 1: upload a picture; it is passed to process_image as a numpy array.
    with gr.Tab("Upload image"):
        with gr.Row():
            image_input = gr.Image(type="numpy", label="Upload Image")
            image_output = gr.Image(type="pil", label="Processed Image")
        text_output = gr.Textbox(label="OCR Output")
        image_button = gr.Button("Submit")
        image_button.click(fn=process_image, inputs=image_input, outputs=[image_output, text_output])

    # Tab 2: paste a base64 string (optionally with a data-URI prefix); it is
    # passed to process_base64.
    with gr.Tab("Paste base64"):
        with gr.Row():
            base64_input = gr.Textbox(label="Paste base64 here", lines=5, placeholder="data:image/png;base64,...")
        with gr.Row():
            base64_output_img = gr.Image(type="pil", label="Processed Image")
            base64_output_txt = gr.Textbox(label="OCR Output")
        base64_button = gr.Button("Submit")
        base64_button.click(fn=process_base64, inputs=base64_input, outputs=[base64_output_img, base64_output_txt])

    # Sample captchas examples/captcha-0.png .. captcha-9.png, wired to the
    # upload tab's image input.
    gr.Examples(
        examples=[f"examples/captcha-{i}.png" for i in range(10)],
        inputs=image_input
    )
# Launch the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()