"""Gradio demo: EasyOCR text detection + VietOCR recognition + extractive QA.

EasyOCR locates text regions in the uploaded image, VietOCR re-reads each
region, and a question-answering model extracts the value of every requested
label from the recognised text.
"""
import functools
import json

import pandas as pd
import PIL
from PIL import Image
from PIL import ImageDraw
import gradio as gr
import torch
import easyocr
import omegaconf
import cv2
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
from iftc.spelling_corrector import acronym_correction

from vietocr.vietocr.tool.predictor import Predictor
from vietocr.vietocr.tool.config import Cfg

# NOTE: the original `spell = Speller(lang='vi')` was removed: `Speller` was
# never imported (NameError at import time) and `spell` was never used.

# VietOCR configuration.
# To use custom weights instead, load a config with
# Cfg.load_config_from_file(...) and set config['weights'] to the checkpoint.
config = Cfg.load_config_from_name('vgg_transformer')
config['cnn']['pretrained'] = True
config['predictor']['beamsearch'] = True
config['device'] = 'cpu'  # alternative noted by the author: 'mps'

recognitor = Predictor(config)

# Question-answering model used to pull labelled values out of the OCR text.
model_name = "timpal0l/mdeberta-v3-base-squad2"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)


def query(doc, labels):
    """Answer one question per label against the recognised document text.

    Args:
        doc: Text to search (the concatenated OCR output).
        labels: Comma-separated label names; each becomes the question
            ``"<label>?"``. Surrounding whitespace is ignored and blank
            entries are dropped.

    Returns:
        Dict mapping each label to the answer string returned by the QA
        pipeline. When ``doc`` is blank every label maps to ``''``.
    """
    questions = [label.strip() for label in labels.split(",")]
    questions = [question for question in questions if question]

    # The QA pipeline rejects an empty context, so answer with blanks rather
    # than crashing when OCR found no text.
    if not doc.strip():
        return {question: '' for question in questions}

    result = {}
    for question in questions:
        res = nlp({'question': question + '?', 'context': doc})
        print(res)
        result[question] = res['answer']
    return result


def draw_boxes(image, bounds, color='yellow', width=2):
    """Outline every detected text region on ``image`` in place.

    Args:
        image: PIL image to draw on (modified in place).
        bounds: Detections whose first element is a sequence of four
            ``(x, y)`` corner points.
        color: Outline colour.
        width: Outline thickness in pixels.

    Returns:
        The same ``image`` object, for convenience.
    """
    draw = ImageDraw.Draw(image)
    for bound in bounds:
        p0, p1, p2, p3 = bound[0]
        # Repeat the first corner at the end to close the quadrilateral.
        draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
    return image


@functools.lru_cache(maxsize=4)
def _get_reader(languages):
    """Return a cached EasyOCR reader for a tuple of language codes."""
    return easyocr.Reader(list(languages))


def inference(filepath, lang, labels):
    """Run detection, recognition and label extraction on one image.

    Args:
        filepath: Path of the image to process.
        lang: List of EasyOCR language codes.
        labels: Comma-separated labels passed to :func:`query`.

    Returns:
        ``['result.jpg', texts, jsonText]``: the path of the annotated image
        written to disk, the tab-joined recognised text, and the extracted
        label/value pairs serialised as a JSON string.

    Raises:
        ValueError: If the image at ``filepath`` cannot be read.
    """
    img = cv2.imread(filepath)
    if img is None:
        raise ValueError(f'Could not read image: {filepath}')

    # NumPy image shape is (rows, cols, channels), i.e. (height, width, ...).
    height, width = img.shape[:2]
    # OpenCV loads BGR; PIL (and therefore the recogniser) expects RGB.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Building a Reader loads model weights, so reuse it across requests.
    reader = _get_reader(tuple(lang))
    bounds = reader.readtext(filepath)

    recognised = []
    for bbox, _text, _prob in bounds:
        xs = [int(point[0]) for point in bbox]
        ys = [int(point[1]) for point in bbox]

        # Axis-aligned bounding rectangle, clamped to the image.
        min_x = max(0, min(xs))
        max_x = min(width - 1, max(xs))
        min_y = max(0, min(ys))
        max_y = min(height - 1, max(ys))

        # A degenerate box yields an empty crop the recogniser cannot handle.
        if max_x <= min_x or max_y <= min_y:
            continue

        cropped_image = Image.fromarray(rgb[min_y:max_y, min_x:max_x])
        out = recognitor.predict(cropped_image)
        recognised.append(acronym_correction(out))

    # Every fragment is prefixed with a tab (same layout as before).
    texts = ''.join('\t' + fragment for fragment in recognised)

    result = query(texts, labels)
    jsonText = json.dumps(result)

    with PIL.Image.open(filepath) as source:
        # JPEG cannot store alpha/palette modes, so normalise to RGB first.
        im = source.convert('RGB')
    draw_boxes(im, bounds)
    im.save('result.jpg')
    return ['result.jpg', texts, jsonText]


title = 'EasyOCR'
description = (
    'Gradio demo for EasyOCR. EasyOCR demo supports 80+ languages.To use it, '
    'simply upload your image and choose a language from the dropdown menu, '
    'or click one of the examples to load them. Read more at the links below.'
)
# NOTE(review): this text originally spanned several physical lines inside a
# plain double-quoted string (a syntax error); any HTML markup it may have
# contained is not visible here — confirm against the upstream demo.
article = (
    "\nReady-to-use OCR with 80+ supported languages and all popular writing "
    "scripts including Latin, Chinese, Arabic, Devanagari, Cyrillic and etc. "
    "| Github Repo\n"
)
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
choices = [
    "vi"
]

gr.Interface(
    inference,
    [
        gr.inputs.Image(type='filepath', label='Input'),
        gr.inputs.CheckboxGroup(choices, type="value", default=['vi'], label='language'),
        gr.inputs.Textbox(label='Labels', default='Người nộp, Tiếp nhận hồ sơ của'),
    ],
    [
        gr.outputs.Image(type='pil', label='Output'),
        gr.outputs.Textbox(label='Text'),
        gr.outputs.JSON(label='document'),
    ],
    title=title,
    description=description,
    article=article,
    css=css,
    examples=[
        ['giaytiepnhan.jpg', ['vi'], 'Người nộp, Tiếp nhận hồ sơ của'],
        ['giaytiepnhan2.jpg', ['vi'], 'Mã số hồ sơ, Địa chỉ'],
    ],
    enable_queue=True
).launch(debug=True)