File size: 3,999 Bytes
6d6af66 8b775e5 6d6af66 db7a2e8 a103b3b 8b775e5 ebb9438 6d6af66 8b775e5 db7a2e8 8b775e5 6d6af66 8b775e5 6d6af66 db7a2e8 6d6af66 db7a2e8 6d6af66 db7a2e8 8b775e5 6d6af66 8b775e5 6d6af66 8b775e5 6d6af66 8b775e5 6d6af66 8b775e5 fa63dda 7a086ec 6d6af66 8b775e5 6d6af66 a103b3b 6814c00 acc42fe 6814c00 a103b3b bfb86b3 a103b3b 420580f 389b598 e2feaed ebb9438 daf8121 8b775e5 daf8121 1254e4a b23ae71 a103b3b e726d75 8b775e5 8a43307 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import atexit
import functools
from queue import Queue
from threading import Event, Thread
import time
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image
import gradio as gr
# Supported OCR languages and how many warm model replicas (worker
# threads) to keep for each; the popular languages get two workers.
_WORKERS_PER_LANG = [
    ("ch", 2),
    ("en", 2),
    ("fr", 1),
    ("german", 1),
    ("korean", 1),
    ("japan", 1),
]
LANG_CONFIG = {code: {"num_workers": count} for code, count in _WORKERS_PER_LANG}
# Maximum number of requests Gradio will process concurrently.
CONCURRENCY_LIMIT = 8
class PaddleOCRModelManager(object):
    """Thread pool that serves OCR requests from a fixed set of model replicas.

    Each worker thread builds its own model instance via ``model_factory``
    (models are generally not thread-safe, so replicas are never shared)
    and then serves requests from a shared queue.  ``infer`` is safe to
    call from many threads at once.
    """

    def __init__(self,
                 num_workers,
                 model_factory):
        """Start ``num_workers`` worker threads.

        Blocks until every worker has finished constructing its model, so
        the manager is fully warmed up when ``__init__`` returns.

        :param num_workers: number of replicas / worker threads to spawn.
        :param model_factory: zero-argument callable returning an object
            with an ``ocr(*args, **kwargs)`` method.
        """
        super().__init__()
        self._model_factory = model_factory
        self._queue = Queue()
        self._workers = []
        self._model_initialized_event = Event()
        for _ in range(num_workers):
            # daemon=True: non-daemon workers would block interpreter
            # shutdown inside threading._shutdown() *before* atexit
            # handlers run, so an atexit-registered close() could never
            # execute and the process would hang on exit.
            worker = Thread(target=self._worker, daemon=True)
            worker.start()
            # Wait for this worker's model to finish loading before
            # starting the next one (the event is reused per worker).
            self._model_initialized_event.wait()
            self._model_initialized_event.clear()
            self._workers.append(worker)

    def infer(self, *args, **kwargs):
        """Run one OCR request on any free worker and return its result.

        Blocks until a worker picks up the request and finishes.  If the
        model raised, the same exception is re-raised in the caller.
        """
        # XXX: Should I use a more lightweight data structure, say, a future?
        result_queue = Queue(maxsize=1)
        self._queue.put((args, kwargs, result_queue))
        success, payload = result_queue.get()
        if success:
            return payload
        else:
            raise payload

    def close(self):
        """Shut down all workers: send one sentinel per worker, then join."""
        for _ in self._workers:
            self._queue.put(None)
        for worker in self._workers:
            worker.join()

    def _worker(self):
        """Worker loop: build a model replica, then serve requests until a
        ``None`` sentinel arrives."""
        model = self._model_factory()
        # Tell __init__ this replica is ready.
        self._model_initialized_event.set()
        while True:
            item = self._queue.get()
            if item is None:
                break
            args, kwargs, result_queue = item
            try:
                result = model.ocr(*args, **kwargs)
                result_queue.put((True, result))
            except Exception as e:
                # Ship the exception back so infer() can re-raise it in
                # the calling thread instead of killing this worker.
                result_queue.put((False, e))
            finally:
                self._queue.task_done()
def create_model(lang):
    """Build a CPU-only PaddleOCR instance (with angle classification) for *lang*."""
    options = {"lang": lang, "use_angle_cls": True, "use_gpu": False}
    return PaddleOCR(**options)
# One warm worker pool per supported language, built eagerly at import
# time so the first request does not pay the model-loading cost.
model_managers = {
    code: PaddleOCRModelManager(cfg["num_workers"],
                                functools.partial(create_model, lang=code))
    for code, cfg in LANG_CONFIG.items()
}

def close_model_managers():
    """Shut down every language's worker pool."""
    for manager in model_managers.values():
        manager.close()

# XXX: Not sure if gradio allows adding custom teardown logic
atexit.register(close_model_managers)
def inference(img, lang):
    """Run OCR on the image file at path *img* with the pool for *lang*.

    :param img: filesystem path to the input image (gr.Image type='filepath').
    :param lang: key into ``model_managers`` selecting the language pool.
    :returns: PaddleOCR's first-page results — a list of
        ``[box, (text, score)]`` entries — or ``None`` when no text was
        detected (gr.Dataframe renders ``None`` as an empty table).
    """
    ocr = model_managers[lang]
    # Dispatch to a pooled worker; [0] selects the single page's results.
    result = ocr.infer(img, cls=True)[0]
    # NOTE(review): the previous version also rendered the boxes with
    # draw_ocr() — reopening the image and loading ./simfang.ttf on every
    # request — and then discarded the rendered image, since the UI
    # output is a Dataframe. That dead (and crash-prone) work is removed,
    # along with per-request debug prints.
    return result
# Make the input/output image panes tall and full-width in the UI.
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"

demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Image(type='filepath', label='Input'),
        gr.Dropdown(choices=list(LANG_CONFIG.keys()), value='en', label='language'),
    ],
    outputs=gr.Dataframe(),
    cache_examples=False,
    css=css,
    concurrency_limit=CONCURRENCY_LIMIT,
)
demo.launch(debug=False, show_error=True)