import atexit
import functools
from queue import Queue
from threading import Event, Thread
import time

from paddleocr import PaddleOCR, draw_ocr
from PIL import Image
import gradio as gr


LANG_CONFIG = {
    "ch": {"num_workers": 2},
    "en": {"num_workers": 2},
    "fr": {"num_workers": 1},
    "german": {"num_workers": 1},
    "korean": {"num_workers": 1},
    "japan": {"num_workers": 1},
}
CONCURRENCY_LIMIT = 8


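# Each language in LANG_CONFIG gets its own small pool of PaddleOCR workers,
# and CONCURRENCY_LIMIT caps how many Gradio requests may run the handler at
# once. The manager below owns the worker threads for one language: requests
# are pushed onto a shared queue, and each worker holds one loaded model, so
# concurrent requests never share a PaddleOCR instance across threads.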
class PaddleOCRModelManager(object):
    def __init__(self,
                 num_workers,
                 model_factory):
        super().__init__()
        self._model_factory = model_factory
        self._queue = Queue()
        self._workers = []
        self._model_initialized_event = Event()
        for _ in range(num_workers):
            worker = Thread(target=self._worker, daemon=False)
            worker.start()
            # Wait until this worker has finished loading its model before
            # starting the next one, so models are initialized one at a time.
            self._model_initialized_event.wait()
            self._model_initialized_event.clear()
            self._workers.append(worker)

    def infer(self, *args, **kwargs):
        # Hand the request to a worker and block until it reports back.
        result_queue = Queue(maxsize=1)
        self._queue.put((args, kwargs, result_queue))
        success, payload = result_queue.get()
        if success:
            return payload
        else:
            raise payload

    def close(self):
        # Send one sentinel per worker, then wait for them all to exit.
        for _ in self._workers:
            self._queue.put(None)
        for worker in self._workers:
            worker.join()

    def _worker(self):
        model = self._model_factory()
        self._model_initialized_event.set()
        while True:
            item = self._queue.get()
            if item is None:
                break
            args, kwargs, result_queue = item
            try:
                result = model.ocr(*args, **kwargs)
                result_queue.put((True, result))
            except Exception as e:
                result_queue.put((False, e))
            finally:
                self._queue.task_done()


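# Usage sketch for the manager (illustrative only, not executed here;
# "sample.jpg" is a hypothetical file path):
#
#     manager = PaddleOCRModelManager(2, functools.partial(create_model, lang="en"))
#     result = manager.infer("sample.jpg", cls=True)
#     manager.close()
#
# infer() forwards its arguments to PaddleOCR.ocr() on one of the workers.

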
def create_model(lang):
    # use_angle_cls=True enables the text-direction (angle) classifier and
    # use_gpu=False keeps inference on the CPU.
    return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)


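# Build one manager per configured language eagerly, at import time, so the
# first request for each language does not pay the model-loading cost.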
model_managers = {}
for lang, config in LANG_CONFIG.items():
    model_manager = PaddleOCRModelManager(config["num_workers"], functools.partial(create_model, lang=lang))
    model_managers[lang] = model_manager


def close_model_managers():
    for manager in model_managers.values():
        manager.close()


atexit.register(close_model_managers)


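# Each entry of a PaddleOCR result is [box, (text, score)]: the corner points
# of the detected text region, the recognised string, and its confidence.
# The handler below logs these pieces and returns the raw result list for
# Gradio to display.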
def inference(img, lang):
    ocr = model_managers[lang]
    result = ocr.infer(img, cls=True)[0]

    if result is not None:
        for item in result:
            print(item)
    else:
        print("'result' is None; there are no items to process.")

    print("Inference result:")
    print(result)

    if result is not None:
        img_path = img
        image = Image.open(img_path).convert("RGB")
        boxes = [line[0] for line in result]
        txts = [line[1][0] for line in result]
        scores = [line[1][1] for line in result]
        # Render the annotated image; note that im_show is computed here but
        # never returned to the UI.
        im_show = draw_ocr(image, boxes, txts, scores,
                           font_path="./simfang.ttf")
        print("Printing all results:")
        print("Boxes:")
        print(boxes)
        print("Texts:")
        print(txts)
        print("Scores:")
        print(scores)

    return result


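# Gradio UI: an image upload and a language dropdown feed inference(), and the
# return value is shown in a Dataframe. concurrency_limit bounds how many
# requests may execute the handler simultaneously.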
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
gr.Interface(
    inference,
    [
        gr.Image(type='filepath', label='Input'),
        gr.Dropdown(choices=list(LANG_CONFIG.keys()), value='en', label='language')
    ],
    gr.Dataframe(),
    cache_examples=False,
    css=css,
    concurrency_limit=CONCURRENCY_LIMIT,
).launch(debug=False, show_error=True)
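# launch() serves on http://127.0.0.1:7860 by default; server_name="0.0.0.0"
# and/or share=True are standard Gradio options for exposing the demo beyond
# the local machine (noted here as a usage hint, not used by this script).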