Bobholamovic
committed on
Commit
·
66403e6
1
Parent(s):
bf0a8e3
[Feat] Init code
Browse files- app.py +38 -104
- requirements.txt +2 -4
app.py
CHANGED
@@ -1,130 +1,64 @@
|
|
1 |
-
import
|
2 |
-
import
|
3 |
-
|
4 |
-
from threading import Event, Thread
|
5 |
|
6 |
-
from paddleocr import PaddleOCR, draw_ocr
|
7 |
-
from PIL import Image
|
8 |
import gradio as gr
|
|
|
|
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
"ch": {"num_workers": 2},
|
13 |
-
"en": {"num_workers": 2},
|
14 |
-
"fr": {"num_workers": 1},
|
15 |
-
"german": {"num_workers": 1},
|
16 |
-
"korean": {"num_workers": 1},
|
17 |
-
"japan": {"num_workers": 1},
|
18 |
-
}
|
19 |
-
CONCURRENCY_LIMIT = 8
|
20 |
-
|
21 |
-
|
22 |
-
class PaddleOCRModelManager(object):
    """Serve OCR requests from a fixed pool of worker threads.

    Each worker builds its own model by calling ``model_factory()`` and then
    processes requests pulled from a shared queue, so a single model instance
    is only ever used by the thread that created it.

    Args:
        num_workers: Number of worker threads (and model instances) to start.
        model_factory: Zero-argument callable returning an object with an
            ``ocr(*args, **kwargs)`` method.

    Raises:
        Exception: Whatever ``model_factory`` raises while a worker is
            starting up. Workers that were already started are shut down
            before the error is re-raised.
    """

    def __init__(self,
                 num_workers,
                 model_factory):
        super().__init__()
        self._model_factory = model_factory
        self._queue = Queue()
        self._workers = []
        self._model_initialized_event = Event()
        # Set by a worker whose model factory failed; read by __init__ after
        # the event fires so the failure surfaces instead of hanging forever.
        self._init_error = None
        for _ in range(num_workers):
            worker = Thread(target=self._worker, daemon=False)
            worker.start()
            # Models are created one at a time: wait for this worker to
            # finish (or fail) its initialization before starting the next.
            self._model_initialized_event.wait()
            self._model_initialized_event.clear()
            if self._init_error is not None:
                error = self._init_error
                worker.join()
                # Stop the workers that did start so no non-daemon thread
                # keeps the process alive after a failed construction.
                self.close()
                raise error
            self._workers.append(worker)

    def infer(self, *args, **kwargs):
        """Run ``model.ocr(*args, **kwargs)`` on a worker and return its result.

        Blocks until a worker has handled the request. An exception raised
        by ``model.ocr`` in the worker is re-raised here in the caller.
        """
        # XXX: Should I use a more lightweight data structure, say, a future?
        result_queue = Queue(maxsize=1)
        self._queue.put((args, kwargs, result_queue))
        success, payload = result_queue.get()
        if success:
            return payload
        else:
            raise payload

    def close(self):
        """Stop all workers and wait for them to exit. Safe to call twice."""
        # One ``None`` sentinel per worker: each worker consumes exactly one
        # and leaves its loop.
        for _ in self._workers:
            self._queue.put(None)
        for worker in self._workers:
            worker.join()
        # Forget the joined threads so a repeated close() is a no-op rather
        # than leaving stray sentinels in the queue.
        self._workers.clear()

    def _worker(self):
        """Thread body: build a model, then serve requests until a sentinel."""
        try:
            model = self._model_factory()
        except Exception as e:
            # Report the failure to __init__, which is blocked on the event.
            self._init_error = e
            self._model_initialized_event.set()
            return
        self._model_initialized_event.set()
        while True:
            item = self._queue.get()
            if item is None:
                break
            args, kwargs, result_queue = item
            try:
                result = model.ocr(*args, **kwargs)
                result_queue.put((True, result))
            except Exception as e:
                # Hand the exception back to the caller of infer().
                result_queue.put((False, e))
            finally:
                self._queue.task_done()
|
69 |
-
|
70 |
-
|
71 |
-
def create_model(lang):
    """Build a PaddleOCR engine for ``lang`` with angle classification on and GPU off."""
    options = {"lang": lang, "use_angle_cls": True, "use_gpu": False}
    return PaddleOCR(**options)
|
73 |
-
|
74 |
-
|
75 |
-
# One model manager per configured language, keyed by language code.
model_managers = {}
for lang, config in LANG_CONFIG.items():
    factory = functools.partial(create_model, lang=lang)
    model_managers[lang] = PaddleOCRModelManager(config["num_workers"], factory)
|
79 |
|
80 |
|
81 |
-
def
|
82 |
-
|
83 |
-
|
|
|
84 |
|
|
|
|
|
|
|
|
|
85 |
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
88 |
|
|
|
|
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
img_path = img
|
94 |
-
image = Image.open(img_path).convert("RGB")
|
95 |
-
boxes = [line[0] for line in result]
|
96 |
-
txts = [line[1][0] for line in result]
|
97 |
-
scores = [line[1][1] for line in result]
|
98 |
-
im_show = draw_ocr(image, boxes, txts, scores,
|
99 |
-
font_path="./simfang.ttf")
|
100 |
-
return im_show
|
101 |
|
102 |
|
103 |
-
title =
|
104 |
-
description =
|
105 |
-
- Gradio demo for
|
106 |
-
- To use it, simply upload your image
|
107 |
- [Docs](https://paddlepaddle.github.io/PaddleOCR/), [Github Repository](https://github.com/PaddlePaddle/PaddleOCR).
|
108 |
-
|
109 |
|
110 |
examples = [
|
111 |
-
[
|
112 |
-
[
|
113 |
-
[
|
114 |
]
|
115 |
|
116 |
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
|
117 |
gr.Interface(
|
118 |
inference,
|
119 |
[
|
120 |
-
gr.Image(type=
|
121 |
-
gr.Dropdown(choices=list(LANG_CONFIG.keys()), value='en', label='language')
|
122 |
],
|
123 |
-
gr.Image(type=
|
124 |
title=title,
|
125 |
description=description,
|
126 |
examples=examples,
|
127 |
cache_examples=False,
|
128 |
css=css,
|
129 |
-
|
130 |
-
).launch(debug=False)
|
|
|
1 |
+
import base64
|
2 |
+
import io
|
3 |
+
import os
|
|
|
4 |
|
|
|
|
|
5 |
import gradio as gr
|
6 |
+
import requests
|
7 |
+
from PIL import Image
|
8 |
|
9 |
+
API_URL = "https://t7nd0cf3u89ck4bf.aistudio-hub.baidu.com/ocr"
|
10 |
+
TOKEN = os.getenv("API_TOKEN", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
|
13 |
+
def inference(img):
    """Send an image to the remote OCR service and return the result image.

    The image is PNG-encoded, base64-wrapped and POSTed as JSON to
    ``API_URL``. The response is expected to carry a URL to a rendered OCR
    image, which is then downloaded and opened.

    Args:
        img: A PIL image (anything with a compatible ``save`` method), or
            ``None`` when no image was provided.

    Returns:
        A PIL image opened from the downloaded OCR visualisation.

    Raises:
        gr.Error: If no image was provided or the service response does not
            have the expected structure.
        requests.HTTPError: If either HTTP request returns an error status.
    """
    # Without this guard a missing input fails with an opaque AttributeError
    # on ``img.save``.
    if img is None:
        raise gr.Error("Please upload an image first.")

    with io.BytesIO() as buffer:
        img.save(buffer, format="png")
        img_base64 = base64.b64encode(buffer.getvalue()).decode("ascii")

    headers = {
        "Authorization": f"token {TOKEN}",
        "Content-Type": "application/json",
    }

    # NOTE(review): "fileType": 1 presumably marks the payload as an image;
    # confirm against the service's API documentation.
    response = requests.post(
        API_URL,
        json={"file": img_base64, "fileType": 1},
        headers=headers,
        timeout=1000,
    )
    response.raise_for_status()

    result = response.json()
    try:
        ocr_img_url = result["result"]["ocrResults"][0]["ocrImage"]
    except (KeyError, IndexError, TypeError) as e:
        # Show a readable message instead of a raw lookup error.
        raise gr.Error("The OCR service returned an unexpected response.") from e

    response = requests.get(ocr_img_url, timeout=10)
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
|
39 |
+
title = "PP-OCRv5"
|
40 |
+
description = """
|
41 |
+
- Gradio demo for PP-OCRv5. This demo supports Chinese, English, French, German, Korean, and Japanese.
|
42 |
+
- To use it, simply upload your image, or click one of the examples to load them. Read more at the links below.
|
43 |
- [Docs](https://paddlepaddle.github.io/PaddleOCR/), [Github Repository](https://github.com/PaddlePaddle/PaddleOCR).
|
44 |
+
"""
|
45 |
|
46 |
examples = [
|
47 |
+
["en_example.jpg"],
|
48 |
+
["cn_example.jpg"],
|
49 |
+
["jp_example.jpg"],
|
50 |
]
|
51 |
|
52 |
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
|
53 |
# Single-image demo: one PIL image in, one PIL image out, served by `inference`.
demo = gr.Interface(
    fn=inference,
    inputs=[gr.Image(type="pil", label="Input")],
    outputs=gr.Image(type="pil", label="Output"),
    title=title,
    description=description,
    examples=examples,
    cache_examples=False,
    css=css,
)
demo.launch(debug=False)
|
|
requirements.txt
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
requests
|
4 |
-
paddlepaddle
|
5 |
-
paddleocr
|
|
|
1 |
+
gradio
|
2 |
+
pillow
|
3 |
requests
|
|
|
|