Bobholamovic committed on
Commit
66403e6
·
1 Parent(s): bf0a8e3

[Feat] Init code

Browse files
Files changed (2) hide show
  1. app.py +38 -104
  2. requirements.txt +2 -4
app.py CHANGED
@@ -1,130 +1,64 @@
1
- import atexit
2
- import functools
3
- from queue import Queue
4
- from threading import Event, Thread
5
 
6
- from paddleocr import PaddleOCR, draw_ocr
7
- from PIL import Image
8
  import gradio as gr
 
 
9
 
10
-
11
- LANG_CONFIG = {
12
- "ch": {"num_workers": 2},
13
- "en": {"num_workers": 2},
14
- "fr": {"num_workers": 1},
15
- "german": {"num_workers": 1},
16
- "korean": {"num_workers": 1},
17
- "japan": {"num_workers": 1},
18
- }
19
- CONCURRENCY_LIMIT = 8
20
-
21
-
22
- class PaddleOCRModelManager(object):
23
- def __init__(self,
24
- num_workers,
25
- model_factory):
26
- super().__init__()
27
- self._model_factory = model_factory
28
- self._queue = Queue()
29
- self._workers = []
30
- self._model_initialized_event = Event()
31
- for _ in range(num_workers):
32
- worker = Thread(target=self._worker, daemon=False)
33
- worker.start()
34
- self._model_initialized_event.wait()
35
- self._model_initialized_event.clear()
36
- self._workers.append(worker)
37
-
38
- def infer(self, *args, **kwargs):
39
- # XXX: Should I use a more lightweight data structure, say, a future?
40
- result_queue = Queue(maxsize=1)
41
- self._queue.put((args, kwargs, result_queue))
42
- success, payload = result_queue.get()
43
- if success:
44
- return payload
45
- else:
46
- raise payload
47
-
48
- def close(self):
49
- for _ in self._workers:
50
- self._queue.put(None)
51
- for worker in self._workers:
52
- worker.join()
53
-
54
- def _worker(self):
55
- model = self._model_factory()
56
- self._model_initialized_event.set()
57
- while True:
58
- item = self._queue.get()
59
- if item is None:
60
- break
61
- args, kwargs, result_queue = item
62
- try:
63
- result = model.ocr(*args, **kwargs)
64
- result_queue.put((True, result))
65
- except Exception as e:
66
- result_queue.put((False, e))
67
- finally:
68
- self._queue.task_done()
69
-
70
-
71
- def create_model(lang):
72
- return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)
73
-
74
-
75
- model_managers = {}
76
- for lang, config in LANG_CONFIG.items():
77
- model_manager = PaddleOCRModelManager(config["num_workers"], functools.partial(create_model, lang=lang))
78
- model_managers[lang] = model_manager
79
 
80
 
81
- def close_model_managers():
82
- for manager in model_managers.values():
83
- manager.close()
 
84
 
 
 
 
 
85
 
86
- # XXX: Not sure if gradio allows adding custom teardown logic
87
- atexit.register(close_model_managers)
 
 
 
 
 
88
 
 
 
89
 
90
- def inference(img, lang):
91
- ocr = model_managers[lang]
92
- result = ocr.infer(img, cls=True)[0]
93
- img_path = img
94
- image = Image.open(img_path).convert("RGB")
95
- boxes = [line[0] for line in result]
96
- txts = [line[1][0] for line in result]
97
- scores = [line[1][1] for line in result]
98
- im_show = draw_ocr(image, boxes, txts, scores,
99
- font_path="./simfang.ttf")
100
- return im_show
101
 
102
 
103
- title = 'PaddleOCR'
104
- description = '''
105
- - Gradio demo for PaddleOCR. PaddleOCR demo supports Chinese, English, French, German, Korean and Japanese.
106
- - To use it, simply upload your image and choose a language from the dropdown menu, or click one of the examples to load them. Read more at the links below.
107
  - [Docs](https://paddlepaddle.github.io/PaddleOCR/), [Github Repository](https://github.com/PaddlePaddle/PaddleOCR).
108
- '''
109
 
110
  examples = [
111
- ['en_example.jpg','en'],
112
- ['cn_example.jpg','ch'],
113
- ['jp_example.jpg','japan'],
114
  ]
115
 
116
  css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
117
  gr.Interface(
118
  inference,
119
  [
120
- gr.Image(type='filepath', label='Input'),
121
- gr.Dropdown(choices=list(LANG_CONFIG.keys()), value='en', label='language')
122
  ],
123
- gr.Image(type='pil', label='Output'),
124
  title=title,
125
  description=description,
126
  examples=examples,
127
  cache_examples=False,
128
  css=css,
129
- concurrency_limit=CONCURRENCY_LIMIT,
130
- ).launch(debug=False)
 
1
+ import base64
2
+ import io
3
+ import os
 
4
 
 
 
5
  import gradio as gr
6
+ import requests
7
+ from PIL import Image
8
 
9
+ API_URL = "https://t7nd0cf3u89ck4bf.aistudio-hub.baidu.com/ocr"
10
+ TOKEN = os.getenv("API_TOKEN", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
 
13
+ def inference(img):
14
+ with io.BytesIO() as buffer:
15
+ img.save(buffer, format="png")
16
+ img_base64 = base64.b64encode(buffer.getvalue()).decode("ascii")
17
 
18
+ headers = {
19
+ "Authorization": f"token {TOKEN}",
20
+ "Content-Type": "application/json",
21
+ }
22
 
23
+ response = requests.post(
24
+ API_URL,
25
+ json={"file": img_base64, "fileType": 1},
26
+ headers=headers,
27
+ timeout=1000,
28
+ )
29
+ response.raise_for_status()
30
 
31
+ result = response.json()
32
+ ocr_img_url = result["result"]["ocrResults"][0]["ocrImage"]
33
 
34
+ response = requests.get(ocr_img_url, timeout=10)
35
+ response.raise_for_status()
36
+ return Image.open(io.BytesIO(response.content))
 
 
 
 
 
 
 
 
37
 
38
 
39
+ title = "PP-OCRv5"
40
+ description = """
41
+ - Gradio demo for PP-OCRv5. This demo supports Chinese, English, French, German, Korean, and Japanese.
42
+ - To use it, simply upload your image, or click one of the examples to load them. Read more at the links below.
43
  - [Docs](https://paddlepaddle.github.io/PaddleOCR/), [Github Repository](https://github.com/PaddlePaddle/PaddleOCR).
44
+ """
45
 
46
  examples = [
47
+ ["en_example.jpg"],
48
+ ["cn_example.jpg"],
49
+ ["jp_example.jpg"],
50
  ]
51
 
52
  css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
53
  gr.Interface(
54
  inference,
55
  [
56
+ gr.Image(type="pil", label="Input"),
 
57
  ],
58
+ gr.Image(type="pil", label="Output"),
59
  title=title,
60
  description=description,
61
  examples=examples,
62
  cache_examples=False,
63
  css=css,
64
+ ).launch(debug=False)
 
requirements.txt CHANGED
@@ -1,5 +1,3 @@
1
- Pillow
2
- Gradio
3
  requests
4
- paddlepaddle
5
- paddleocr
 
1
+ gradio
2
+ pillow
3
  requests