Charles Kabui
commited on
Commit
·
af37085
1
Parent(s):
d9699d7
examples
Browse files- analysis.ipynb +145 -83
- main.py +21 -11
analysis.ipynb
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
},
|
12 |
{
|
13 |
"cell_type": "code",
|
14 |
-
"execution_count":
|
15 |
"metadata": {},
|
16 |
"outputs": [],
|
17 |
"source": [
|
@@ -301,108 +301,170 @@
|
|
301 |
},
|
302 |
{
|
303 |
"cell_type": "code",
|
304 |
-
"execution_count":
|
305 |
"metadata": {},
|
306 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
307 |
"source": [
|
308 |
"from main import app\n",
|
309 |
"\n",
|
310 |
"model_path = '../detectron2-layout-parser/model_final.pth'\n",
|
311 |
"config_path = '../detectron2-layout-parser/config.yaml'\n",
|
312 |
-
"
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
"metadata": {},
|
319 |
-
"outputs": [],
|
320 |
-
"source": [
|
321 |
-
"from PIL import Image\n",
|
322 |
-
"from PIL import ImageDraw\n",
|
323 |
-
"back = Image.open(\n",
|
324 |
-
" '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.3.jpg')\n",
|
325 |
-
"x0, y0, x1, y1 = [100, 100, 500, 500]\n",
|
326 |
-
"width, height = x1 - x0, y1 - y0\n",
|
327 |
-
"outline_width = 10\n",
|
328 |
-
"poly = Image.new('RGBA', (1000, 1000))\n",
|
329 |
-
"pdraw = ImageDraw.Draw(poly)\n",
|
330 |
-
"pdraw.rectangle([x0, y0, x1-outline_width, y1-outline_width],\n",
|
331 |
-
" fill=(256, 0, 0, 123), outline=(0, 0, 256, 123), width=outline_width)\n",
|
332 |
-
"back.paste(\n",
|
333 |
-
" poly,\n",
|
334 |
-
" box=[x0, y0, x1, y1],\n",
|
335 |
-
" mask=poly)\n",
|
336 |
-
"back"
|
337 |
]
|
338 |
},
|
339 |
{
|
340 |
"cell_type": "code",
|
341 |
-
"execution_count":
|
342 |
"metadata": {},
|
343 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
344 |
"source": [
|
345 |
-
"from utils.visualize_bboxes_on_image import visualize_bboxes_on_image\n",
|
346 |
"\n",
|
347 |
-
"
|
348 |
-
"visualize_bboxes_on_image_kwargs = {\n",
|
349 |
-
" 'label_text_color': 'white',\n",
|
350 |
-
" 'label_fill_color': 'black',\n",
|
351 |
-
" 'label_text_size': 12,\n",
|
352 |
-
" 'label_text_padding': 3,\n",
|
353 |
-
" 'label_rectangle_left_margin': 0,\n",
|
354 |
-
" 'label_rectangle_top_margin': 0,\n",
|
355 |
-
" # 'bbox_outline_color': '#acc2d9',\n",
|
356 |
-
" # 'bbox_fill_color': (0, 0, 256, 0)\n",
|
357 |
-
"}\n",
|
358 |
-
"visualize_bboxes_on_image(\n",
|
359 |
-
" image,\n",
|
360 |
-
" [[100, 100, 500, 500]],\n",
|
361 |
-
" ['Invoice Number'],\n",
|
362 |
-
" **visualize_bboxes_on_image_kwargs\n",
|
363 |
-
")"
|
364 |
-
]
|
365 |
-
},
|
366 |
-
{
|
367 |
-
"cell_type": "code",
|
368 |
-
"execution_count": null,
|
369 |
-
"metadata": {},
|
370 |
-
"outputs": [],
|
371 |
-
"source": [
|
372 |
-
"from utils.visualize_bboxes_on_image import get_color\n",
|
373 |
-
"import matplotlib.colors as colors\n",
|
374 |
"\n",
|
375 |
-
"
|
376 |
-
"
|
377 |
-
"
|
378 |
-
"
|
379 |
-
"
|
380 |
-
"
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
"metadata": {},
|
387 |
-
"outputs": [],
|
388 |
-
"source": [
|
389 |
-
"from utils.get_RGB_image import get_RGB_image\n",
|
390 |
-
"url = 'https://www.lifewire.com/thmb/GG6qBbZxV0mM7Kvgni3u-NtjVp8=/750x0/filters:no_upscale():max_bytes(150000):strip_icc():format(webp)/photopea-online-picture-editor-5bead7d446e0fb00267a5ac1.png'\n",
|
391 |
-
"get_RGB_image(url)"
|
392 |
]
|
393 |
},
|
394 |
{
|
395 |
"cell_type": "code",
|
396 |
-
"execution_count":
|
397 |
"metadata": {},
|
398 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
399 |
"source": [
|
400 |
-
"
|
401 |
-
"
|
402 |
-
"\n",
|
403 |
-
"
|
404 |
-
"
|
405 |
-
"
|
|
|
|
|
406 |
]
|
407 |
}
|
408 |
],
|
|
|
11 |
},
|
12 |
{
|
13 |
"cell_type": "code",
|
14 |
+
"execution_count": 1,
|
15 |
"metadata": {},
|
16 |
"outputs": [],
|
17 |
"source": [
|
|
|
301 |
},
|
302 |
{
|
303 |
"cell_type": "code",
|
304 |
+
"execution_count": 22,
|
305 |
"metadata": {},
|
306 |
+
"outputs": [
|
307 |
+
{
|
308 |
+
"name": "stdout",
|
309 |
+
"output_type": "stream",
|
310 |
+
"text": [
|
311 |
+
"Running on local URL: http://127.0.0.1:7862\n",
|
312 |
+
"\n",
|
313 |
+
"To create a public link, set `share=True` in `launch()`.\n"
|
314 |
+
]
|
315 |
+
},
|
316 |
+
{
|
317 |
+
"data": {
|
318 |
+
"text/html": [
|
319 |
+
"<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
320 |
+
],
|
321 |
+
"text/plain": [
|
322 |
+
"<IPython.core.display.HTML object>"
|
323 |
+
]
|
324 |
+
},
|
325 |
+
"metadata": {},
|
326 |
+
"output_type": "display_data"
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"name": "stderr",
|
330 |
+
"output_type": "stream",
|
331 |
+
"text": [
|
332 |
+
"/Users/charleskabue/miniconda3/envs/dss-env/lib/python3.10/site-packages/torch/functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1695391816234/work/aten/src/ATen/native/TensorShape.cpp:3527.)\n",
|
333 |
+
" return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]\n"
|
334 |
+
]
|
335 |
+
},
|
336 |
+
{
|
337 |
+
"name": "stdout",
|
338 |
+
"output_type": "stream",
|
339 |
+
"text": [
|
340 |
+
"Keyboard interruption in main thread... closing server.\n"
|
341 |
+
]
|
342 |
+
},
|
343 |
+
{
|
344 |
+
"data": {
|
345 |
+
"text/plain": []
|
346 |
+
},
|
347 |
+
"execution_count": 22,
|
348 |
+
"metadata": {},
|
349 |
+
"output_type": "execute_result"
|
350 |
+
}
|
351 |
+
],
|
352 |
"source": [
|
353 |
"from main import app\n",
|
354 |
"\n",
|
355 |
"model_path = '../detectron2-layout-parser/model_final.pth'\n",
|
356 |
"config_path = '../detectron2-layout-parser/config.yaml'\n",
|
357 |
+
"examples = [\n",
|
358 |
+
" '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.1.jpg',\n",
|
359 |
+
" '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.2.jpg',\n",
|
360 |
+
" '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.3.jpg',\n",
|
361 |
+
" ] * 5\n",
|
362 |
+
"app(model_path=model_path, config_path=config_path, examples=examples, debug=True)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
363 |
]
|
364 |
},
|
365 |
{
|
366 |
"cell_type": "code",
|
367 |
+
"execution_count": 16,
|
368 |
"metadata": {},
|
369 |
+
"outputs": [
|
370 |
+
{
|
371 |
+
"name": "stdout",
|
372 |
+
"output_type": "stream",
|
373 |
+
"text": [
|
374 |
+
"Running on local URL: http://127.0.0.1:7861\n",
|
375 |
+
"\n",
|
376 |
+
"To create a public link, set `share=True` in `launch()`.\n"
|
377 |
+
]
|
378 |
+
},
|
379 |
+
{
|
380 |
+
"data": {
|
381 |
+
"text/html": [
|
382 |
+
"<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
383 |
+
],
|
384 |
+
"text/plain": [
|
385 |
+
"<IPython.core.display.HTML object>"
|
386 |
+
]
|
387 |
+
},
|
388 |
+
"metadata": {},
|
389 |
+
"output_type": "display_data"
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"data": {
|
393 |
+
"text/plain": []
|
394 |
+
},
|
395 |
+
"execution_count": 16,
|
396 |
+
"metadata": {},
|
397 |
+
"output_type": "execute_result"
|
398 |
+
}
|
399 |
+
],
|
400 |
"source": [
|
|
|
401 |
"\n",
|
402 |
+
"import gradio as gr\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
"\n",
|
404 |
+
"def classify_image(image):\n",
|
405 |
+
" # Replace with your image classification logic\n",
|
406 |
+
" # (e.g., using a pre-trained model or custom implementation)\n",
|
407 |
+
" classification_results = {\"cat\": 0.8, \"dog\": 0.2}\n",
|
408 |
+
" return classification_results\n",
|
409 |
+
"\n",
|
410 |
+
"image_input = gr.components.Image(type=\"pil\") # Allow image upload\n",
|
411 |
+
"label = gr.components.Label(num_top_classes=3) # Show top 3 predictions\n",
|
412 |
+
"\n",
|
413 |
+
"interface = gr.Interface(classify_image, inputs=image_input, outputs=label)\n",
|
414 |
+
"interface.launch()"
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
]
|
416 |
},
|
417 |
{
|
418 |
"cell_type": "code",
|
419 |
+
"execution_count": 15,
|
420 |
"metadata": {},
|
421 |
+
"outputs": [
|
422 |
+
{
|
423 |
+
"name": "stdout",
|
424 |
+
"output_type": "stream",
|
425 |
+
"text": [
|
426 |
+
"Running on local URL: http://127.0.0.1:7861\n",
|
427 |
+
"\n",
|
428 |
+
"To create a public link, set `share=True` in `launch()`.\n"
|
429 |
+
]
|
430 |
+
},
|
431 |
+
{
|
432 |
+
"data": {
|
433 |
+
"text/html": [
|
434 |
+
"<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
435 |
+
],
|
436 |
+
"text/plain": [
|
437 |
+
"<IPython.core.display.HTML object>"
|
438 |
+
]
|
439 |
+
},
|
440 |
+
"metadata": {},
|
441 |
+
"output_type": "display_data"
|
442 |
+
},
|
443 |
+
{
|
444 |
+
"name": "stdout",
|
445 |
+
"output_type": "stream",
|
446 |
+
"text": [
|
447 |
+
"Keyboard interruption in main thread... closing server.\n"
|
448 |
+
]
|
449 |
+
},
|
450 |
+
{
|
451 |
+
"data": {
|
452 |
+
"text/plain": []
|
453 |
+
},
|
454 |
+
"execution_count": 15,
|
455 |
+
"metadata": {},
|
456 |
+
"output_type": "execute_result"
|
457 |
+
}
|
458 |
+
],
|
459 |
"source": [
|
460 |
+
"with gr.Blocks() as interface:\n",
|
461 |
+
" document = gr.Image(\n",
|
462 |
+
" type=\"pil\", label=f\"Document\", interactive=False, show_download_button=True)\n",
|
463 |
+
" gr.Examples(\n",
|
464 |
+
" examples=examples,\n",
|
465 |
+
" inputs=document,\n",
|
466 |
+
" label='Select any of these test document images')\n",
|
467 |
+
"interface.launch(debug=True)"
|
468 |
]
|
469 |
}
|
470 |
],
|
main.py
CHANGED
@@ -129,7 +129,7 @@ def load_image(filename, page=0):
|
|
129 |
first_error = e
|
130 |
image = get_RGB_image(filename)
|
131 |
return [
|
132 |
-
|
133 |
None
|
134 |
]
|
135 |
except Exception as second_error:
|
@@ -145,8 +145,10 @@ def preview_url(url, page=0):
|
|
145 |
return [gr.Tabs(selected=1), image, error]
|
146 |
|
147 |
|
148 |
-
def document_view(document_number: int):
|
149 |
-
gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image
|
|
|
|
|
150 |
'center'])
|
151 |
with gr.Tabs() as document_tabs:
|
152 |
with gr.Tab("From Image", id=0):
|
@@ -155,7 +157,7 @@ def document_view(document_number: int):
|
|
155 |
document_error_message = gr.HTML(
|
156 |
label="Error Message", visible=False)
|
157 |
document_preview = gr.UploadButton(
|
158 |
-
"Upload PDF or Document Image",
|
159 |
file_types=["image", ".pdf"],
|
160 |
file_count="single")
|
161 |
with gr.Tab("From URL", id=1):
|
@@ -166,7 +168,12 @@ def document_view(document_number: int):
|
|
166 |
document_url_error_message = gr.HTML(
|
167 |
label="Error Message", visible=False)
|
168 |
document_url_preview = gr.Button(
|
169 |
-
value="Preview", variant="
|
|
|
|
|
|
|
|
|
|
|
170 |
document_preview.upload(
|
171 |
fn=lambda file: load_image(file.name),
|
172 |
inputs=[document_preview],
|
@@ -175,10 +182,14 @@ def document_view(document_number: int):
|
|
175 |
fn=preview_url,
|
176 |
inputs=[document_url],
|
177 |
outputs=[document_tabs, document, document_url_error_message])
|
|
|
|
|
|
|
|
|
178 |
return document
|
179 |
|
180 |
|
181 |
-
def app(*, model_path, config_path, debug=False):
|
182 |
model: lp.Detectron2LayoutModel = lp.Detectron2LayoutModel(
|
183 |
config_path=config_path,
|
184 |
model_path=model_path,
|
@@ -190,14 +201,14 @@ def app(*, model_path, config_path, debug=False):
|
|
190 |
.center { display: flex; flex: 1 1 auto; align-items: center; align-content: center; justify-content: center; justify-items: center; }
|
191 |
.hr { width: 100%; display: block; padding: 0; margin: 0; background: gray; height: 4px; border: none; }
|
192 |
'''
|
193 |
-
with gr.Blocks(title=title, css=css) as
|
194 |
with gr.Row():
|
195 |
gr.HTML(value=description, elem_classes=['center'])
|
196 |
with gr.Row(equal_height=False):
|
197 |
with gr.Column():
|
198 |
-
document_1_image = document_view(1)
|
199 |
with gr.Column():
|
200 |
-
document_2_image = document_view(2)
|
201 |
gr.HTML('<hr/>', elem_classes=['hr'])
|
202 |
with gr.Row(elem_classes=['center']):
|
203 |
with gr.Column():
|
@@ -211,7 +222,6 @@ def app(*, model_path, config_path, debug=False):
|
|
211 |
info="Select the Vectors Type to use for Similarity Calculation")
|
212 |
similarity_output = gr.HTML(
|
213 |
label="Similarity Score", visible=False)
|
214 |
-
reset = gr.Button(value="Reset", variant="secondary")
|
215 |
kwargs = {
|
216 |
'fn': lambda document_1_image, document_2_image, vectors_type: similarity_fn(
|
217 |
model,
|
@@ -223,4 +233,4 @@ def app(*, model_path, config_path, debug=False):
|
|
223 |
}
|
224 |
submit.click(**kwargs)
|
225 |
vectors_type.change(**kwargs)
|
226 |
-
return
|
|
|
129 |
first_error = e
|
130 |
image = get_RGB_image(filename)
|
131 |
return [
|
132 |
+
image,
|
133 |
None
|
134 |
]
|
135 |
except Exception as second_error:
|
|
|
145 |
return [gr.Tabs(selected=1), image, error]
|
146 |
|
147 |
|
148 |
+
def document_view(document_number: int, examples: list[str] = []):
|
149 |
+
gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image</h4>', elem_classes=[
|
150 |
+
'center'])
|
151 |
+
gr.HTML(value=f'<p>Click the button below to upload Upload PDF or Document Image or cleck the URL tab to add using link.</p>', elem_classes=[
|
152 |
'center'])
|
153 |
with gr.Tabs() as document_tabs:
|
154 |
with gr.Tab("From Image", id=0):
|
|
|
157 |
document_error_message = gr.HTML(
|
158 |
label="Error Message", visible=False)
|
159 |
document_preview = gr.UploadButton(
|
160 |
+
label="Upload PDF or Document Image",
|
161 |
file_types=["image", ".pdf"],
|
162 |
file_count="single")
|
163 |
with gr.Tab("From URL", id=1):
|
|
|
168 |
document_url_error_message = gr.HTML(
|
169 |
label="Error Message", visible=False)
|
170 |
document_url_preview = gr.Button(
|
171 |
+
value="Preview Link Document", variant="secondary")
|
172 |
+
if len(examples) > 0:
|
173 |
+
gr.Examples(
|
174 |
+
examples=examples,
|
175 |
+
inputs=document,
|
176 |
+
label='Select any of these test document images')
|
177 |
document_preview.upload(
|
178 |
fn=lambda file: load_image(file.name),
|
179 |
inputs=[document_preview],
|
|
|
182 |
fn=preview_url,
|
183 |
inputs=[document_url],
|
184 |
outputs=[document_tabs, document, document_url_error_message])
|
185 |
+
document.change(
|
186 |
+
fn = lambda image: gr.Image(value=image, visible=True) if image else gr.Image(value=None, visible=False),
|
187 |
+
inputs = [document],
|
188 |
+
outputs = [document])
|
189 |
return document
|
190 |
|
191 |
|
192 |
+
def app(*, model_path:str, config_path:str, examples: list[str], debug=False):
|
193 |
model: lp.Detectron2LayoutModel = lp.Detectron2LayoutModel(
|
194 |
config_path=config_path,
|
195 |
model_path=model_path,
|
|
|
201 |
.center { display: flex; flex: 1 1 auto; align-items: center; align-content: center; justify-content: center; justify-items: center; }
|
202 |
.hr { width: 100%; display: block; padding: 0; margin: 0; background: gray; height: 4px; border: none; }
|
203 |
'''
|
204 |
+
with gr.Blocks(title=title, css=css) as interface:
|
205 |
with gr.Row():
|
206 |
gr.HTML(value=description, elem_classes=['center'])
|
207 |
with gr.Row(equal_height=False):
|
208 |
with gr.Column():
|
209 |
+
document_1_image = document_view(1, examples)
|
210 |
with gr.Column():
|
211 |
+
document_2_image = document_view(2, examples)
|
212 |
gr.HTML('<hr/>', elem_classes=['hr'])
|
213 |
with gr.Row(elem_classes=['center']):
|
214 |
with gr.Column():
|
|
|
222 |
info="Select the Vectors Type to use for Similarity Calculation")
|
223 |
similarity_output = gr.HTML(
|
224 |
label="Similarity Score", visible=False)
|
|
|
225 |
kwargs = {
|
226 |
'fn': lambda document_1_image, document_2_image, vectors_type: similarity_fn(
|
227 |
model,
|
|
|
233 |
}
|
234 |
submit.click(**kwargs)
|
235 |
vectors_type.change(**kwargs)
|
236 |
+
return interface.launch(debug=debug)
|