Charles Kabui committed on
Commit
af37085
·
1 Parent(s): d9699d7
Files changed (2) hide show
  1. analysis.ipynb +145 -83
  2. main.py +21 -11
analysis.ipynb CHANGED
@@ -11,7 +11,7 @@
11
  },
12
  {
13
  "cell_type": "code",
14
- "execution_count": null,
15
  "metadata": {},
16
  "outputs": [],
17
  "source": [
@@ -301,108 +301,170 @@
301
  },
302
  {
303
  "cell_type": "code",
304
- "execution_count": null,
305
  "metadata": {},
306
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  "source": [
308
  "from main import app\n",
309
  "\n",
310
  "model_path = '../detectron2-layout-parser/model_final.pth'\n",
311
  "config_path = '../detectron2-layout-parser/config.yaml'\n",
312
- "app(model_path=model_path, config_path=config_path, debug=True)"
313
- ]
314
- },
315
- {
316
- "cell_type": "code",
317
- "execution_count": null,
318
- "metadata": {},
319
- "outputs": [],
320
- "source": [
321
- "from PIL import Image\n",
322
- "from PIL import ImageDraw\n",
323
- "back = Image.open(\n",
324
- " '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.3.jpg')\n",
325
- "x0, y0, x1, y1 = [100, 100, 500, 500]\n",
326
- "width, height = x1 - x0, y1 - y0\n",
327
- "outline_width = 10\n",
328
- "poly = Image.new('RGBA', (1000, 1000))\n",
329
- "pdraw = ImageDraw.Draw(poly)\n",
330
- "pdraw.rectangle([x0, y0, x1-outline_width, y1-outline_width],\n",
331
- " fill=(256, 0, 0, 123), outline=(0, 0, 256, 123), width=outline_width)\n",
332
- "back.paste(\n",
333
- " poly,\n",
334
- " box=[x0, y0, x1, y1],\n",
335
- " mask=poly)\n",
336
- "back"
337
  ]
338
  },
339
  {
340
  "cell_type": "code",
341
- "execution_count": null,
342
  "metadata": {},
343
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  "source": [
345
- "from utils.visualize_bboxes_on_image import visualize_bboxes_on_image\n",
346
  "\n",
347
- "image = Image.open('/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.3.jpg')\n",
348
- "visualize_bboxes_on_image_kwargs = {\n",
349
- " 'label_text_color': 'white',\n",
350
- " 'label_fill_color': 'black',\n",
351
- " 'label_text_size': 12,\n",
352
- " 'label_text_padding': 3,\n",
353
- " 'label_rectangle_left_margin': 0,\n",
354
- " 'label_rectangle_top_margin': 0,\n",
355
- " # 'bbox_outline_color': '#acc2d9',\n",
356
- " # 'bbox_fill_color': (0, 0, 256, 0)\n",
357
- "}\n",
358
- "visualize_bboxes_on_image(\n",
359
- " image,\n",
360
- " [[100, 100, 500, 500]],\n",
361
- " ['Invoice Number'],\n",
362
- " **visualize_bboxes_on_image_kwargs\n",
363
- ")"
364
- ]
365
- },
366
- {
367
- "cell_type": "code",
368
- "execution_count": null,
369
- "metadata": {},
370
- "outputs": [],
371
- "source": [
372
- "from utils.visualize_bboxes_on_image import get_color\n",
373
- "import matplotlib.colors as colors\n",
374
  "\n",
375
- "print(get_color('red'), colors.to_hex('red'))\n",
376
- "print(get_color('#ff0000'))\n",
377
- "print(get_color((0, 0, 256, 0)))\n",
378
- "print(get_color((0, 0, 256)))\n",
379
- "print(get_color(('red', 127)))\n",
380
- "print(get_color(('#ff0000', 127)))"
381
- ]
382
- },
383
- {
384
- "cell_type": "code",
385
- "execution_count": null,
386
- "metadata": {},
387
- "outputs": [],
388
- "source": [
389
- "from utils.get_RGB_image import get_RGB_image\n",
390
- "url = 'https://www.lifewire.com/thmb/GG6qBbZxV0mM7Kvgni3u-NtjVp8=/750x0/filters:no_upscale():max_bytes(150000):strip_icc():format(webp)/photopea-online-picture-editor-5bead7d446e0fb00267a5ac1.png'\n",
391
- "get_RGB_image(url)"
392
  ]
393
  },
394
  {
395
  "cell_type": "code",
396
- "execution_count": null,
397
  "metadata": {},
398
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  "source": [
400
- "import fitz\n",
401
- "from utils.get_RGB_image import steam_online_file\n",
402
- "\n",
403
- "pixmap = fitz.open(\"pdf\", steam_online_file('https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf'))[0].get_pixmap()\n",
404
- "\n",
405
- "Image.frombytes(\"RGB\", [pixmap.width, pixmap.height], pixmap.samples)"
 
 
406
  ]
407
  }
408
  ],
 
11
  },
12
  {
13
  "cell_type": "code",
14
+ "execution_count": 1,
15
  "metadata": {},
16
  "outputs": [],
17
  "source": [
 
301
  },
302
  {
303
  "cell_type": "code",
304
+ "execution_count": 22,
305
  "metadata": {},
306
+ "outputs": [
307
+ {
308
+ "name": "stdout",
309
+ "output_type": "stream",
310
+ "text": [
311
+ "Running on local URL: http://127.0.0.1:7862\n",
312
+ "\n",
313
+ "To create a public link, set `share=True` in `launch()`.\n"
314
+ ]
315
+ },
316
+ {
317
+ "data": {
318
+ "text/html": [
319
+ "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
320
+ ],
321
+ "text/plain": [
322
+ "<IPython.core.display.HTML object>"
323
+ ]
324
+ },
325
+ "metadata": {},
326
+ "output_type": "display_data"
327
+ },
328
+ {
329
+ "name": "stderr",
330
+ "output_type": "stream",
331
+ "text": [
332
+ "/Users/charleskabue/miniconda3/envs/dss-env/lib/python3.10/site-packages/torch/functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1695391816234/work/aten/src/ATen/native/TensorShape.cpp:3527.)\n",
333
+ " return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]\n"
334
+ ]
335
+ },
336
+ {
337
+ "name": "stdout",
338
+ "output_type": "stream",
339
+ "text": [
340
+ "Keyboard interruption in main thread... closing server.\n"
341
+ ]
342
+ },
343
+ {
344
+ "data": {
345
+ "text/plain": []
346
+ },
347
+ "execution_count": 22,
348
+ "metadata": {},
349
+ "output_type": "execute_result"
350
+ }
351
+ ],
352
  "source": [
353
  "from main import app\n",
354
  "\n",
355
  "model_path = '../detectron2-layout-parser/model_final.pth'\n",
356
  "config_path = '../detectron2-layout-parser/config.yaml'\n",
357
+ "examples = [\n",
358
+ " '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.1.jpg',\n",
359
+ " '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.2.jpg',\n",
360
+ " '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.3.jpg',\n",
361
+ " ] * 5\n",
362
+ "app(model_path=model_path, config_path=config_path, examples=examples, debug=True)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  ]
364
  },
365
  {
366
  "cell_type": "code",
367
+ "execution_count": 16,
368
  "metadata": {},
369
+ "outputs": [
370
+ {
371
+ "name": "stdout",
372
+ "output_type": "stream",
373
+ "text": [
374
+ "Running on local URL: http://127.0.0.1:7861\n",
375
+ "\n",
376
+ "To create a public link, set `share=True` in `launch()`.\n"
377
+ ]
378
+ },
379
+ {
380
+ "data": {
381
+ "text/html": [
382
+ "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
383
+ ],
384
+ "text/plain": [
385
+ "<IPython.core.display.HTML object>"
386
+ ]
387
+ },
388
+ "metadata": {},
389
+ "output_type": "display_data"
390
+ },
391
+ {
392
+ "data": {
393
+ "text/plain": []
394
+ },
395
+ "execution_count": 16,
396
+ "metadata": {},
397
+ "output_type": "execute_result"
398
+ }
399
+ ],
400
  "source": [
 
401
  "\n",
402
+ "import gradio as gr\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  "\n",
404
+ "def classify_image(image):\n",
405
+ " # Replace with your image classification logic\n",
406
+ " # (e.g., using a pre-trained model or custom implementation)\n",
407
+ " classification_results = {\"cat\": 0.8, \"dog\": 0.2}\n",
408
+ " return classification_results\n",
409
+ "\n",
410
+ "image_input = gr.components.Image(type=\"pil\") # Allow image upload\n",
411
+ "label = gr.components.Label(num_top_classes=3) # Show top 3 predictions\n",
412
+ "\n",
413
+ "interface = gr.Interface(classify_image, inputs=image_input, outputs=label)\n",
414
+ "interface.launch()"
 
 
 
 
 
 
415
  ]
416
  },
417
  {
418
  "cell_type": "code",
419
+ "execution_count": 15,
420
  "metadata": {},
421
+ "outputs": [
422
+ {
423
+ "name": "stdout",
424
+ "output_type": "stream",
425
+ "text": [
426
+ "Running on local URL: http://127.0.0.1:7861\n",
427
+ "\n",
428
+ "To create a public link, set `share=True` in `launch()`.\n"
429
+ ]
430
+ },
431
+ {
432
+ "data": {
433
+ "text/html": [
434
+ "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
435
+ ],
436
+ "text/plain": [
437
+ "<IPython.core.display.HTML object>"
438
+ ]
439
+ },
440
+ "metadata": {},
441
+ "output_type": "display_data"
442
+ },
443
+ {
444
+ "name": "stdout",
445
+ "output_type": "stream",
446
+ "text": [
447
+ "Keyboard interruption in main thread... closing server.\n"
448
+ ]
449
+ },
450
+ {
451
+ "data": {
452
+ "text/plain": []
453
+ },
454
+ "execution_count": 15,
455
+ "metadata": {},
456
+ "output_type": "execute_result"
457
+ }
458
+ ],
459
  "source": [
460
+ "with gr.Blocks() as interface:\n",
461
+ " document = gr.Image(\n",
462
+ " type=\"pil\", label=f\"Document\", interactive=False, show_download_button=True)\n",
463
+ " gr.Examples(\n",
464
+ " examples=examples,\n",
465
+ " inputs=document,\n",
466
+ " label='Select any of these test document images')\n",
467
+ "interface.launch(debug=True)"
468
  ]
469
  }
470
  ],
main.py CHANGED
@@ -129,7 +129,7 @@ def load_image(filename, page=0):
129
  first_error = e
130
  image = get_RGB_image(filename)
131
  return [
132
- gr.Image(value=image, visible=True),
133
  None
134
  ]
135
  except Exception as second_error:
@@ -145,8 +145,10 @@ def preview_url(url, page=0):
145
  return [gr.Tabs(selected=1), image, error]
146
 
147
 
148
- def document_view(document_number: int):
149
- gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=[
 
 
150
  'center'])
151
  with gr.Tabs() as document_tabs:
152
  with gr.Tab("From Image", id=0):
@@ -155,7 +157,7 @@ def document_view(document_number: int):
155
  document_error_message = gr.HTML(
156
  label="Error Message", visible=False)
157
  document_preview = gr.UploadButton(
158
- "Upload PDF or Document Image",
159
  file_types=["image", ".pdf"],
160
  file_count="single")
161
  with gr.Tab("From URL", id=1):
@@ -166,7 +168,12 @@ def document_view(document_number: int):
166
  document_url_error_message = gr.HTML(
167
  label="Error Message", visible=False)
168
  document_url_preview = gr.Button(
169
- value="Preview", variant="primary")
 
 
 
 
 
170
  document_preview.upload(
171
  fn=lambda file: load_image(file.name),
172
  inputs=[document_preview],
@@ -175,10 +182,14 @@ def document_view(document_number: int):
175
  fn=preview_url,
176
  inputs=[document_url],
177
  outputs=[document_tabs, document, document_url_error_message])
 
 
 
 
178
  return document
179
 
180
 
181
- def app(*, model_path, config_path, debug=False):
182
  model: lp.Detectron2LayoutModel = lp.Detectron2LayoutModel(
183
  config_path=config_path,
184
  model_path=model_path,
@@ -190,14 +201,14 @@ def app(*, model_path, config_path, debug=False):
190
  .center { display: flex; flex: 1 1 auto; align-items: center; align-content: center; justify-content: center; justify-items: center; }
191
  .hr { width: 100%; display: block; padding: 0; margin: 0; background: gray; height: 4px; border: none; }
192
  '''
193
- with gr.Blocks(title=title, css=css) as app:
194
  with gr.Row():
195
  gr.HTML(value=description, elem_classes=['center'])
196
  with gr.Row(equal_height=False):
197
  with gr.Column():
198
- document_1_image = document_view(1)
199
  with gr.Column():
200
- document_2_image = document_view(2)
201
  gr.HTML('<hr/>', elem_classes=['hr'])
202
  with gr.Row(elem_classes=['center']):
203
  with gr.Column():
@@ -211,7 +222,6 @@ def app(*, model_path, config_path, debug=False):
211
  info="Select the Vectors Type to use for Similarity Calculation")
212
  similarity_output = gr.HTML(
213
  label="Similarity Score", visible=False)
214
- reset = gr.Button(value="Reset", variant="secondary")
215
  kwargs = {
216
  'fn': lambda document_1_image, document_2_image, vectors_type: similarity_fn(
217
  model,
@@ -223,4 +233,4 @@ def app(*, model_path, config_path, debug=False):
223
  }
224
  submit.click(**kwargs)
225
  vectors_type.change(**kwargs)
226
- return app.launch(debug=debug)
 
129
  first_error = e
130
  image = get_RGB_image(filename)
131
  return [
132
+ image,
133
  None
134
  ]
135
  except Exception as second_error:
 
145
  return [gr.Tabs(selected=1), image, error]
146
 
147
 
148
+ def document_view(document_number: int, examples: list[str] = []):
149
+ gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image</h4>', elem_classes=[
150
+ 'center'])
151
+ gr.HTML(value=f'<p>Click the button below to upload Upload PDF or Document Image or cleck the URL tab to add using link.</p>', elem_classes=[
152
  'center'])
153
  with gr.Tabs() as document_tabs:
154
  with gr.Tab("From Image", id=0):
 
157
  document_error_message = gr.HTML(
158
  label="Error Message", visible=False)
159
  document_preview = gr.UploadButton(
160
+ label="Upload PDF or Document Image",
161
  file_types=["image", ".pdf"],
162
  file_count="single")
163
  with gr.Tab("From URL", id=1):
 
168
  document_url_error_message = gr.HTML(
169
  label="Error Message", visible=False)
170
  document_url_preview = gr.Button(
171
+ value="Preview Link Document", variant="secondary")
172
+ if len(examples) > 0:
173
+ gr.Examples(
174
+ examples=examples,
175
+ inputs=document,
176
+ label='Select any of these test document images')
177
  document_preview.upload(
178
  fn=lambda file: load_image(file.name),
179
  inputs=[document_preview],
 
182
  fn=preview_url,
183
  inputs=[document_url],
184
  outputs=[document_tabs, document, document_url_error_message])
185
+ document.change(
186
+ fn = lambda image: gr.Image(value=image, visible=True) if image else gr.Image(value=None, visible=False),
187
+ inputs = [document],
188
+ outputs = [document])
189
  return document
190
 
191
 
192
+ def app(*, model_path:str, config_path:str, examples: list[str], debug=False):
193
  model: lp.Detectron2LayoutModel = lp.Detectron2LayoutModel(
194
  config_path=config_path,
195
  model_path=model_path,
 
201
  .center { display: flex; flex: 1 1 auto; align-items: center; align-content: center; justify-content: center; justify-items: center; }
202
  .hr { width: 100%; display: block; padding: 0; margin: 0; background: gray; height: 4px; border: none; }
203
  '''
204
+ with gr.Blocks(title=title, css=css) as interface:
205
  with gr.Row():
206
  gr.HTML(value=description, elem_classes=['center'])
207
  with gr.Row(equal_height=False):
208
  with gr.Column():
209
+ document_1_image = document_view(1, examples)
210
  with gr.Column():
211
+ document_2_image = document_view(2, examples)
212
  gr.HTML('<hr/>', elem_classes=['hr'])
213
  with gr.Row(elem_classes=['center']):
214
  with gr.Column():
 
222
  info="Select the Vectors Type to use for Similarity Calculation")
223
  similarity_output = gr.HTML(
224
  label="Similarity Score", visible=False)
 
225
  kwargs = {
226
  'fn': lambda document_1_image, document_2_image, vectors_type: similarity_fn(
227
  model,
 
233
  }
234
  submit.click(**kwargs)
235
  vectors_type.change(**kwargs)
236
+ return interface.launch(debug=debug)