Charles Kabui committed on
Commit
af37085
·
1 Parent(s): d9699d7
Files changed (2) hide show
  1. analysis.ipynb +145 -83
  2. main.py +21 -11
analysis.ipynb CHANGED
@@ -11,7 +11,7 @@
11
  },
12
  {
13
  "cell_type": "code",
14
- "execution_count": null,
15
  "metadata": {},
16
  "outputs": [],
17
  "source": [
@@ -301,108 +301,170 @@
301
  },
302
  {
303
  "cell_type": "code",
304
- "execution_count": null,
305
  "metadata": {},
306
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  "source": [
308
  "from main import app\n",
309
  "\n",
310
  "model_path = '../detectron2-layout-parser/model_final.pth'\n",
311
  "config_path = '../detectron2-layout-parser/config.yaml'\n",
312
- "app(model_path=model_path, config_path=config_path, debug=True)"
313
- ]
314
- },
315
- {
316
- "cell_type": "code",
317
- "execution_count": null,
318
- "metadata": {},
319
- "outputs": [],
320
- "source": [
321
- "from PIL import Image\n",
322
- "from PIL import ImageDraw\n",
323
- "back = Image.open(\n",
324
- " '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.3.jpg')\n",
325
- "x0, y0, x1, y1 = [100, 100, 500, 500]\n",
326
- "width, height = x1 - x0, y1 - y0\n",
327
- "outline_width = 10\n",
328
- "poly = Image.new('RGBA', (1000, 1000))\n",
329
- "pdraw = ImageDraw.Draw(poly)\n",
330
- "pdraw.rectangle([x0, y0, x1-outline_width, y1-outline_width],\n",
331
- " fill=(256, 0, 0, 123), outline=(0, 0, 256, 123), width=outline_width)\n",
332
- "back.paste(\n",
333
- " poly,\n",
334
- " box=[x0, y0, x1, y1],\n",
335
- " mask=poly)\n",
336
- "back"
337
  ]
338
  },
339
  {
340
  "cell_type": "code",
341
- "execution_count": null,
342
  "metadata": {},
343
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  "source": [
345
- "from utils.visualize_bboxes_on_image import visualize_bboxes_on_image\n",
346
  "\n",
347
- "image = Image.open('/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.3.jpg')\n",
348
- "visualize_bboxes_on_image_kwargs = {\n",
349
- " 'label_text_color': 'white',\n",
350
- " 'label_fill_color': 'black',\n",
351
- " 'label_text_size': 12,\n",
352
- " 'label_text_padding': 3,\n",
353
- " 'label_rectangle_left_margin': 0,\n",
354
- " 'label_rectangle_top_margin': 0,\n",
355
- " # 'bbox_outline_color': '#acc2d9',\n",
356
- " # 'bbox_fill_color': (0, 0, 256, 0)\n",
357
- "}\n",
358
- "visualize_bboxes_on_image(\n",
359
- " image,\n",
360
- " [[100, 100, 500, 500]],\n",
361
- " ['Invoice Number'],\n",
362
- " **visualize_bboxes_on_image_kwargs\n",
363
- ")"
364
- ]
365
- },
366
- {
367
- "cell_type": "code",
368
- "execution_count": null,
369
- "metadata": {},
370
- "outputs": [],
371
- "source": [
372
- "from utils.visualize_bboxes_on_image import get_color\n",
373
- "import matplotlib.colors as colors\n",
374
  "\n",
375
- "print(get_color('red'), colors.to_hex('red'))\n",
376
- "print(get_color('#ff0000'))\n",
377
- "print(get_color((0, 0, 256, 0)))\n",
378
- "print(get_color((0, 0, 256)))\n",
379
- "print(get_color(('red', 127)))\n",
380
- "print(get_color(('#ff0000', 127)))"
381
- ]
382
- },
383
- {
384
- "cell_type": "code",
385
- "execution_count": null,
386
- "metadata": {},
387
- "outputs": [],
388
- "source": [
389
- "from utils.get_RGB_image import get_RGB_image\n",
390
- "url = 'https://www.lifewire.com/thmb/GG6qBbZxV0mM7Kvgni3u-NtjVp8=/750x0/filters:no_upscale():max_bytes(150000):strip_icc():format(webp)/photopea-online-picture-editor-5bead7d446e0fb00267a5ac1.png'\n",
391
- "get_RGB_image(url)"
392
  ]
393
  },
394
  {
395
  "cell_type": "code",
396
- "execution_count": null,
397
  "metadata": {},
398
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  "source": [
400
- "import fitz\n",
401
- "from utils.get_RGB_image import steam_online_file\n",
402
- "\n",
403
- "pixmap = fitz.open(\"pdf\", steam_online_file('https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf'))[0].get_pixmap()\n",
404
- "\n",
405
- "Image.frombytes(\"RGB\", [pixmap.width, pixmap.height], pixmap.samples)"
 
 
406
  ]
407
  }
408
  ],
 
11
  },
12
  {
13
  "cell_type": "code",
14
+ "execution_count": 1,
15
  "metadata": {},
16
  "outputs": [],
17
  "source": [
 
301
  },
302
  {
303
  "cell_type": "code",
304
+ "execution_count": 22,
305
  "metadata": {},
306
+ "outputs": [
307
+ {
308
+ "name": "stdout",
309
+ "output_type": "stream",
310
+ "text": [
311
+ "Running on local URL: http://127.0.0.1:7862\n",
312
+ "\n",
313
+ "To create a public link, set `share=True` in `launch()`.\n"
314
+ ]
315
+ },
316
+ {
317
+ "data": {
318
+ "text/html": [
319
+ "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
320
+ ],
321
+ "text/plain": [
322
+ "<IPython.core.display.HTML object>"
323
+ ]
324
+ },
325
+ "metadata": {},
326
+ "output_type": "display_data"
327
+ },
328
+ {
329
+ "name": "stderr",
330
+ "output_type": "stream",
331
+ "text": [
332
+ "/Users/charleskabue/miniconda3/envs/dss-env/lib/python3.10/site-packages/torch/functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1695391816234/work/aten/src/ATen/native/TensorShape.cpp:3527.)\n",
333
+ " return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]\n"
334
+ ]
335
+ },
336
+ {
337
+ "name": "stdout",
338
+ "output_type": "stream",
339
+ "text": [
340
+ "Keyboard interruption in main thread... closing server.\n"
341
+ ]
342
+ },
343
+ {
344
+ "data": {
345
+ "text/plain": []
346
+ },
347
+ "execution_count": 22,
348
+ "metadata": {},
349
+ "output_type": "execute_result"
350
+ }
351
+ ],
352
  "source": [
353
  "from main import app\n",
354
  "\n",
355
  "model_path = '../detectron2-layout-parser/model_final.pth'\n",
356
  "config_path = '../detectron2-layout-parser/config.yaml'\n",
357
+ "examples = [\n",
358
+ " '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.1.jpg',\n",
359
+ " '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.2.jpg',\n",
360
+ " '/Users/charleskabue/document-similarity-search/detectron2-layout-parser/example.3.jpg',\n",
361
+ " ] * 5\n",
362
+ "app(model_path=model_path, config_path=config_path, examples=examples, debug=True)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  ]
364
  },
365
  {
366
  "cell_type": "code",
367
+ "execution_count": 16,
368
  "metadata": {},
369
+ "outputs": [
370
+ {
371
+ "name": "stdout",
372
+ "output_type": "stream",
373
+ "text": [
374
+ "Running on local URL: http://127.0.0.1:7861\n",
375
+ "\n",
376
+ "To create a public link, set `share=True` in `launch()`.\n"
377
+ ]
378
+ },
379
+ {
380
+ "data": {
381
+ "text/html": [
382
+ "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
383
+ ],
384
+ "text/plain": [
385
+ "<IPython.core.display.HTML object>"
386
+ ]
387
+ },
388
+ "metadata": {},
389
+ "output_type": "display_data"
390
+ },
391
+ {
392
+ "data": {
393
+ "text/plain": []
394
+ },
395
+ "execution_count": 16,
396
+ "metadata": {},
397
+ "output_type": "execute_result"
398
+ }
399
+ ],
400
  "source": [
 
401
  "\n",
402
+ "import gradio as gr\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  "\n",
404
+ "def classify_image(image):\n",
405
+ " # Replace with your image classification logic\n",
406
+ " # (e.g., using a pre-trained model or custom implementation)\n",
407
+ " classification_results = {\"cat\": 0.8, \"dog\": 0.2}\n",
408
+ " return classification_results\n",
409
+ "\n",
410
+ "image_input = gr.components.Image(type=\"pil\") # Allow image upload\n",
411
+ "label = gr.components.Label(num_top_classes=3) # Show top 3 predictions\n",
412
+ "\n",
413
+ "interface = gr.Interface(classify_image, inputs=image_input, outputs=label)\n",
414
+ "interface.launch()"
 
 
 
 
 
 
415
  ]
416
  },
417
  {
418
  "cell_type": "code",
419
+ "execution_count": 15,
420
  "metadata": {},
421
+ "outputs": [
422
+ {
423
+ "name": "stdout",
424
+ "output_type": "stream",
425
+ "text": [
426
+ "Running on local URL: http://127.0.0.1:7861\n",
427
+ "\n",
428
+ "To create a public link, set `share=True` in `launch()`.\n"
429
+ ]
430
+ },
431
+ {
432
+ "data": {
433
+ "text/html": [
434
+ "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
435
+ ],
436
+ "text/plain": [
437
+ "<IPython.core.display.HTML object>"
438
+ ]
439
+ },
440
+ "metadata": {},
441
+ "output_type": "display_data"
442
+ },
443
+ {
444
+ "name": "stdout",
445
+ "output_type": "stream",
446
+ "text": [
447
+ "Keyboard interruption in main thread... closing server.\n"
448
+ ]
449
+ },
450
+ {
451
+ "data": {
452
+ "text/plain": []
453
+ },
454
+ "execution_count": 15,
455
+ "metadata": {},
456
+ "output_type": "execute_result"
457
+ }
458
+ ],
459
  "source": [
460
+ "with gr.Blocks() as interface:\n",
461
+ " document = gr.Image(\n",
462
+ " type=\"pil\", label=f\"Document\", interactive=False, show_download_button=True)\n",
463
+ " gr.Examples(\n",
464
+ " examples=examples,\n",
465
+ " inputs=document,\n",
466
+ " label='Select any of these test document images')\n",
467
+ "interface.launch(debug=True)"
468
  ]
469
  }
470
  ],
main.py CHANGED
@@ -129,7 +129,7 @@ def load_image(filename, page=0):
129
  first_error = e
130
  image = get_RGB_image(filename)
131
  return [
132
- gr.Image(value=image, visible=True),
133
  None
134
  ]
135
  except Exception as second_error:
@@ -145,8 +145,10 @@ def preview_url(url, page=0):
145
  return [gr.Tabs(selected=1), image, error]
146
 
147
 
148
- def document_view(document_number: int):
149
- gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=[
 
 
150
  'center'])
151
  with gr.Tabs() as document_tabs:
152
  with gr.Tab("From Image", id=0):
@@ -155,7 +157,7 @@ def document_view(document_number: int):
155
  document_error_message = gr.HTML(
156
  label="Error Message", visible=False)
157
  document_preview = gr.UploadButton(
158
- "Upload PDF or Document Image",
159
  file_types=["image", ".pdf"],
160
  file_count="single")
161
  with gr.Tab("From URL", id=1):
@@ -166,7 +168,12 @@ def document_view(document_number: int):
166
  document_url_error_message = gr.HTML(
167
  label="Error Message", visible=False)
168
  document_url_preview = gr.Button(
169
- value="Preview", variant="primary")
 
 
 
 
 
170
  document_preview.upload(
171
  fn=lambda file: load_image(file.name),
172
  inputs=[document_preview],
@@ -175,10 +182,14 @@ def document_view(document_number: int):
175
  fn=preview_url,
176
  inputs=[document_url],
177
  outputs=[document_tabs, document, document_url_error_message])
 
 
 
 
178
  return document
179
 
180
 
181
- def app(*, model_path, config_path, debug=False):
182
  model: lp.Detectron2LayoutModel = lp.Detectron2LayoutModel(
183
  config_path=config_path,
184
  model_path=model_path,
@@ -190,14 +201,14 @@ def app(*, model_path, config_path, debug=False):
190
  .center { display: flex; flex: 1 1 auto; align-items: center; align-content: center; justify-content: center; justify-items: center; }
191
  .hr { width: 100%; display: block; padding: 0; margin: 0; background: gray; height: 4px; border: none; }
192
  '''
193
- with gr.Blocks(title=title, css=css) as app:
194
  with gr.Row():
195
  gr.HTML(value=description, elem_classes=['center'])
196
  with gr.Row(equal_height=False):
197
  with gr.Column():
198
- document_1_image = document_view(1)
199
  with gr.Column():
200
- document_2_image = document_view(2)
201
  gr.HTML('<hr/>', elem_classes=['hr'])
202
  with gr.Row(elem_classes=['center']):
203
  with gr.Column():
@@ -211,7 +222,6 @@ def app(*, model_path, config_path, debug=False):
211
  info="Select the Vectors Type to use for Similarity Calculation")
212
  similarity_output = gr.HTML(
213
  label="Similarity Score", visible=False)
214
- reset = gr.Button(value="Reset", variant="secondary")
215
  kwargs = {
216
  'fn': lambda document_1_image, document_2_image, vectors_type: similarity_fn(
217
  model,
@@ -223,4 +233,4 @@ def app(*, model_path, config_path, debug=False):
223
  }
224
  submit.click(**kwargs)
225
  vectors_type.change(**kwargs)
226
- return app.launch(debug=debug)
 
129
  first_error = e
130
  image = get_RGB_image(filename)
131
  return [
132
+ image,
133
  None
134
  ]
135
  except Exception as second_error:
 
145
  return [gr.Tabs(selected=1), image, error]
146
 
147
 
148
+ def document_view(document_number: int, examples: list[str] = []):
149
+ gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image</h4>', elem_classes=[
150
+ 'center'])
151
+ gr.HTML(value=f'<p>Click the button below to upload Upload PDF or Document Image or cleck the URL tab to add using link.</p>', elem_classes=[
152
  'center'])
153
  with gr.Tabs() as document_tabs:
154
  with gr.Tab("From Image", id=0):
 
157
  document_error_message = gr.HTML(
158
  label="Error Message", visible=False)
159
  document_preview = gr.UploadButton(
160
+ label="Upload PDF or Document Image",
161
  file_types=["image", ".pdf"],
162
  file_count="single")
163
  with gr.Tab("From URL", id=1):
 
168
  document_url_error_message = gr.HTML(
169
  label="Error Message", visible=False)
170
  document_url_preview = gr.Button(
171
+ value="Preview Link Document", variant="secondary")
172
+ if len(examples) > 0:
173
+ gr.Examples(
174
+ examples=examples,
175
+ inputs=document,
176
+ label='Select any of these test document images')
177
  document_preview.upload(
178
  fn=lambda file: load_image(file.name),
179
  inputs=[document_preview],
 
182
  fn=preview_url,
183
  inputs=[document_url],
184
  outputs=[document_tabs, document, document_url_error_message])
185
+ document.change(
186
+ fn = lambda image: gr.Image(value=image, visible=True) if image else gr.Image(value=None, visible=False),
187
+ inputs = [document],
188
+ outputs = [document])
189
  return document
190
 
191
 
192
+ def app(*, model_path:str, config_path:str, examples: list[str], debug=False):
193
  model: lp.Detectron2LayoutModel = lp.Detectron2LayoutModel(
194
  config_path=config_path,
195
  model_path=model_path,
 
201
  .center { display: flex; flex: 1 1 auto; align-items: center; align-content: center; justify-content: center; justify-items: center; }
202
  .hr { width: 100%; display: block; padding: 0; margin: 0; background: gray; height: 4px; border: none; }
203
  '''
204
+ with gr.Blocks(title=title, css=css) as interface:
205
  with gr.Row():
206
  gr.HTML(value=description, elem_classes=['center'])
207
  with gr.Row(equal_height=False):
208
  with gr.Column():
209
+ document_1_image = document_view(1, examples)
210
  with gr.Column():
211
+ document_2_image = document_view(2, examples)
212
  gr.HTML('<hr/>', elem_classes=['hr'])
213
  with gr.Row(elem_classes=['center']):
214
  with gr.Column():
 
222
  info="Select the Vectors Type to use for Similarity Calculation")
223
  similarity_output = gr.HTML(
224
  label="Similarity Score", visible=False)
 
225
  kwargs = {
226
  'fn': lambda document_1_image, document_2_image, vectors_type: similarity_fn(
227
  model,
 
233
  }
234
  submit.click(**kwargs)
235
  vectors_type.change(**kwargs)
236
+ return interface.launch(debug=debug)