die_demo

Sleeping

App Files Community

gabar92 committed on Nov 6, 2024

Commit

f6be418

1 Parent(s): f72a688

change code back to first version

Browse files

Files changed (1) hide show

app.py +64 -56

app.py CHANGED Viewed

@@ -5,16 +5,22 @@ Small demo application to explore Gradio.
 import argparse
 import os
 from functools import partial
-import torch
 import gradio as gr
 from PIL import Image
 from huggingface_hub import hf_hub_download
 from die_model import UNetDIEModel
-from utils import resize_image, make_image_square, cast_pil_image_to_torch_tensor_with_4_channel_dim, remove_square_padding
-def die_inference(image_raw, num_of_die_iterations, die_model, device):
     """
     Function to run the DIE model.
     :param image_raw: raw image
@@ -23,61 +29,69 @@ def die_inference(image_raw, num_of_die_iterations, die_model, device):
     :param device: device
     :return: cleaned image
     """
-    # Preprocess
     image_raw_resized = resize_image(image_raw, 1500)
     image_raw_resized_square = make_image_square(image_raw_resized)
     image_raw_resized_square_tensor = cast_pil_image_to_torch_tensor_with_4_channel_dim(image_raw_resized_square)
     image_raw_resized_square_tensor = image_raw_resized_square_tensor.to(device)
-    # Convert string to int
     num_of_die_iterations = int(num_of_die_iterations)
-    # Inference
     image_die = die_model.enhance_document_image(
         image_raw_list=[image_raw_resized_square_tensor],
         num_of_die_iterations=num_of_die_iterations
     )[0]
-    # Postprocess
     image_die_resized = remove_square_padding(
         original_image=image_raw,
         square_image=image_die,
         resize_back_to_original=True
     )
     return image_die_resized
 def main():
     """
     Main function to run the Gradio demo.
-    """
-    args = parse_arguments()
-    description_intro = """
-    # Welcome to the Document Image Enhancement (DIE) Model Demo!
-    This interactive application showcases the capabilities of a specialized AI model developed by the [Artificial Intelligence group](https://ai.renyi.hu) at the [Alfréd Rényi Institute of Mathematics](https://renyi.hu).
-    """
-    description_overview = """
-    ## Model Overview
-    The DIE model is crafted to enhance and restore archival and aged document images by removing various types of degradation. It tackles noise such as scribbles, bleed-through text, faded or blurred text, and other unwanted background elements. This process significantly improves the clarity of documents, which in turn enhances Optical Character Recognition (OCR) accuracy.
     """
-    description_features = """
-    ## Features
-    - Removes 20-30 types of domain-specific noise often found in historical records.
-    - Utilizes a U-Net-based architecture for effective and detailed text restoration.
-    - Serves as a valuable pre-processing tool in digitization workflows, especially for archives and historical documents.
-    """
-    description_contact = """
-    ## Contact Us
-    For more information, feel free to reach out at: [email protected]
-    """
     num_of_die_iterations_list = [1, 2, 3]
     die_token = os.getenv("DIE_TOKEN")
     example_image_list = [
         [Image.open(os.path.join(args.example_image_path, image_path))]
         for image_path in os.listdir(args.example_image_path)
@@ -90,48 +104,42 @@ def main():
         use_auth_token=die_token
     )
-    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     die_model = UNetDIEModel(args=args)
     # Partially apply the model and device arguments to die_inference
     partial_die_inference = partial(die_inference, device=args.device, die_model=die_model)
-    # Gradio Interface
-    with gr.Blocks() as demo:
-        gr.Markdown(description_intro)
-        gr.Markdown(description_overview)
-        gr.Markdown(description_features)
-        gr.Markdown(description_contact)
-        gr.Image(label="", value="logo/qr-code.png", height=200, width=200)
-        # Define inputs and outputs
-        gr.Markdown("### Upload a degraded document image and select the number of DIE iterations:")
-        degraded_image_input = gr.Image(type="pil", label="Degraded Document Image")
-        iterations_input = gr.Dropdown(
-            num_of_die_iterations_list, label="Number of DIE iterations", value=1,
-            info="Choose the number of times to apply the enhancement model."
-        )
-        clean_image_output = gr.Image(type="pil", label="Clean Document Image")
-        gr.Interface(
-            fn=partial_die_inference,
-            inputs=[degraded_image_input, iterations_input],
-            outputs=clean_image_output,
-            examples=example_image_list,
-            title="Document Image Enhancement (DIE) Model"
-        ).launch(server_name="0.0.0.0", server_port=7860, share=True)
 def parse_arguments():
     """
     Parse arguments.
     """
     parser = argparse.ArgumentParser()
     parser.add_argument("--die_model_path", default="2024_08_09_model_epoch_89.pt")
     parser.add_argument("--example_image_path", default="example_images")
     return parser.parse_args()
 if __name__ == "__main__":
-    main()

 import argparse
 import os
 from functools import partial
 import gradio as gr
 from PIL import Image
 from huggingface_hub import hf_hub_download
 from die_model import UNetDIEModel
+from utils import resize_image, make_image_square, cast_pil_image_to_torch_tensor_with_4_channel_dim, \
+    remove_square_padding
+def die_inference(
+    image_raw,
+    num_of_die_iterations,
+    die_model,
+    device
+):
     """
     Function to run the DIE model.
     :param image_raw: raw image
     :param device: device
     :return: cleaned image
     """
+    # preprocess
     image_raw_resized = resize_image(image_raw, 1500)
     image_raw_resized_square = make_image_square(image_raw_resized)
     image_raw_resized_square_tensor = cast_pil_image_to_torch_tensor_with_4_channel_dim(image_raw_resized_square)
     image_raw_resized_square_tensor = image_raw_resized_square_tensor.to(device)
+    # convert string to int
     num_of_die_iterations = int(num_of_die_iterations)
+    # inference
     image_die = die_model.enhance_document_image(
         image_raw_list=[image_raw_resized_square_tensor],
         num_of_die_iterations=num_of_die_iterations
     )[0]
+    # postprocess
     image_die_resized = remove_square_padding(
         original_image=image_raw,
         square_image=image_die,
         resize_back_to_original=True
     )
     return image_die_resized
 def main():
     """
     Main function to run the Gradio demo.
+    :return:
     """
+    args = parse_arguments()
+    description = "Welcome to the Document Image Enhancement (DIE) model demo on Hugging Face!\n\n" \
+                  "" \
+                  "This interactive application showcases a specialized AI model developed by " \
+                  "the [Artificial Intelligence group](https://ai.renyi.hu) at the [Alfréd Rényi Institute of Mathematics](https://renyi.hu).\n\n" \
+                  "" \
+                  "Our DIE model is designed to enhance and restore archival and aged document images " \
+                  "by removing various types of degradation, thereby making historical documents more legible " \
+                  "and suitable for Optical Character Recognition (OCR) processing.\n\n" \
+                  "" \
+                  "The model effectively tackles 20-30 types of domain-specific noise found in historical records, " \
+                  "such as scribbles, bleed-through text, faded or worn text, blurriness, textured noise, " \
+                  "and unwanted background elements. " \
+                  "By applying deep learning techniques, specifically a U-Net-based architecture, " \
+                  "the model accurately cleans and clarifies text while preserving original details. " \
+                  "This improved clarity dramatically boosts OCR accuracy, making it an ideal " \
+                  "pre-processing tool in digitization workflows.\n\n" \
+                  "" \
+                  "If you’re interested in learning more about the model’s capabilities or potential applications, " \
+                  "please contact us at: [email protected].\n" \
+                  "<img src='logo/qr-code.png' width=200px>\n\n"
+    # TODO: Add a description for the Number of DIE iterations parameter!
     num_of_die_iterations_list = [1, 2, 3]
     die_token = os.getenv("DIE_TOKEN")
+    # Provide images alone for example display
     example_image_list = [
         [Image.open(os.path.join(args.example_image_path, image_path))]
         for image_path in os.listdir(args.example_image_path)
         use_auth_token=die_token
     )
     die_model = UNetDIEModel(args=args)
     # Partially apply the model and device arguments to die_inference
     partial_die_inference = partial(die_inference, device=args.device, die_model=die_model)
+    demo = gr.Interface(
+        fn=partial_die_inference,
+        inputs=[
+            gr.Image(type="pil", label="Degraded Document Image"),
+            gr.Dropdown(num_of_die_iterations_list, label="Number of DIE iterations", value=1),
+        ],
+        outputs=gr.Image(type="pil", label="Clean Document Image"),
+        title="Document Image Enhancement (DIE) model",
+        description=description,
+        examples=example_image_list
+    )
+    demo.launch(server_name="0.0.0.0", server_port=7860)
 def parse_arguments():
     """
     Parse arguments.
+    :return: argument namespace
     """
     parser = argparse.ArgumentParser()
     parser.add_argument("--die_model_path", default="2024_08_09_model_epoch_89.pt")
+    parser.add_argument("--device", default="cpu")
     parser.add_argument("--example_image_path", default="example_images")
     return parser.parse_args()
 if __name__ == "__main__":
+    main()