import traceback

import gradio as gr
from pdf2image import convert_from_path, convert_from_bytes

from utils.get_RGB_image import get_RGB_image, is_online_file, steam_online_file


def similarity_fn(document_image_1, document_image_2):
    """Report how many of the two document images are currently set.

    No similarity score is computed here: the function only checks the
    truthiness of its two inputs and reports ``BOTH``, ``ONE`` or ``NONE``.

    Args:
        document_image_1: Value of the first document image input.
        document_image_2: Value of the second document image input.

    Returns:
        A visible ``gr.HTML`` component carrying the status message.
    """
    if document_image_1 and document_image_2:
        loaded = 'BOTH'
    elif document_image_1 or document_image_2:
        loaded = 'ONE'
    else:
        loaded = 'NONE'
    # NOTE(review): the message is plain text framed by newlines; any HTML
    # wrapper that may have been intended is not present - confirm.
    return gr.HTML(
        f'\nSimilarity between the two documents: {loaded}\n',
        visible=True,
    )

def load_image(filename: str, page: int = 0):
    """Load one page of a PDF, or a plain image, for display.

    The input is first treated as a PDF: when ``is_online_file`` accepts it,
    the bytes returned by ``steam_online_file`` are rendered with
    ``convert_from_bytes``; otherwise it is rendered with
    ``convert_from_path``. If that attempt fails for any reason,
    ``filename`` is passed straight to ``get_RGB_image`` instead.

    Args:
        filename: Path or URL of the PDF or image to load.
        page: Index into the rendered PDF pages. Ignored by the plain-image
            fallback.

    Returns:
        A two-item list ``[image, error]``. On success it is a visible
        ``gr.Image`` and ``None``; on failure it is ``None`` and a visible
        ``gr.HTML`` holding the HTML-escaped traceback.
    """
    import html  # local import: only needed to escape the traceback text

    try:
        try:
            if is_online_file(filename):
                pages = convert_from_bytes(steam_online_file(filename))
            else:
                pages = convert_from_path(filename)
            image = get_RGB_image(pages[page])
        except Exception:
            # Deliberate best-effort fallback: anything that cannot be
            # rendered as a PDF page is retried as a regular image.
            image = get_RGB_image(filename)
        return [
            gr.Image(value=image, visible=True),
            None
        ]
    except Exception:
        # Escape the traceback so text such as "<module>" or URL-derived
        # content is shown literally instead of being parsed as markup.
        error = html.escape(traceback.format_exc())
        return [None, gr.HTML(value=error, visible=True)]
    
def preview_url(url, page = 0):
    """Load a document from a URL and choose which tab to show.

    Delegates to ``load_image`` and selects tab ``0`` when an image was
    produced, or tab ``1`` when it was not.

    Args:
        url: Link to the PDF or document image.
        page: Page index forwarded to ``load_image``.

    Returns:
        A three-item list: a ``gr.Tabs`` selection, the image result and the
        error result from ``load_image``.
    """
    image, error = load_image(url, page=page)
    selected_tab = 0 if image else 1
    return [gr.Tabs(selected=selected_tab), image, error]
    
def document_view(document_number: int):
    """Build the loader panel for one document and return its image component.

    The panel has a heading and two tabs: "From Image" (id 0) with an upload
    button, and "From URL" (id 1) with a textbox and a preview button. Both
    routes write into the same ``gr.Image`` component. It is called by
    ``app`` from inside a ``gr.Blocks`` context.

    NOTE(review): the ``with`` nesting below was reconstructed from the order
    of statements in a collapsed source line - confirm the layout.

    Args:
        document_number: ``1`` labels the panel "first"; any other value
            labels it "second".

    Returns:
        The ``gr.Image`` component that shows the loaded document.
    """
    gr.HTML(
        value=f'\n\nLoad the {"first" if document_number == 1 else "second"} PDF or Document Image\n\n',
        elem_classes=['center'])
    with gr.Tabs() as document_tabs:
        with gr.Tab("From Image", id=0):
            document = gr.Image(
                type="pil",
                label=f"Document {document_number}",
                visible=False)
            document_error_message = gr.HTML(
                label="Error Message", visible=False)
            document_preview = gr.UploadButton(
                "Click to PDF or Document Image",
                file_types=["image", ".pdf"],
                file_count="single")
        with gr.Tab("From URL", id=1):
            document_url = gr.Textbox(
                label=f"Document {document_number} URL",
                info="Paste a Link/URL to PDF or Document Image",
                placeholder="https://datasets-server.huggingface.co/.../image.jpg")
            document_url_error_message = gr.HTML(
                label="Error Message", visible=False)
            document_url_preview = gr.Button(
                value="Preview", variant="primary")
    # An uploaded file is loaded through its ``name`` attribute.
    document_preview.upload(
        fn=lambda file: load_image(file.name),
        inputs=[document_preview],
        outputs=[document, document_error_message])
    # The URL preview also updates the selected tab via preview_url.
    document_url_preview.click(
        fn=preview_url,
        inputs=[document_url],
        outputs=[document_tabs, document, document_url_error_message])
    return document


def app():
    """Assemble the two-document comparison UI and launch it.

    Lays out a title row, two ``document_view`` panels side by side, a
    separator, and a row with the action buttons and the output area. The
    "Similarity" button is wired to ``similarity_fn``.

    Returns:
        Whatever ``launch(debug=True)`` returns for the built ``gr.Blocks``.
    """
    title = 'Document Similarity Search Using Visual Layout Features'
    description = f"\n\n{title}\n\n"
    # NOTE(review): `max-height="86vh"` is not `property: value` syntax, so
    # the first rule presumably has no effect. Left unchanged - confirm the
    # intended selector and declaration before fixing.
    css = (
        ' image { max-height="86vh" !important; }'
        ' .center { display: flex; flex: 1 1 auto; align-items: center;'
        ' align-content: center; justify-content: center;'
        ' justify-items: center; }'
        ' .hr { width: 100%; display: block; padding: 0; margin: 0;'
        ' background: gray; height: 4px; border: none; } '
    )
    with gr.Blocks(title=title, css=css) as demo:
        with gr.Row():
            gr.HTML(value=description, elem_classes=['center'])
        with gr.Row(equal_height=False):
            with gr.Column():
                document_1_image = document_view(1)
            with gr.Column():
                document_2_image = document_view(2)
        # Separator styled through the `.hr` class.
        gr.HTML('\n', elem_classes=['hr'])
        with gr.Row(elem_classes=['center']):
            with gr.Column():
                submit = gr.Button(value="Similarity", variant="primary")
                # The Reset button is displayed, but no click handler is
                # attached to it in this function.
                gr.Button(value="Reset", variant="secondary")
            with gr.Column():
                similarity_output = gr.HTML(visible=False)
        submit.click(
            fn=similarity_fn,
            inputs=[document_1_image, document_2_image],
            outputs=[similarity_output])
    return demo.launch(debug=True)