Nanonets-OCR

Running

App Files Community

MohamedRashad committed on Jun 12

Commit

fc3376f

1 Parent(s): 545acf0

Refactor app.py to remove flash-attn installation; enhance Gradio interface with styled HTML header and detailed model information section.

Browse files

Files changed (1) hide show

app.py +51 -6

app.py CHANGED Viewed

@@ -4,9 +4,6 @@ from transformers import AutoTokenizer, AutoProcessor, AutoModelForImageTextToTe
 import torch
 import spaces
-import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 model_path = "nanonets/Nanonets-OCR-s"
 # Load model once at startup
@@ -60,8 +57,15 @@ def ocr_image_gradio(image, max_tokens=4096):
 # Create Gradio interface
 with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🔍 Nanonets OCR - Document Text Extraction")
-    gr.Markdown("Upload an image of a document to extract text, tables, equations, and more!")
     with gr.Row():
         with gr.Column(scale=1):
@@ -84,7 +88,6 @@ with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
             output_text = gr.Textbox(
                 label="Extracted Text",
                 lines=20,
-                max_lines=30,
                 show_copy_button=True,
                 placeholder="Extracted text will appear here..."
             )
@@ -103,6 +106,48 @@ with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
         outputs=output_text,
         show_progress=True
     )
 if __name__ == "__main__":
     demo.queue().launch()

 import torch
 import spaces
 model_path = "nanonets/Nanonets-OCR-s"
 # Load model once at startup
 # Create Gradio interface
 with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
+    # Replace simple markdown with styled HTML header
+    gr.HTML("""
+    <div class="title" style="text-align: center">
+        <h1>🔍 Nanonets OCR - Document Text Extraction</h1>
+        <p style="font-size: 1.1em; color: #6b7280; margin-bottom: 0.6em;">
+            A state-of-the-art image-to-markdown OCR model for intelligent document processing
+        </p>
+    </div>
+    """)
     with gr.Row():
         with gr.Column(scale=1):
             output_text = gr.Textbox(
                 label="Extracted Text",
                 lines=20,
                 show_copy_button=True,
                 placeholder="Extracted text will appear here..."
             )
         outputs=output_text,
         show_progress=True
     )
+    # Add model information section
+    with gr.Accordion("About Nanonets-OCR-s", open=False):
+        gr.Markdown("""
+        ## Nanonets-OCR-s
+        Nanonets-OCR-s is a powerful, state-of-the-art image-to-markdown OCR model that goes far beyond traditional text extraction.
+        It transforms documents into structured markdown with intelligent content recognition and semantic tagging, making it ideal
+        for downstream processing by Large Language Models (LLMs).
+        ### Key Features
+        - **LaTeX Equation Recognition**: Automatically converts mathematical equations and formulas into properly formatted LaTeX syntax.
+          It distinguishes between inline ($...$) and display ($$...$$) equations.
+        - **Intelligent Image Description**: Describes images within documents using structured `<img>` tags, making them digestible
+          for LLM processing. It can describe various image types, including logos, charts, graphs and so on, detailing their content,
+          style, and context.
+        - **Signature Detection & Isolation**: Identifies and isolates signatures from other text, outputting them within a `<signature>` tag.
+          This is crucial for processing legal and business documents.
+        - **Watermark Extraction**: Detects and extracts watermark text from documents, placing it within a `<watermark>` tag.
+        - **Smart Checkbox Handling**: Converts form checkboxes and radio buttons into standardized Unicode symbols (☐, ☑, ☒)
+          for consistent and reliable processing.
+        - **Complex Table Extraction**: Accurately extracts complex tables from documents and converts them into both markdown
+          and HTML table formats.
+        """)
+    # Add links section at the bottom
+    gr.Markdown("""
+    ---
+    ### Resources
+    - [📚 Hugging Face Model](https://huggingface.co/nanonets/Nanonets-OCR-s)
+    - [📝 Release Blog](https://nanonets.com/research/nanonets-ocr-s/)
+    - [💻 GitHub Repository](https://github.com/NanoNets/docext)
+    ---
+    """)
 if __name__ == "__main__":
     demo.queue().launch()