MohamedRashad committed on
Commit
fc3376f
·
1 Parent(s): 545acf0

Refactor app.py to remove flash-attn installation; enhance Gradio interface with styled HTML header and detailed model information section.

Browse files
Files changed (1) hide show
  1. app.py +51 -6
app.py CHANGED
@@ -4,9 +4,6 @@ from transformers import AutoTokenizer, AutoProcessor, AutoModelForImageTextToTe
4
  import torch
5
  import spaces
6
 
7
- import subprocess
8
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
9
-
10
  model_path = "nanonets/Nanonets-OCR-s"
11
 
12
  # Load model once at startup
@@ -60,8 +57,15 @@ def ocr_image_gradio(image, max_tokens=4096):
60
 
61
  # Create Gradio interface
62
  with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
63
- gr.Markdown("# 🔍 Nanonets OCR - Document Text Extraction")
64
- gr.Markdown("Upload an image of a document to extract text, tables, equations, and more!")
 
 
 
 
 
 
 
65
 
66
  with gr.Row():
67
  with gr.Column(scale=1):
@@ -84,7 +88,6 @@ with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
84
  output_text = gr.Textbox(
85
  label="Extracted Text",
86
  lines=20,
87
- max_lines=30,
88
  show_copy_button=True,
89
  placeholder="Extracted text will appear here..."
90
  )
@@ -103,6 +106,48 @@ with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
103
  outputs=output_text,
104
  show_progress=True
105
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  if __name__ == "__main__":
108
  demo.queue().launch()
 
4
  import torch
5
  import spaces
6
 
 
 
 
7
  model_path = "nanonets/Nanonets-OCR-s"
8
 
9
  # Load model once at startup
 
57
 
58
  # Create Gradio interface
59
  with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
60
+ # Replace simple markdown with styled HTML header
61
+ gr.HTML("""
62
+ <div class="title" style="text-align: center">
63
+ <h1>🔍 Nanonets OCR - Document Text Extraction</h1>
64
+ <p style="font-size: 1.1em; color: #6b7280; margin-bottom: 0.6em;">
65
+ A state-of-the-art image-to-markdown OCR model for intelligent document processing
66
+ </p>
67
+ </div>
68
+ """)
69
 
70
  with gr.Row():
71
  with gr.Column(scale=1):
 
88
  output_text = gr.Textbox(
89
  label="Extracted Text",
90
  lines=20,
 
91
  show_copy_button=True,
92
  placeholder="Extracted text will appear here..."
93
  )
 
106
  outputs=output_text,
107
  show_progress=True
108
  )
109
+
110
+ # Add model information section
111
+ with gr.Accordion("About Nanonets-OCR-s", open=False):
112
+ gr.Markdown("""
113
+ ## Nanonets-OCR-s
114
+
115
+ Nanonets-OCR-s is a powerful, state-of-the-art image-to-markdown OCR model that goes far beyond traditional text extraction.
116
+ It transforms documents into structured markdown with intelligent content recognition and semantic tagging, making it ideal
117
+ for downstream processing by Large Language Models (LLMs).
118
+
119
+ ### Key Features
120
+
121
+ - **LaTeX Equation Recognition**: Automatically converts mathematical equations and formulas into properly formatted LaTeX syntax.
122
+ It distinguishes between inline ($...$) and display ($$...$$) equations.
123
+
124
+ - **Intelligent Image Description**: Describes images within documents using structured `<img>` tags, making them digestible
125
+ for LLM processing. It can describe various image types, including logos, charts, graphs and so on, detailing their content,
126
+ style, and context.
127
+
128
+ - **Signature Detection & Isolation**: Identifies and isolates signatures from other text, outputting them within a `<signature>` tag.
129
+ This is crucial for processing legal and business documents.
130
+
131
+ - **Watermark Extraction**: Detects and extracts watermark text from documents, placing it within a `<watermark>` tag.
132
+
133
+ - **Smart Checkbox Handling**: Converts form checkboxes and radio buttons into standardized Unicode symbols (☐, ☑, ☒)
134
+ for consistent and reliable processing.
135
+
136
+ - **Complex Table Extraction**: Accurately extracts complex tables from documents and converts them into both markdown
137
+ and HTML table formats.
138
+ """)
139
+
140
+ # Add links section at the bottom
141
+ gr.Markdown("""
142
+ ---
143
+ ### Resources
144
+
145
+ - [📚 Hugging Face Model](https://huggingface.co/nanonets/Nanonets-OCR-s)
146
+ - [📝 Release Blog](https://nanonets.com/research/nanonets-ocr-s/)
147
+ - [💻 GitHub Repository](https://github.com/NanoNets/docext)
148
+
149
+ ---
150
+ """)
151
 
152
  if __name__ == "__main__":
153
  demo.queue().launch()