Spaces:
Running
Running
Commit
·
fc3376f
1
Parent(s):
545acf0
Refactor app.py to remove flash-attn installation; enhance Gradio interface with styled HTML header and detailed model information section.
Browse files
app.py
CHANGED
@@ -4,9 +4,6 @@ from transformers import AutoTokenizer, AutoProcessor, AutoModelForImageTextToTe
|
|
4 |
import torch
|
5 |
import spaces
|
6 |
|
7 |
-
import subprocess
|
8 |
-
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
9 |
-
|
10 |
model_path = "nanonets/Nanonets-OCR-s"
|
11 |
|
12 |
# Load model once at startup
|
@@ -60,8 +57,15 @@ def ocr_image_gradio(image, max_tokens=4096):
|
|
60 |
|
61 |
# Create Gradio interface
|
62 |
with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
|
63 |
-
|
64 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
with gr.Row():
|
67 |
with gr.Column(scale=1):
|
@@ -84,7 +88,6 @@ with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
|
|
84 |
output_text = gr.Textbox(
|
85 |
label="Extracted Text",
|
86 |
lines=20,
|
87 |
-
max_lines=30,
|
88 |
show_copy_button=True,
|
89 |
placeholder="Extracted text will appear here..."
|
90 |
)
|
@@ -103,6 +106,48 @@ with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
|
|
103 |
outputs=output_text,
|
104 |
show_progress=True
|
105 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
|
107 |
if __name__ == "__main__":
|
108 |
demo.queue().launch()
|
|
|
4 |
import torch
|
5 |
import spaces
|
6 |
|
|
|
|
|
|
|
7 |
model_path = "nanonets/Nanonets-OCR-s"
|
8 |
|
9 |
# Load model once at startup
|
|
|
57 |
|
58 |
# Create Gradio interface
|
59 |
with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
|
60 |
+
# Replace simple markdown with styled HTML header
|
61 |
+
gr.HTML("""
|
62 |
+
<div class="title" style="text-align: center">
|
63 |
+
<h1>🔍 Nanonets OCR - Document Text Extraction</h1>
|
64 |
+
<p style="font-size: 1.1em; color: #6b7280; margin-bottom: 0.6em;">
|
65 |
+
A state-of-the-art image-to-markdown OCR model for intelligent document processing
|
66 |
+
</p>
|
67 |
+
</div>
|
68 |
+
""")
|
69 |
|
70 |
with gr.Row():
|
71 |
with gr.Column(scale=1):
|
|
|
88 |
output_text = gr.Textbox(
|
89 |
label="Extracted Text",
|
90 |
lines=20,
|
|
|
91 |
show_copy_button=True,
|
92 |
placeholder="Extracted text will appear here..."
|
93 |
)
|
|
|
106 |
outputs=output_text,
|
107 |
show_progress=True
|
108 |
)
|
109 |
+
|
110 |
+
# Add model information section
|
111 |
+
with gr.Accordion("About Nanonets-OCR-s", open=False):
|
112 |
+
gr.Markdown("""
|
113 |
+
## Nanonets-OCR-s
|
114 |
+
|
115 |
+
Nanonets-OCR-s is a powerful, state-of-the-art image-to-markdown OCR model that goes far beyond traditional text extraction.
|
116 |
+
It transforms documents into structured markdown with intelligent content recognition and semantic tagging, making it ideal
|
117 |
+
for downstream processing by Large Language Models (LLMs).
|
118 |
+
|
119 |
+
### Key Features
|
120 |
+
|
121 |
+
- **LaTeX Equation Recognition**: Automatically converts mathematical equations and formulas into properly formatted LaTeX syntax.
|
122 |
+
It distinguishes between inline ($...$) and display ($$...$$) equations.
|
123 |
+
|
124 |
+
- **Intelligent Image Description**: Describes images within documents using structured `<img>` tags, making them digestible
|
125 |
+
for LLM processing. It can describe various image types, including logos, charts, graphs and so on, detailing their content,
|
126 |
+
style, and context.
|
127 |
+
|
128 |
+
- **Signature Detection & Isolation**: Identifies and isolates signatures from other text, outputting them within a `<signature>` tag.
|
129 |
+
This is crucial for processing legal and business documents.
|
130 |
+
|
131 |
+
- **Watermark Extraction**: Detects and extracts watermark text from documents, placing it within a `<watermark>` tag.
|
132 |
+
|
133 |
+
- **Smart Checkbox Handling**: Converts form checkboxes and radio buttons into standardized Unicode symbols (☐, ☑, ☒)
|
134 |
+
for consistent and reliable processing.
|
135 |
+
|
136 |
+
- **Complex Table Extraction**: Accurately extracts complex tables from documents and converts them into both markdown
|
137 |
+
and HTML table formats.
|
138 |
+
""")
|
139 |
+
|
140 |
+
# Add links section at the bottom
|
141 |
+
gr.Markdown("""
|
142 |
+
---
|
143 |
+
### Resources
|
144 |
+
|
145 |
+
- [📚 Hugging Face Model](https://huggingface.co/nanonets/Nanonets-OCR-s)
|
146 |
+
- [📝 Release Blog](https://nanonets.com/research/nanonets-ocr-s/)
|
147 |
+
- [💻 GitHub Repository](https://github.com/NanoNets/docext)
|
148 |
+
|
149 |
+
---
|
150 |
+
""")
|
151 |
|
152 |
if __name__ == "__main__":
|
153 |
demo.queue().launch()
|