Enhance document viewer UI: Focus on images, reduce clutter
- app.py +2 -1
- ui_components.py +127 -105
app.py
CHANGED
@@ -423,7 +423,8 @@ def process_document(uploaded_file, left_col, right_col, sidebar_options):
             max_pages=sidebar_options.get("max_pages", 3),
             pdf_rotation=sidebar_options.get("pdf_rotation", 0),
             custom_prompt=sidebar_options.get("custom_prompt", ""),
-            perf_mode=sidebar_options.get("perf_mode", "Quality")
+            perf_mode=sidebar_options.get("perf_mode", "Quality"),
+            use_segmentation=sidebar_options.get("use_segmentation", False)
         )

         # Ensure temp_file_paths in session state is updated with any new paths
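
For context, process_document now expects the sidebar_options dictionary (built by create_sidebar_options() in ui_components.py, below) to carry the new use_segmentation flag. A rough sketch of the dictionary shape implied by the .get() fallbacks in this hunk; the values are illustrative defaults, not output captured from the app:

    # Illustrative only: sidebar_options shape implied by the .get() calls above.
    sidebar_options = {
        "max_pages": 3,                # PDF pages to process
        "pdf_rotation": 0,             # degrees
        "custom_prompt": "",           # extra OCR instructions
        "perf_mode": "Quality",        # existing performance toggle
        "preprocessing_options": {},   # grayscale/denoise/contrast/rotation settings
        "use_segmentation": False,     # new flag added by this commit
    }
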
ui_components.py
CHANGED
@@ -82,6 +82,13 @@ def create_sidebar_options():
     doc_layout = st.selectbox("Document Layout", DOCUMENT_LAYOUTS,
                              help="Select the layout of your document")

+    # Initialize preprocessing variables with default values
+    grayscale = False
+    denoise = False
+    contrast = 0
+    rotation = 0
+    use_segmentation = False
+
     # Custom prompt
     custom_prompt = ""
     if doc_type != DOCUMENT_TYPES[0]:  # Not auto-detect
@@ -103,33 +110,43 @@ def create_sidebar_options():
                              help="Customize the instructions for processing this document",
                              height=80)

-
-
-
-
+    # Image preprocessing options in an expandable section
+    with st.expander("Image Preprocessing"):
+        # Grayscale conversion
+        grayscale = st.checkbox("Convert to Grayscale",
                              value=False,
                              help="Convert color images to grayscale for better OCR")
-
-
-
+
+        # Denoise
+        denoise = st.checkbox("Denoise Image",
                              value=False,
                              help="Remove noise from the image")
-
-
-
+
+        # Contrast adjustment
+        contrast = st.slider("Contrast Adjustment",
                              min_value=-50,
                              max_value=50,
                              value=0,
                              step=10,
                              help="Adjust image contrast")
-
-
-
+
+        # Rotation
+        rotation = st.slider("Rotation",
                              min_value=-45,
                              max_value=45,
                              value=0,
                              step=5,
                              help="Rotate image if needed")
+
+        # Add image segmentation option
+        st.markdown("### Advanced Options")
+        use_segmentation = st.toggle("Enable Image Segmentation",
+                             value=False,
+                             help="Segment the image into text and image regions for better OCR results on complex documents")
+
+        # Show explanation if segmentation is enabled
+        if use_segmentation:
+            st.info("Image segmentation identifies distinct text regions in complex documents, improving OCR accuracy. This is especially helpful for documents with mixed content like the Magician illustration.")

     # Create preprocessing options dictionary
     # Set document_type based on selection in UI
@@ -169,7 +186,8 @@ def create_sidebar_options():
         "max_pages": max_pages,
         "pdf_rotation": pdf_rotation,
         "custom_prompt": custom_prompt,
-        "preprocessing_options": preprocessing_options
+        "preprocessing_options": preprocessing_options,
+        "use_segmentation": use_segmentation if 'use_segmentation' in locals() else False
     }

     return options
@@ -198,10 +216,48 @@ def create_file_uploader():
     )
     return uploaded_file

+# Function removed - now using inline implementation in app.py
+def _unused_display_preprocessing_preview(uploaded_file, preprocessing_options):
+    """Display a preview of image with preprocessing options applied"""
+    if (any(preprocessing_options.values()) and
+        uploaded_file.type.startswith('image/')):
+
+        st.markdown("**Preprocessed Preview**")
+        try:
+            # Create a container for the preview
+            with st.container():
+                processed_bytes = preprocess_image(uploaded_file.getvalue(), preprocessing_options)
+                # Convert image to base64 and display as HTML to avoid fullscreen button
+                img_data = base64.b64encode(processed_bytes).decode()
+                img_html = f'<img src="data:image/jpeg;base64,{img_data}" style="width:100%; border-radius:4px;">'
+                st.markdown(img_html, unsafe_allow_html=True)
+
+                # Show preprocessing metadata in a well-formatted caption
+                meta_items = []
+                if preprocessing_options.get("document_type", "standard") != "standard":
+                    meta_items.append(f"Document type ({preprocessing_options['document_type']})")
+                if preprocessing_options.get("grayscale", False):
+                    meta_items.append("Grayscale")
+                if preprocessing_options.get("denoise", False):
+                    meta_items.append("Denoise")
+                if preprocessing_options.get("contrast", 0) != 0:
+                    meta_items.append(f"Contrast ({preprocessing_options['contrast']})")
+                if preprocessing_options.get("rotation", 0) != 0:
+                    meta_items.append(f"Rotation ({preprocessing_options['rotation']}°)")
+
+                # Only show "Applied:" if there are actual preprocessing steps
+                if meta_items:
+                    meta_text = "Applied: " + ", ".join(meta_items)
+                    st.caption(meta_text)
+        except Exception as e:
+            st.error(f"Error in preprocessing: {str(e)}")
+            st.info("Try using grayscale preprocessing for PNG images with transparency")
+
 def display_results(result, container, custom_prompt=""):
     """Display OCR results in the provided container"""
     with container:
-        #
+        # Add heading for document metadata
+        st.markdown("### Document Metadata")

         # Create a compact metadata section
         meta_html = '<div style="display: flex; flex-wrap: wrap; gap: 0.3rem; margin-bottom: 0.3rem;">'
@@ -459,19 +515,10 @@ def display_results(result, container, custom_prompt=""):
                     'index': img_idx
                 })

-        #
-        if images_to_display:
-
-
-            cols_count = min(2, len(images_to_display))
-            image_cols = st.columns(cols_count)
-
-            # Display each image in a column with minimal spacing
-            for i, img in enumerate(images_to_display):
-                with image_cols[i % cols_count]:
-                    # Compact image display
-                    st.image(img['data'], use_container_width=True)
-                    st.markdown(f"<p style='margin-top:-5px; font-size:0.8rem; color:#666; text-align:center;'>Document Image {i+1}</p>", unsafe_allow_html=True)
+        # Simple display of image without dropdown or Document Image tab
+        if images_to_display and len(images_to_display) > 0:
+            # Just display the first image directly
+            st.image(images_to_display[0]['data'], use_container_width=True)

         # Organize sections in a logical order
         section_order = ["title", "author", "date", "summary", "content", "transcript", "metadata"]
@@ -676,88 +723,63 @@ def display_document_with_images(result):
     for i, page_data in enumerate(pages_data):
         st.markdown(f"### Page {i+1}")

-        #
-
+        # Display only the image (removed text column)
+        # Display the image - check multiple possible field names
+        image_displayed = False

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Try 'image_data' field first
+        if 'image_data' in page_data:
+            try:
+                # Convert base64 to image
+                image_data = base64.b64decode(page_data['image_data'])
+                st.image(io.BytesIO(image_data), use_container_width=True)
+                image_displayed = True
+            except Exception as e:
+                st.error(f"Error displaying image from image_data: {str(e)}")
+
+        # Try 'images' array if image_data didn't work
+        if not image_displayed and 'images' in page_data and len(page_data['images']) > 0:
+            for img in page_data['images']:
+                if 'image_base64' in img:
+                    try:
+                        st.image(img['image_base64'], use_container_width=True)
+                        image_displayed = True
+                        break
+                    except Exception as e:
+                        st.error(f"Error displaying image from images array: {str(e)}")
+
+        # Try alternative image source if still not displayed
+        if not image_displayed and 'raw_response_data' in result:
+            raw_data = result['raw_response_data']
+            if isinstance(raw_data, dict) and 'pages' in raw_data:
+                for raw_page in raw_data['pages']:
+                    if isinstance(raw_page, dict) and 'images' in raw_page:
+                        for img in raw_page['images']:
+                            if isinstance(img, dict) and 'base64' in img:
+                                st.image(img['base64'])
+                                st.caption("Image from OCR response")
+                                image_displayed = True
+                                break
+                    if image_displayed:
                         break
-            except Exception as e:
-                st.error(f"Error displaying image from images array: {str(e)}")
-
-        if not image_displayed:
-            st.info("No image available for this page.")

-
-
-        page_text = ""
-        if 'text' in page_data:
-            page_text = page_data['text']
-        elif 'markdown' in page_data:
-            page_text = page_data['markdown']
-
-        # Special handling for image markdown in page data
-        if page_text.startswith("![") and page_text.endswith(")"):
-            # Try to display image if not already displayed
-            if not image_displayed and 'raw_response_data' in result:
-                raw_data = result['raw_response_data']
-                if isinstance(raw_data, dict) and 'pages' in raw_data:
-                    for raw_page in raw_data['pages']:
-                        if isinstance(raw_page, dict) and 'images' in raw_page:
-                            for img in raw_page['images']:
-                                if isinstance(img, dict) and 'base64' in img:
-                                    st.image(img['base64'])
-                                    st.caption("Image from OCR response")
-                                    image_displayed = True
-                                    break
-                        if image_displayed:
-                            break
-
-            # Try to extract alt text
-            try:
-                alt_text = page_text[2:page_text.index(']')]
-                if alt_text and len(alt_text) > 5:  # Only show if alt text is meaningful
-                    st.info(f"Image description: {alt_text}")
-                else:
-                    st.info("This page contains an image with minimal text")
-            except:
-                st.info("This page contains an image with minimal text")
-
-            # Show warning if no image displayed
-            if not image_displayed:
-                st.warning("Image reference found in text, but no image data is available.")
+        if not image_displayed:
+            st.info("No image available for this page.")

-
-
-
+        # Extract and display alt text if available
+        page_text = ""
+        if 'text' in page_data:
+            page_text = page_data['text']
+        elif 'markdown' in page_data:
+            page_text = page_data['markdown']

-
-
-
-
-
-
-
-        st.info("No text available for this page.")
+        if page_text and page_text.startswith("![") and page_text.endswith(")"):
+            try:
+                alt_text = page_text[2:page_text.index(']')]
+                if alt_text and len(alt_text) > 5:  # Only show if alt text is meaningful
+                    st.caption(f"Image description: {alt_text}")
+            except:
+                pass

 def display_previous_results():
     """Display previous results tab content in a simplified, structured view"""