milwright committed on
Commit
9a2238e
·
1 Parent(s): aabc02c

Enhance document viewer UI: Focus on images, reduce clutter

Browse files
Files changed (2) hide show
  1. app.py +2 -1
  2. ui_components.py +127 -105
app.py CHANGED
@@ -423,7 +423,8 @@ def process_document(uploaded_file, left_col, right_col, sidebar_options):
423
  max_pages=sidebar_options.get("max_pages", 3),
424
  pdf_rotation=sidebar_options.get("pdf_rotation", 0),
425
  custom_prompt=sidebar_options.get("custom_prompt", ""),
426
- perf_mode=sidebar_options.get("perf_mode", "Quality")
 
427
  )
428
 
429
  # Ensure temp_file_paths in session state is updated with any new paths
 
423
  max_pages=sidebar_options.get("max_pages", 3),
424
  pdf_rotation=sidebar_options.get("pdf_rotation", 0),
425
  custom_prompt=sidebar_options.get("custom_prompt", ""),
426
+ perf_mode=sidebar_options.get("perf_mode", "Quality"),
427
+ use_segmentation=sidebar_options.get("use_segmentation", False)
428
  )
429
 
430
  # Ensure temp_file_paths in session state is updated with any new paths
ui_components.py CHANGED
@@ -82,6 +82,13 @@ def create_sidebar_options():
82
  doc_layout = st.selectbox("Document Layout", DOCUMENT_LAYOUTS,
83
  help="Select the layout of your document")
84
 
 
 
 
 
 
 
 
85
  # Custom prompt
86
  custom_prompt = ""
87
  if doc_type != DOCUMENT_TYPES[0]: # Not auto-detect
@@ -103,33 +110,43 @@ def create_sidebar_options():
103
  help="Customize the instructions for processing this document",
104
  height=80)
105
 
106
- # Image preprocessing options in an expandable section
107
- with st.expander("Image Preprocessing"):
108
- # Grayscale conversion
109
- grayscale = st.checkbox("Convert to Grayscale",
110
  value=False,
111
  help="Convert color images to grayscale for better OCR")
112
-
113
- # Denoise
114
- denoise = st.checkbox("Denoise Image",
115
  value=False,
116
  help="Remove noise from the image")
117
-
118
- # Contrast adjustment
119
- contrast = st.slider("Contrast Adjustment",
120
  min_value=-50,
121
  max_value=50,
122
  value=0,
123
  step=10,
124
  help="Adjust image contrast")
125
-
126
- # Rotation
127
- rotation = st.slider("Rotation",
128
  min_value=-45,
129
  max_value=45,
130
  value=0,
131
  step=5,
132
  help="Rotate image if needed")
 
 
 
 
 
 
 
 
 
 
133
 
134
  # Create preprocessing options dictionary
135
  # Set document_type based on selection in UI
@@ -169,7 +186,8 @@ def create_sidebar_options():
169
  "max_pages": max_pages,
170
  "pdf_rotation": pdf_rotation,
171
  "custom_prompt": custom_prompt,
172
- "preprocessing_options": preprocessing_options
 
173
  }
174
 
175
  return options
@@ -198,10 +216,48 @@ def create_file_uploader():
198
  )
199
  return uploaded_file
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  def display_results(result, container, custom_prompt=""):
202
  """Display OCR results in the provided container"""
203
  with container:
204
- # No heading for document metadata - start directly with content
 
205
 
206
  # Create a compact metadata section
207
  meta_html = '<div style="display: flex; flex-wrap: wrap; gap: 0.3rem; margin-bottom: 0.3rem;">'
@@ -459,19 +515,10 @@ def display_results(result, container, custom_prompt=""):
459
  'index': img_idx
460
  })
461
 
462
- # Display images at the top if available
463
- if images_to_display:
464
- st.markdown("### Document Images")
465
- # Create columns for a grid layout (up to 2 columns to make images larger)
466
- cols_count = min(2, len(images_to_display))
467
- image_cols = st.columns(cols_count)
468
-
469
- # Display each image in a column with minimal spacing
470
- for i, img in enumerate(images_to_display):
471
- with image_cols[i % cols_count]:
472
- # Compact image display
473
- st.image(img['data'], use_container_width=True)
474
- st.markdown(f"<p style='margin-top:-5px; font-size:0.8rem; color:#666; text-align:center;'>Document Image {i+1}</p>", unsafe_allow_html=True)
475
 
476
  # Organize sections in a logical order
477
  section_order = ["title", "author", "date", "summary", "content", "transcript", "metadata"]
@@ -676,88 +723,63 @@ def display_document_with_images(result):
676
  for i, page_data in enumerate(pages_data):
677
  st.markdown(f"### Page {i+1}")
678
 
679
- # Create columns for image and text
680
- img_col, text_col = st.columns([1, 1])
 
681
 
682
- with img_col:
683
- # Display the image - check multiple possible field names
684
- image_displayed = False
685
-
686
- # Try 'image_data' field first
687
- if 'image_data' in page_data:
688
- try:
689
- # Convert base64 to image
690
- image_data = base64.b64decode(page_data['image_data'])
691
- st.image(io.BytesIO(image_data), use_container_width=True)
692
- image_displayed = True
693
- except Exception as e:
694
- st.error(f"Error displaying image from image_data: {str(e)}")
695
-
696
- # Try 'images' array if image_data didn't work
697
- if not image_displayed and 'images' in page_data and len(page_data['images']) > 0:
698
- for img in page_data['images']:
699
- if 'image_base64' in img:
700
- try:
701
- st.image(img['image_base64'], use_container_width=True)
702
- image_displayed = True
 
 
 
 
 
 
 
 
 
 
 
 
 
703
  break
704
- except Exception as e:
705
- st.error(f"Error displaying image from images array: {str(e)}")
706
-
707
- if not image_displayed:
708
- st.info("No image available for this page.")
709
 
710
- with text_col:
711
- # Get text from various possible fields
712
- page_text = ""
713
- if 'text' in page_data:
714
- page_text = page_data['text']
715
- elif 'markdown' in page_data:
716
- page_text = page_data['markdown']
717
-
718
- # Special handling for image markdown in page data
719
- if page_text.startswith("![") and page_text.endswith(")"):
720
- # Try to display image if not already displayed
721
- if not image_displayed and 'raw_response_data' in result:
722
- raw_data = result['raw_response_data']
723
- if isinstance(raw_data, dict) and 'pages' in raw_data:
724
- for raw_page in raw_data['pages']:
725
- if isinstance(raw_page, dict) and 'images' in raw_page:
726
- for img in raw_page['images']:
727
- if isinstance(img, dict) and 'base64' in img:
728
- st.image(img['base64'])
729
- st.caption("Image from OCR response")
730
- image_displayed = True
731
- break
732
- if image_displayed:
733
- break
734
-
735
- # Try to extract alt text
736
- try:
737
- alt_text = page_text[2:page_text.index(']')]
738
- if alt_text and len(alt_text) > 5: # Only show if alt text is meaningful
739
- st.info(f"Image description: {alt_text}")
740
- else:
741
- st.info("This page contains an image with minimal text")
742
- except:
743
- st.info("This page contains an image with minimal text")
744
-
745
- # Show warning if no image displayed
746
- if not image_displayed:
747
- st.warning("Image reference found in text, but no image data is available.")
748
 
749
- # If no text found but we have raw_text in ocr_contents
750
- if not page_text and 'ocr_contents' in result and 'raw_text' in result['ocr_contents']:
751
- page_text = result['ocr_contents']['raw_text']
 
 
 
752
 
753
- # Display the text with editing capability
754
- if page_text:
755
- edited_text = st.text_area(f"Page {i+1} Text", page_text, height=300, key=f"page_text_{i}")
756
-
757
- # Add a simple button to copy the edited text to clipboard
758
- st.button(f"Copy Text", key=f"copy_btn_{i}")
759
- else:
760
- st.info("No text available for this page.")
761
 
762
  def display_previous_results():
763
  """Display previous results tab content in a simplified, structured view"""
 
82
  doc_layout = st.selectbox("Document Layout", DOCUMENT_LAYOUTS,
83
  help="Select the layout of your document")
84
 
85
+ # Initialize preprocessing variables with default values
86
+ grayscale = False
87
+ denoise = False
88
+ contrast = 0
89
+ rotation = 0
90
+ use_segmentation = False
91
+
92
  # Custom prompt
93
  custom_prompt = ""
94
  if doc_type != DOCUMENT_TYPES[0]: # Not auto-detect
 
110
  help="Customize the instructions for processing this document",
111
  height=80)
112
 
113
+ # Image preprocessing options in an expandable section
114
+ with st.expander("Image Preprocessing"):
115
+ # Grayscale conversion
116
+ grayscale = st.checkbox("Convert to Grayscale",
117
  value=False,
118
  help="Convert color images to grayscale for better OCR")
119
+
120
+ # Denoise
121
+ denoise = st.checkbox("Denoise Image",
122
  value=False,
123
  help="Remove noise from the image")
124
+
125
+ # Contrast adjustment
126
+ contrast = st.slider("Contrast Adjustment",
127
  min_value=-50,
128
  max_value=50,
129
  value=0,
130
  step=10,
131
  help="Adjust image contrast")
132
+
133
+ # Rotation
134
+ rotation = st.slider("Rotation",
135
  min_value=-45,
136
  max_value=45,
137
  value=0,
138
  step=5,
139
  help="Rotate image if needed")
140
+
141
+ # Add image segmentation option
142
+ st.markdown("### Advanced Options")
143
+ use_segmentation = st.toggle("Enable Image Segmentation",
144
+ value=False,
145
+ help="Segment the image into text and image regions for better OCR results on complex documents")
146
+
147
+ # Show explanation if segmentation is enabled
148
+ if use_segmentation:
149
+ st.info("Image segmentation identifies distinct text regions in complex documents, improving OCR accuracy. This is especially helpful for documents with mixed content like the Magician illustration.")
150
 
151
  # Create preprocessing options dictionary
152
  # Set document_type based on selection in UI
 
186
  "max_pages": max_pages,
187
  "pdf_rotation": pdf_rotation,
188
  "custom_prompt": custom_prompt,
189
+ "preprocessing_options": preprocessing_options,
190
+ "use_segmentation": use_segmentation if 'use_segmentation' in locals() else False
191
  }
192
 
193
  return options
 
216
  )
217
  return uploaded_file
218
 
219
+ # Function removed - now using inline implementation in app.py
220
+ def _unused_display_preprocessing_preview(uploaded_file, preprocessing_options):
221
+ """Display a preview of image with preprocessing options applied"""
222
+ if (any(preprocessing_options.values()) and
223
+ uploaded_file.type.startswith('image/')):
224
+
225
+ st.markdown("**Preprocessed Preview**")
226
+ try:
227
+ # Create a container for the preview
228
+ with st.container():
229
+ processed_bytes = preprocess_image(uploaded_file.getvalue(), preprocessing_options)
230
+ # Convert image to base64 and display as HTML to avoid fullscreen button
231
+ img_data = base64.b64encode(processed_bytes).decode()
232
+ img_html = f'<img src="data:image/jpeg;base64,{img_data}" style="width:100%; border-radius:4px;">'
233
+ st.markdown(img_html, unsafe_allow_html=True)
234
+
235
+ # Show preprocessing metadata in a well-formatted caption
236
+ meta_items = []
237
+ if preprocessing_options.get("document_type", "standard") != "standard":
238
+ meta_items.append(f"Document type ({preprocessing_options['document_type']})")
239
+ if preprocessing_options.get("grayscale", False):
240
+ meta_items.append("Grayscale")
241
+ if preprocessing_options.get("denoise", False):
242
+ meta_items.append("Denoise")
243
+ if preprocessing_options.get("contrast", 0) != 0:
244
+ meta_items.append(f"Contrast ({preprocessing_options['contrast']})")
245
+ if preprocessing_options.get("rotation", 0) != 0:
246
+ meta_items.append(f"Rotation ({preprocessing_options['rotation']}°)")
247
+
248
+ # Only show "Applied:" if there are actual preprocessing steps
249
+ if meta_items:
250
+ meta_text = "Applied: " + ", ".join(meta_items)
251
+ st.caption(meta_text)
252
+ except Exception as e:
253
+ st.error(f"Error in preprocessing: {str(e)}")
254
+ st.info("Try using grayscale preprocessing for PNG images with transparency")
255
+
256
  def display_results(result, container, custom_prompt=""):
257
  """Display OCR results in the provided container"""
258
  with container:
259
+ # Add heading for document metadata
260
+ st.markdown("### Document Metadata")
261
 
262
  # Create a compact metadata section
263
  meta_html = '<div style="display: flex; flex-wrap: wrap; gap: 0.3rem; margin-bottom: 0.3rem;">'
 
515
  'index': img_idx
516
  })
517
 
518
+ # Simple display of image without dropdown or Document Image tab
519
+ if images_to_display and len(images_to_display) > 0:
520
+ # Just display the first image directly
521
+ st.image(images_to_display[0]['data'], use_container_width=True)
 
 
 
 
 
 
 
 
 
522
 
523
  # Organize sections in a logical order
524
  section_order = ["title", "author", "date", "summary", "content", "transcript", "metadata"]
 
723
  for i, page_data in enumerate(pages_data):
724
  st.markdown(f"### Page {i+1}")
725
 
726
+ # Display only the image (removed text column)
727
+ # Display the image - check multiple possible field names
728
+ image_displayed = False
729
 
730
+ # Try 'image_data' field first
731
+ if 'image_data' in page_data:
732
+ try:
733
+ # Convert base64 to image
734
+ image_data = base64.b64decode(page_data['image_data'])
735
+ st.image(io.BytesIO(image_data), use_container_width=True)
736
+ image_displayed = True
737
+ except Exception as e:
738
+ st.error(f"Error displaying image from image_data: {str(e)}")
739
+
740
+ # Try 'images' array if image_data didn't work
741
+ if not image_displayed and 'images' in page_data and len(page_data['images']) > 0:
742
+ for img in page_data['images']:
743
+ if 'image_base64' in img:
744
+ try:
745
+ st.image(img['image_base64'], use_container_width=True)
746
+ image_displayed = True
747
+ break
748
+ except Exception as e:
749
+ st.error(f"Error displaying image from images array: {str(e)}")
750
+
751
+ # Try alternative image source if still not displayed
752
+ if not image_displayed and 'raw_response_data' in result:
753
+ raw_data = result['raw_response_data']
754
+ if isinstance(raw_data, dict) and 'pages' in raw_data:
755
+ for raw_page in raw_data['pages']:
756
+ if isinstance(raw_page, dict) and 'images' in raw_page:
757
+ for img in raw_page['images']:
758
+ if isinstance(img, dict) and 'base64' in img:
759
+ st.image(img['base64'])
760
+ st.caption("Image from OCR response")
761
+ image_displayed = True
762
+ break
763
+ if image_displayed:
764
  break
 
 
 
 
 
765
 
766
+ if not image_displayed:
767
+ st.info("No image available for this page.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
768
 
769
+ # Extract and display alt text if available
770
+ page_text = ""
771
+ if 'text' in page_data:
772
+ page_text = page_data['text']
773
+ elif 'markdown' in page_data:
774
+ page_text = page_data['markdown']
775
 
776
+ if page_text and page_text.startswith("![") and page_text.endswith(")"):
777
+ try:
778
+ alt_text = page_text[2:page_text.index(']')]
779
+ if alt_text and len(alt_text) > 5: # Only show if alt text is meaningful
780
+ st.caption(f"Image description: {alt_text}")
781
+ except:
782
+ pass
 
783
 
784
  def display_previous_results():
785
  """Display previous results tab content in a simplified, structured view"""