Spaces:

milwright
/

historical-ocr

Running

App Files Files Community

milwright committed on Apr 25

Commit

9a2238e

1 Parent(s): aabc02c

Enhance document viewer UI: Focus on images, reduce clutter

Browse files

Files changed (2) hide show

app.py +2 -1
ui_components.py +127 -105

app.py CHANGED Viewed

@@ -423,7 +423,8 @@ def process_document(uploaded_file, left_col, right_col, sidebar_options):
                 max_pages=sidebar_options.get("max_pages", 3),
                 pdf_rotation=sidebar_options.get("pdf_rotation", 0),
                 custom_prompt=sidebar_options.get("custom_prompt", ""),
-                perf_mode=sidebar_options.get("perf_mode", "Quality")
             )
             # Ensure temp_file_paths in session state is updated with any new paths

                 max_pages=sidebar_options.get("max_pages", 3),
                 pdf_rotation=sidebar_options.get("pdf_rotation", 0),
                 custom_prompt=sidebar_options.get("custom_prompt", ""),
+                perf_mode=sidebar_options.get("perf_mode", "Quality"),
+                use_segmentation=sidebar_options.get("use_segmentation", False)
             )
             # Ensure temp_file_paths in session state is updated with any new paths

ui_components.py CHANGED Viewed

@@ -82,6 +82,13 @@ def create_sidebar_options():
             doc_layout = st.selectbox("Document Layout", DOCUMENT_LAYOUTS,
                                      help="Select the layout of your document")
             # Custom prompt
             custom_prompt = ""
             if doc_type != DOCUMENT_TYPES[0]:  # Not auto-detect
@@ -103,33 +110,43 @@ def create_sidebar_options():
                                            help="Customize the instructions for processing this document",
                                            height=80)
-            # Image preprocessing options in an expandable section
-            with st.expander("Image Preprocessing"):
-                # Grayscale conversion
-                grayscale = st.checkbox("Convert to Grayscale",
                                       value=False,
                                       help="Convert color images to grayscale for better OCR")
-                # Denoise
-                denoise = st.checkbox("Denoise Image",
                                     value=False,
                                     help="Remove noise from the image")
-                # Contrast adjustment
-                contrast = st.slider("Contrast Adjustment",
                                    min_value=-50,
                                    max_value=50,
                                    value=0,
                                    step=10,
                                    help="Adjust image contrast")
-                # Rotation
-                rotation = st.slider("Rotation",
                                    min_value=-45,
                                    max_value=45,
                                    value=0,
                                    step=5,
                                    help="Rotate image if needed")
             # Create preprocessing options dictionary
             # Set document_type based on selection in UI
@@ -169,7 +186,8 @@ def create_sidebar_options():
                 "max_pages": max_pages,
                 "pdf_rotation": pdf_rotation,
                 "custom_prompt": custom_prompt,
-                "preprocessing_options": preprocessing_options
             }
             return options
@@ -198,10 +216,48 @@ def create_file_uploader():
     )
     return uploaded_file
 def display_results(result, container, custom_prompt=""):
     """Display OCR results in the provided container"""
     with container:
-        # No heading for document metadata - start directly with content
         # Create a compact metadata section
         meta_html = '<div style="display: flex; flex-wrap: wrap; gap: 0.3rem; margin-bottom: 0.3rem;">'
@@ -459,19 +515,10 @@ def display_results(result, container, custom_prompt=""):
                                 'index': img_idx
                             })
-                    # Display images at the top if available
-                    if images_to_display:
-                        st.markdown("### Document Images")
-                        # Create columns for a grid layout (up to 2 columns to make images larger)
-                        cols_count = min(2, len(images_to_display))
-                        image_cols = st.columns(cols_count)
-                        # Display each image in a column with minimal spacing
-                        for i, img in enumerate(images_to_display):
-                            with image_cols[i % cols_count]:
-                                # Compact image display
-                                st.image(img['data'], use_container_width=True)
-                                st.markdown(f"<p style='margin-top:-5px; font-size:0.8rem; color:#666; text-align:center;'>Document Image {i+1}</p>", unsafe_allow_html=True)
                     # Organize sections in a logical order
                     section_order = ["title", "author", "date", "summary", "content", "transcript", "metadata"]
@@ -676,88 +723,63 @@ def display_document_with_images(result):
     for i, page_data in enumerate(pages_data):
         st.markdown(f"### Page {i+1}")
-        # Create columns for image and text
-        img_col, text_col = st.columns([1, 1])
-        with img_col:
-            # Display the image - check multiple possible field names
-            image_displayed = False
-            # Try 'image_data' field first
-            if 'image_data' in page_data:
-                try:
-                    # Convert base64 to image
-                    image_data = base64.b64decode(page_data['image_data'])
-                    st.image(io.BytesIO(image_data), use_container_width=True)
-                    image_displayed = True
-                except Exception as e:
-                    st.error(f"Error displaying image from image_data: {str(e)}")
-            # Try 'images' array if image_data didn't work
-            if not image_displayed and 'images' in page_data and len(page_data['images']) > 0:
-                for img in page_data['images']:
-                    if 'image_base64' in img:
-                        try:
-                            st.image(img['image_base64'], use_container_width=True)
-                            image_displayed = True
                             break
-                        except Exception as e:
-                            st.error(f"Error displaying image from images array: {str(e)}")
-            if not image_displayed:
-                st.info("No image available for this page.")
-        with text_col:
-            # Get text from various possible fields
-            page_text = ""
-            if 'text' in page_data:
-                page_text = page_data['text']
-            elif 'markdown' in page_data:
-                page_text = page_data['markdown']
-            # Special handling for image markdown in page data
-            if page_text.startswith("![") and page_text.endswith(")"):
-                # Try to display image if not already displayed
-                if not image_displayed and 'raw_response_data' in result:
-                    raw_data = result['raw_response_data']
-                    if isinstance(raw_data, dict) and 'pages' in raw_data:
-                        for raw_page in raw_data['pages']:
-                            if isinstance(raw_page, dict) and 'images' in raw_page:
-                                for img in raw_page['images']:
-                                    if isinstance(img, dict) and 'base64' in img:
-                                        st.image(img['base64'])
-                                        st.caption("Image from OCR response")
-                                        image_displayed = True
-                                        break
-                                if image_displayed:
-                                    break
-                # Try to extract alt text
-                try:
-                    alt_text = page_text[2:page_text.index(']')]
-                    if alt_text and len(alt_text) > 5:  # Only show if alt text is meaningful
-                        st.info(f"Image description: {alt_text}")
-                    else:
-                        st.info("This page contains an image with minimal text")
-                except:
-                    st.info("This page contains an image with minimal text")
-                # Show warning if no image displayed
-                if not image_displayed:
-                    st.warning("Image reference found in text, but no image data is available.")
-            # If no text found but we have raw_text in ocr_contents
-            if not page_text and 'ocr_contents' in result and 'raw_text' in result['ocr_contents']:
-                page_text = result['ocr_contents']['raw_text']
-            # Display the text with editing capability
-            if page_text:
-                edited_text = st.text_area(f"Page {i+1} Text", page_text, height=300, key=f"page_text_{i}")
-                # Add a simple button to copy the edited text to clipboard
-                st.button(f"Copy Text", key=f"copy_btn_{i}")
-            else:
-                st.info("No text available for this page.")
 def display_previous_results():
     """Display previous results tab content in a simplified, structured view"""

             doc_layout = st.selectbox("Document Layout", DOCUMENT_LAYOUTS,
                                      help="Select the layout of your document")
+            # Initialize preprocessing variables with default values
+            grayscale = False
+            denoise = False
+            contrast = 0
+            rotation = 0
+            use_segmentation = False
             # Custom prompt
             custom_prompt = ""
             if doc_type != DOCUMENT_TYPES[0]:  # Not auto-detect
                                            help="Customize the instructions for processing this document",
                                            height=80)
+                # Image preprocessing options in an expandable section
+                with st.expander("Image Preprocessing"):
+                    # Grayscale conversion
+                    grayscale = st.checkbox("Convert to Grayscale",
                                       value=False,
                                       help="Convert color images to grayscale for better OCR")
+                    # Denoise
+                    denoise = st.checkbox("Denoise Image",
                                     value=False,
                                     help="Remove noise from the image")
+                    # Contrast adjustment
+                    contrast = st.slider("Contrast Adjustment",
                                    min_value=-50,
                                    max_value=50,
                                    value=0,
                                    step=10,
                                    help="Adjust image contrast")
+                    # Rotation
+                    rotation = st.slider("Rotation",
                                    min_value=-45,
                                    max_value=45,
                                    value=0,
                                    step=5,
                                    help="Rotate image if needed")
+                    # Add image segmentation option
+                    st.markdown("### Advanced Options")
+                    use_segmentation = st.toggle("Enable Image Segmentation",
+                                        value=False,
+                                        help="Segment the image into text and image regions for better OCR results on complex documents")
+                    # Show explanation if segmentation is enabled
+                    if use_segmentation:
+                        st.info("Image segmentation identifies distinct text regions in complex documents, improving OCR accuracy. This is especially helpful for documents with mixed content like the Magician illustration.")
             # Create preprocessing options dictionary
             # Set document_type based on selection in UI
                 "max_pages": max_pages,
                 "pdf_rotation": pdf_rotation,
                 "custom_prompt": custom_prompt,
+                "preprocessing_options": preprocessing_options,
+                "use_segmentation": use_segmentation if 'use_segmentation' in locals() else False
             }
             return options
     )
     return uploaded_file
+# Function removed - now using inline implementation in app.py
+def _unused_display_preprocessing_preview(uploaded_file, preprocessing_options):
+    """Display a preview of image with preprocessing options applied"""
+    if (any(preprocessing_options.values()) and
+        uploaded_file.type.startswith('image/')):
+        st.markdown("**Preprocessed Preview**")
+        try:
+            # Create a container for the preview
+            with st.container():
+                processed_bytes = preprocess_image(uploaded_file.getvalue(), preprocessing_options)
+                # Convert image to base64 and display as HTML to avoid fullscreen button
+                img_data = base64.b64encode(processed_bytes).decode()
+                img_html = f'<img src="data:image/jpeg;base64,{img_data}" style="width:100%; border-radius:4px;">'
+                st.markdown(img_html, unsafe_allow_html=True)
+                # Show preprocessing metadata in a well-formatted caption
+                meta_items = []
+                if preprocessing_options.get("document_type", "standard") != "standard":
+                    meta_items.append(f"Document type ({preprocessing_options['document_type']})")
+                if preprocessing_options.get("grayscale", False):
+                    meta_items.append("Grayscale")
+                if preprocessing_options.get("denoise", False):
+                    meta_items.append("Denoise")
+                if preprocessing_options.get("contrast", 0) != 0:
+                    meta_items.append(f"Contrast ({preprocessing_options['contrast']})")
+                if preprocessing_options.get("rotation", 0) != 0:
+                    meta_items.append(f"Rotation ({preprocessing_options['rotation']}°)")
+                # Only show "Applied:" if there are actual preprocessing steps
+                if meta_items:
+                    meta_text = "Applied: " + ", ".join(meta_items)
+                    st.caption(meta_text)
+        except Exception as e:
+            st.error(f"Error in preprocessing: {str(e)}")
+            st.info("Try using grayscale preprocessing for PNG images with transparency")
 def display_results(result, container, custom_prompt=""):
     """Display OCR results in the provided container"""
     with container:
+        # Add heading for document metadata
+        st.markdown("### Document Metadata")
         # Create a compact metadata section
         meta_html = '<div style="display: flex; flex-wrap: wrap; gap: 0.3rem; margin-bottom: 0.3rem;">'
                                 'index': img_idx
                             })
+                    # Simple display of image without dropdown or Document Image tab
+                    if images_to_display and len(images_to_display) > 0:
+                        # Just display the first image directly
+                        st.image(images_to_display[0]['data'], use_container_width=True)
                     # Organize sections in a logical order
                     section_order = ["title", "author", "date", "summary", "content", "transcript", "metadata"]
     for i, page_data in enumerate(pages_data):
         st.markdown(f"### Page {i+1}")
+        # Display only the image (removed text column)
+        # Display the image - check multiple possible field names
+        image_displayed = False
+        # Try 'image_data' field first
+        if 'image_data' in page_data:
+            try:
+                # Convert base64 to image
+                image_data = base64.b64decode(page_data['image_data'])
+                st.image(io.BytesIO(image_data), use_container_width=True)
+                image_displayed = True
+            except Exception as e:
+                st.error(f"Error displaying image from image_data: {str(e)}")
+        # Try 'images' array if image_data didn't work
+        if not image_displayed and 'images' in page_data and len(page_data['images']) > 0:
+            for img in page_data['images']:
+                if 'image_base64' in img:
+                    try:
+                        st.image(img['image_base64'], use_container_width=True)
+                        image_displayed = True
+                        break
+                    except Exception as e:
+                        st.error(f"Error displaying image from images array: {str(e)}")
+        # Try alternative image source if still not displayed
+        if not image_displayed and 'raw_response_data' in result:
+            raw_data = result['raw_response_data']
+            if isinstance(raw_data, dict) and 'pages' in raw_data:
+                for raw_page in raw_data['pages']:
+                    if isinstance(raw_page, dict) and 'images' in raw_page:
+                        for img in raw_page['images']:
+                            if isinstance(img, dict) and 'base64' in img:
+                                st.image(img['base64'])
+                                st.caption("Image from OCR response")
+                                image_displayed = True
+                                break
+                        if image_displayed:
                             break
+        if not image_displayed:
+            st.info("No image available for this page.")
+        # Extract and display alt text if available
+        page_text = ""
+        if 'text' in page_data:
+            page_text = page_data['text']
+        elif 'markdown' in page_data:
+            page_text = page_data['markdown']
+        if page_text and page_text.startswith("![") and page_text.endswith(")"):
+            try:
+                alt_text = page_text[2:page_text.index(']')]
+                if alt_text and len(alt_text) > 5:  # Only show if alt text is meaningful
+                    st.caption(f"Image description: {alt_text}")
+            except:
+                pass
 def display_previous_results():
     """Display previous results tab content in a simplified, structured view"""