Spaces:

milwright
/

historical-ocr

Running

milwright committed on Mar 28

Commit

fd88d14

1 Parent(s): 6601c56

Improve image preprocessing and layout for better performance

- Simplified preprocessing preview to show only processed image instead of side-by-side comparison
- Added preprocessing metadata to show which filters were applied
- Reorganized layout with metadata at top right and document contents below
- Added new metadata container with improved styling
- Removed duplicate metadata display in left column

Files changed (2) hide show

app.py +52 -41
ui/custom.css +9 -0

app.py CHANGED Viewed

@@ -928,25 +928,30 @@ with main_tab1:
         with left_col:
             process_button = st.button("Process Document")
-            # Image preprocessing preview in upload column, right after the process button
             if any(preprocessing_options.values()) and uploaded_file.type.startswith('image/'):
-                with st.expander("Image Preprocessing Preview"):
-                    preview_cols = st.columns(2)
-                    with preview_cols[0]:
-                        st.markdown("**Original Image**")
-                        # Fixed width parameter to ensure compatibility
-                        st.image(uploaded_file, width=300)
-                    with preview_cols[1]:
-                        st.markdown("**Preprocessed Image**")
-                        try:
-                            processed_bytes = preprocess_image(uploaded_file.getvalue(), preprocessing_options)
-                            # Fixed width parameter to ensure compatibility
-                            st.image(io.BytesIO(processed_bytes), width=300)
-                        except Exception as e:
-                            st.error(f"Error in preprocessing: {str(e)}")
-                            st.info("Try using grayscale preprocessing for PNG images with transparency")
             # Empty container for progress indicators - will be filled during processing
             progress_placeholder = st.empty()
@@ -1111,8 +1116,37 @@ with main_tab1:
                     # Standard processing without custom prompt
                     result = process_file(uploaded_file, use_vision, preprocessing_options, progress_container=progress_placeholder)
-                # Display Document Contents in the right column
                 with right_col:
                     st.subheader("Document Contents")
                     # Start document content div with consistent styling class
                     st.markdown('<div class="document-content">', unsafe_allow_html=True)
@@ -1528,33 +1562,10 @@ with main_tab1:
                     # Close document content div
                     st.markdown('</div>', unsafe_allow_html=True)
-                # Add Document Metadata in the left column placeholder
                 with metadata_placeholder.container():
-                    st.subheader("Document Metadata")
                     st.success("**Document processed successfully**")
-                    # Display file info
-                    st.write(f"**File Name:** {result.get('file_name', uploaded_file.name)}")
-                    # Display info if only limited pages were processed
-                    if 'limited_pages' in result:
-                        st.info(f"Processed {result['limited_pages']['processed']} of {result['limited_pages']['total']} pages")
-                    # Display languages if available
-                    if 'languages' in result:
-                        languages = [lang for lang in result['languages'] if lang is not None]
-                        if languages:
-                            st.write(f"**Languages:** {', '.join(languages)}")
-                    # Display topics if available
-                    if 'topics' in result and result['topics']:
-                        st.write(f"**Topics:** {', '.join(result['topics'])}")
-                    # Processing time if available
-                    if 'processing_time' in result:
-                        proc_time = result['processing_time']
-                        st.write(f"**Processing Time:** {proc_time:.1f}s")
                 # Store the result in the previous results list
                 # Add timestamp to result for history tracking
                 result_copy = result.copy()

         with left_col:
             process_button = st.button("Process Document")
+            # Image preprocessing preview - automatically show only the preprocessed version
             if any(preprocessing_options.values()) and uploaded_file.type.startswith('image/'):
+                st.markdown("**Preprocessed Preview**")
+                try:
+                    processed_bytes = preprocess_image(uploaded_file.getvalue(), preprocessing_options)
+                    # Fixed width parameter to ensure compatibility
+                    st.image(io.BytesIO(processed_bytes), width=300)
+                    # Show preprocessing metadata next to the image
+                    meta_text = "Applied: "
+                    if preprocessing_options.get("document_type", "standard") != "standard":
+                        meta_text += f"Document type ({preprocessing_options['document_type']}), "
+                    if preprocessing_options.get("grayscale", False):
+                        meta_text += "Grayscale, "
+                    if preprocessing_options.get("denoise", False):
+                        meta_text += "Denoise, "
+                    if preprocessing_options.get("contrast", 0) != 0:
+                        meta_text += f"Contrast ({preprocessing_options['contrast']}), "
+                    if preprocessing_options.get("rotation", 0) != 0:
+                        meta_text += f"Rotation ({preprocessing_options['rotation']}°), "
+                    # Remove trailing comma and space
+                    meta_text = meta_text.rstrip(", ")
+                    st.caption(meta_text)
             # Empty container for progress indicators - will be filled during processing
             progress_placeholder = st.empty()
                     # Standard processing without custom prompt
                     result = process_file(uploaded_file, use_vision, preprocessing_options, progress_container=progress_placeholder)
+                # Document Metadata in the top right of the right column
                 with right_col:
+                    metadata_container = st.container()
+                    with metadata_container:
+                        st.subheader("Document Metadata")
+                        # Create a subtle container for metadata
+                        st.markdown('<div class="metadata-container">', unsafe_allow_html=True)
+                        # Display file info
+                        st.write(f"**File Name:** {result.get('file_name', uploaded_file.name)}")
+                        # Display info if only limited pages were processed
+                        if 'limited_pages' in result:
+                            st.info(f"Processed {result['limited_pages']['processed']} of {result['limited_pages']['total']} pages")
+                        # Display languages if available
+                        if 'languages' in result:
+                            languages = [lang for lang in result['languages'] if lang is not None]
+                            if languages:
+                                st.write(f"**Languages:** {', '.join(languages)}")
+                        # Display topics if available
+                        if 'topics' in result and result['topics']:
+                            st.write(f"**Topics:** {', '.join(result['topics'])}")
+                        # Processing time if available
+                        if 'processing_time' in result:
+                            proc_time = result['processing_time']
+                            st.write(f"**Processing Time:** {proc_time:.1f}s")
+                        st.markdown('</div>', unsafe_allow_html=True)
+                    # Display Document Contents below the metadata in the right column
                     st.subheader("Document Contents")
                     # Start document content div with consistent styling class
                     st.markdown('<div class="document-content">', unsafe_allow_html=True)
                     # Close document content div
                     st.markdown('</div>', unsafe_allow_html=True)
+                # Update the placeholder with a success message
                 with metadata_placeholder.container():
                     st.success("**Document processed successfully**")
                 # Store the result in the previous results list
                 # Add timestamp to result for history tracking
                 result_copy = result.copy()

ui/custom.css CHANGED Viewed

@@ -88,6 +88,15 @@
     object-fit: contain !important;
 }
 /* Additional image fixes for all containers */
 .document-content img,
 .markdown-text-container img,

     object-fit: contain !important;
 }
+/* Metadata container styling */
+.metadata-container {
+    background-color: #f8f9fa;
+    border-radius: 4px;
+    padding: 12px;
+    margin-bottom: 20px;
+    border-left: 3px solid #4285f4;
+}
 /* Additional image fixes for all containers */
 .document-content img,
 .markdown-text-container img,