Spaces:

milwright
/

historical-ocr

Running

App Files Community

milwright committed on Mar 28

Commit

9f42b50

1 Parent(s): 6931bb0

Fix document metadata display and simplify example documents section

Browse files

Files changed (1) hide show

app.py +45 -178

app.py CHANGED Viewed

@@ -1153,40 +1153,44 @@ with main_tab1:
                     # Standard processing without custom prompt
                     result = process_file(uploaded_file, use_vision, preprocessing_options, progress_container=progress_placeholder)
-                # Document Metadata in the top right of the right column
                 with right_col:
-                    # Add the subheader separately to avoid the white bar
-                    st.subheader("Document Metadata")
-                    # Create a clean metadata container
-                    st.markdown('<div class="metadata-container">', unsafe_allow_html=True)
-                    # Display file info
-                    st.write(f"**File Name:** {result.get('file_name', uploaded_file.name)}")
-                    # Display info if only limited pages were processed
-                    if 'limited_pages' in result:
-                        st.info(f"Processed {result['limited_pages']['processed']} of {result['limited_pages']['total']} pages")
-                    # Display languages if available
-                    if 'languages' in result:
-                        languages = [lang for lang in result['languages'] if lang is not None]
-                        if languages:
-                            st.write(f"**Languages:** {', '.join(languages)}")
-                    # Display topics if available
-                    if 'topics' in result and result['topics']:
-                        st.write(f"**Topics:** {', '.join(result['topics'])}")
-                    # Processing time if available
-                    if 'processing_time' in result:
-                        proc_time = result['processing_time']
-                        st.write(f"**Processing Time:** {proc_time:.1f}s")
-                    # Close the metadata container
-                    st.markdown('</div>', unsafe_allow_html=True)
-                    # Display Document Contents below the metadata in the right column
-                    st.subheader("Document Contents")
                     # Start document content div with consistent styling class
                     st.markdown('<div class="document-content">', unsafe_allow_html=True)
                     if 'ocr_contents' in result:
@@ -1637,156 +1641,19 @@ with main_tab1:
     else:
         # Empty placeholder - we've moved the upload instruction to the file_uploader
-        # Show example images in a responsive grid
         st.subheader("Example Documents")
-        # Add a dedicated container with custom styling for the examples
         st.markdown("""
-        <style>
-        .example-grid {
-            display: grid;
-            grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
-            gap: 20px;
-            margin-top: 20px;
-        }
-        .example-item {
-            border-radius: 8px;
-            overflow: hidden;
-            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
-            background: white;
-            transition: transform 0.2s;
-            height: 100%;
-        }
-        .example-item:hover {
-            transform: translateY(-5px);
-            box-shadow: 0 5px 15px rgba(0,0,0,0.1);
-        }
-        .example-image {
-            width: 100%;
-            aspect-ratio: 4/3;
-            object-fit: cover;
-        }
-        .example-caption {
-            padding: 10px;
-            font-size: 14px;
-            text-align: center;
-            color: #333;
-        }
-        @media (max-width: 768px) {
-            .example-grid {
-                grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
-            }
-        }
-        @media (max-width: 640px) {
-            .example-grid {
-                grid-template-columns: repeat(auto-fill, minmax(130px, 1fr));
-            }
-        }
-        </style>
-        """, unsafe_allow_html=True)
-        # Find sample images from the input directory
-        input_dir = Path(__file__).parent / "input"
-        sample_images = []
-        backup_dir = Path(__file__).parent / "backup" / "input"
-        if input_dir.exists():
-            # Define images in specific order per requirements
-            ordered_image_names = [
-                "magellan-travels.jpg",
-                "americae-retectio.jpg",
-                "handwritten-letter.jpg",
-                "milgram-flier.png",
-                "recipe.jpg",
-                "The Magician, or Bottle Cungerer.jpeg"
-            ]
-            # Create the image list in the desired order
-            ordered_sample_images = []
-            for img_name in ordered_image_names:
-                img_path = input_dir / img_name
-                if img_path.exists():
-                    ordered_sample_images.append(img_path)
-            # Use ordered images
-            sample_images = ordered_sample_images
-            # Fill in with additional images if needed
-            if len(sample_images) < 6:
-                # Get all remaining images from input directory
-                all_images = set(
-                    list(input_dir.glob("*.jpg")) +
-                    list(input_dir.glob("*.jpeg")) +
-                    list(input_dir.glob("*.png")) +
-                    list(input_dir.glob("*.tif"))
-                )
-                # Remove the already selected images
-                remaining_images = [img for img in all_images if img not in sample_images]
-                # Add remaining images to fill the grid
-                sample_images.extend(remaining_images[:6-len(sample_images)])
-            # Try backup directory if still not enough
-            if len(sample_images) < 6 and backup_dir.exists():
-                remaining = 6 - len(sample_images)
-                backup_samples = (
-                    list(backup_dir.glob("*.jpg")) +
-                    list(backup_dir.glob("*.jpeg")) +
-                    list(backup_dir.glob("*.png"))
-                )[:remaining]
-                sample_images.extend(backup_samples)
-        # Render the examples using custom HTML for better layout control
-        if sample_images:
-            # Start the grid container
-            grid_html = '<div class="example-grid">'
-            # Add each example to the grid
-            for img_path in sample_images:
-                if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
-                    try:
-                        # Convert image to base64 for embedding
-                        with open(img_path, "rb") as img_file:
-                            img_data = base64.b64encode(img_file.read()).decode()
-                        # Create item HTML with optimized responsive layout
-                        img_name = img_path.name
-                        # Trim long names
-                        display_name = img_name[:18] + "..." if len(img_name) > 20 else img_name
-                        grid_html += f'''
-                        <div class="example-item">
-                            <img src="data:image/{img_path.suffix.lower().replace('.', '')};base64,{img_data}"
-                                 alt="{img_name}" class="example-image">
-                            <div class="example-caption">{display_name}</div>
-                        </div>
-                        '''
-                    except Exception as e:
-                        # Fallback for any loading errors
-                        grid_html += f'''
-                        <div class="example-item">
-                            <div style="height:150px;display:flex;align-items:center;justify-content:center;background:#f8f9fa;">
-                                <span style="color:#666;">Image Error</span>
-                            </div>
-                            <div class="example-caption">{img_path.name}</div>
-                        </div>
-                        '''
-                else:
-                    # For PDFs, show placeholder
-                    grid_html += f'''
-                    <div class="example-item">
-                        <div style="height:150px;display:flex;align-items:center;justify-content:center;background:#f8f9fa;">
-                            <span style="font-size:32px;">📄</span>
-                        </div>
-                        <div class="example-caption">{img_path.name}</div>
-                    </div>
-                    '''
-            # Close the grid container
-            grid_html += '</div>'
-            # Render the grid with unsafe_allow_html
-            st.markdown(grid_html, unsafe_allow_html=True)
-        else:
-            st.info("No example documents found. Upload your own document to get started.")# Minor update

                     # Standard processing without custom prompt
                     result = process_file(uploaded_file, use_vision, preprocessing_options, progress_container=progress_placeholder)
+                # Document results will be shown in the right column
                 with right_col:
+                    # Initial placeholder for the document title/heading
+                    st.markdown(f"## Document: {result.get('file_name', uploaded_file.name)}")
+                    # Display Document Contents section
+                    st.subheader("Document Contents")
+                    # Add Document Metadata section inside the content area
+                    with st.expander("Document Metadata", expanded=True):
+                        # Create a clean metadata container
+                        st.markdown('<div class="metadata-container">', unsafe_allow_html=True)
+                        # Display file info
+                        st.write(f"**File Name:** {result.get('file_name', uploaded_file.name)}")
+                        # Display info if only limited pages were processed
+                        if 'limited_pages' in result:
+                            st.info(f"Processed {result['limited_pages']['processed']} of {result['limited_pages']['total']} pages")
+                        # Display languages if available
+                        if 'languages' in result:
+                            languages = [lang for lang in result['languages'] if lang is not None]
+                            if languages:
+                                st.write(f"**Languages:** {', '.join(languages)}")
+                        # Display topics if available
+                        if 'topics' in result and result['topics']:
+                            st.write(f"**Topics:** {', '.join(result['topics'])}")
+                        # Processing time if available
+                        if 'processing_time' in result:
+                            proc_time = result['processing_time']
+                            st.write(f"**Processing Time:** {proc_time:.1f}s")
+                        # Close the metadata container
+                        st.markdown('</div>', unsafe_allow_html=True)
                     # Start document content div with consistent styling class
                     st.markdown('<div class="document-content">', unsafe_allow_html=True)
                     if 'ocr_contents' in result:
     else:
         # Empty placeholder - we've moved the upload instruction to the file_uploader
+        # Show example images in a simpler layout
         st.subheader("Example Documents")
+        # Add a simplified info message about examples
         st.markdown("""
+        This app can process various historical documents:
+        - Historical photographs, maps, and manuscripts
+        - Handwritten letters and documents
+        - Printed books and articles
+        - Multi-page PDFs
+        Upload your own document to get started or explore the 'About' tab for more information.
+        """)
+        # Display a direct message about sample documents
+        st.info("Sample documents are available in the input directory. Upload a document to begin analysis.")# Minor update