milwright committed on
Commit
9f42b50
·
1 Parent(s): 6931bb0

Fix document metadata display and simplify example documents section

Browse files
Files changed (1) hide show
  1. app.py +45 -178
app.py CHANGED
@@ -1153,40 +1153,44 @@ with main_tab1:
1153
  # Standard processing without custom prompt
1154
  result = process_file(uploaded_file, use_vision, preprocessing_options, progress_container=progress_placeholder)
1155
 
1156
- # Document Metadata in the top right of the right column
1157
  with right_col:
1158
- # Add the subheader separately to avoid the white bar
1159
- st.subheader("Document Metadata")
1160
- # Create a clean metadata container
1161
- st.markdown('<div class="metadata-container">', unsafe_allow_html=True)
1162
 
1163
- # Display file info
1164
- st.write(f"**File Name:** {result.get('file_name', uploaded_file.name)}")
1165
-
1166
- # Display info if only limited pages were processed
1167
- if 'limited_pages' in result:
1168
- st.info(f"Processed {result['limited_pages']['processed']} of {result['limited_pages']['total']} pages")
1169
-
1170
- # Display languages if available
1171
- if 'languages' in result:
1172
- languages = [lang for lang in result['languages'] if lang is not None]
1173
- if languages:
1174
- st.write(f"**Languages:** {', '.join(languages)}")
1175
-
1176
- # Display topics if available
1177
- if 'topics' in result and result['topics']:
1178
- st.write(f"**Topics:** {', '.join(result['topics'])}")
1179
-
1180
- # Processing time if available
1181
- if 'processing_time' in result:
1182
- proc_time = result['processing_time']
1183
- st.write(f"**Processing Time:** {proc_time:.1f}s")
1184
 
1185
- # Close the metadata container
1186
- st.markdown('</div>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1187
 
1188
- # Display Document Contents below the metadata in the right column
1189
- st.subheader("Document Contents")
1190
  # Start document content div with consistent styling class
1191
  st.markdown('<div class="document-content">', unsafe_allow_html=True)
1192
  if 'ocr_contents' in result:
@@ -1637,156 +1641,19 @@ with main_tab1:
1637
  else:
1638
  # Empty placeholder - we've moved the upload instruction to the file_uploader
1639
 
1640
- # Show example images in a responsive grid
1641
  st.subheader("Example Documents")
1642
 
1643
- # Add a dedicated container with custom styling for the examples
1644
  st.markdown("""
1645
- <style>
1646
- .example-grid {
1647
- display: grid;
1648
- grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
1649
- gap: 20px;
1650
- margin-top: 20px;
1651
- }
1652
- .example-item {
1653
- border-radius: 8px;
1654
- overflow: hidden;
1655
- box-shadow: 0 2px 8px rgba(0,0,0,0.1);
1656
- background: white;
1657
- transition: transform 0.2s;
1658
- height: 100%;
1659
- }
1660
- .example-item:hover {
1661
- transform: translateY(-5px);
1662
- box-shadow: 0 5px 15px rgba(0,0,0,0.1);
1663
- }
1664
- .example-image {
1665
- width: 100%;
1666
- aspect-ratio: 4/3;
1667
- object-fit: cover;
1668
- }
1669
- .example-caption {
1670
- padding: 10px;
1671
- font-size: 14px;
1672
- text-align: center;
1673
- color: #333;
1674
- }
1675
- @media (max-width: 768px) {
1676
- .example-grid {
1677
- grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
1678
- }
1679
- }
1680
- @media (max-width: 640px) {
1681
- .example-grid {
1682
- grid-template-columns: repeat(auto-fill, minmax(130px, 1fr));
1683
- }
1684
- }
1685
- </style>
1686
- """, unsafe_allow_html=True)
1687
 
1688
- # Find sample images from the input directory
1689
- input_dir = Path(__file__).parent / "input"
1690
- sample_images = []
1691
- backup_dir = Path(__file__).parent / "backup" / "input"
1692
-
1693
- if input_dir.exists():
1694
- # Define images in specific order per requirements
1695
- ordered_image_names = [
1696
- "magellan-travels.jpg",
1697
- "americae-retectio.jpg",
1698
- "handwritten-letter.jpg",
1699
- "milgram-flier.png",
1700
- "recipe.jpg",
1701
- "The Magician, or Bottle Cungerer.jpeg"
1702
- ]
1703
-
1704
- # Create the image list in the desired order
1705
- ordered_sample_images = []
1706
- for img_name in ordered_image_names:
1707
- img_path = input_dir / img_name
1708
- if img_path.exists():
1709
- ordered_sample_images.append(img_path)
1710
-
1711
- # Use ordered images
1712
- sample_images = ordered_sample_images
1713
-
1714
- # Fill in with additional images if needed
1715
- if len(sample_images) < 6:
1716
- # Get all remaining images from input directory
1717
- all_images = set(
1718
- list(input_dir.glob("*.jpg")) +
1719
- list(input_dir.glob("*.jpeg")) +
1720
- list(input_dir.glob("*.png")) +
1721
- list(input_dir.glob("*.tif"))
1722
- )
1723
-
1724
- # Remove the already selected images
1725
- remaining_images = [img for img in all_images if img not in sample_images]
1726
-
1727
- # Add remaining images to fill the grid
1728
- sample_images.extend(remaining_images[:6-len(sample_images)])
1729
-
1730
- # Try backup directory if still not enough
1731
- if len(sample_images) < 6 and backup_dir.exists():
1732
- remaining = 6 - len(sample_images)
1733
- backup_samples = (
1734
- list(backup_dir.glob("*.jpg")) +
1735
- list(backup_dir.glob("*.jpeg")) +
1736
- list(backup_dir.glob("*.png"))
1737
- )[:remaining]
1738
- sample_images.extend(backup_samples)
1739
 
1740
- # Render the examples using custom HTML for better layout control
1741
- if sample_images:
1742
- # Start the grid container
1743
- grid_html = '<div class="example-grid">'
1744
-
1745
- # Add each example to the grid
1746
- for img_path in sample_images:
1747
- if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
1748
- try:
1749
- # Convert image to base64 for embedding
1750
- with open(img_path, "rb") as img_file:
1751
- img_data = base64.b64encode(img_file.read()).decode()
1752
-
1753
- # Create item HTML with optimized responsive layout
1754
- img_name = img_path.name
1755
- # Trim long names
1756
- display_name = img_name[:18] + "..." if len(img_name) > 20 else img_name
1757
-
1758
- grid_html += f'''
1759
- <div class="example-item">
1760
- <img src="data:image/{img_path.suffix.lower().replace('.', '')};base64,{img_data}"
1761
- alt="{img_name}" class="example-image">
1762
- <div class="example-caption">{display_name}</div>
1763
- </div>
1764
- '''
1765
- except Exception as e:
1766
- # Fallback for any loading errors
1767
- grid_html += f'''
1768
- <div class="example-item">
1769
- <div style="height:150px;display:flex;align-items:center;justify-content:center;background:#f8f9fa;">
1770
- <span style="color:#666;">Image Error</span>
1771
- </div>
1772
- <div class="example-caption">{img_path.name}</div>
1773
- </div>
1774
- '''
1775
- else:
1776
- # For PDFs, show placeholder
1777
- grid_html += f'''
1778
- <div class="example-item">
1779
- <div style="height:150px;display:flex;align-items:center;justify-content:center;background:#f8f9fa;">
1780
- <span style="font-size:32px;">📄</span>
1781
- </div>
1782
- <div class="example-caption">{img_path.name}</div>
1783
- </div>
1784
- '''
1785
-
1786
- # Close the grid container
1787
- grid_html += '</div>'
1788
-
1789
- # Render the grid with unsafe_allow_html
1790
- st.markdown(grid_html, unsafe_allow_html=True)
1791
- else:
1792
- st.info("No example documents found. Upload your own document to get started.")# Minor update
 
1153
  # Standard processing without custom prompt
1154
  result = process_file(uploaded_file, use_vision, preprocessing_options, progress_container=progress_placeholder)
1155
 
1156
+ # Document results will be shown in the right column
1157
  with right_col:
1158
+ # Initial placeholder for the document title/heading
1159
+ st.markdown(f"## Document: {result.get('file_name', uploaded_file.name)}")
 
 
1160
 
1161
+ # Display Document Contents section
1162
+ st.subheader("Document Contents")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1163
 
1164
+ # Add Document Metadata section inside the content area
1165
+ with st.expander("Document Metadata", expanded=True):
1166
+ # Create a clean metadata container
1167
+ st.markdown('<div class="metadata-container">', unsafe_allow_html=True)
1168
+
1169
+ # Display file info
1170
+ st.write(f"**File Name:** {result.get('file_name', uploaded_file.name)}")
1171
+
1172
+ # Display info if only limited pages were processed
1173
+ if 'limited_pages' in result:
1174
+ st.info(f"Processed {result['limited_pages']['processed']} of {result['limited_pages']['total']} pages")
1175
+
1176
+ # Display languages if available
1177
+ if 'languages' in result:
1178
+ languages = [lang for lang in result['languages'] if lang is not None]
1179
+ if languages:
1180
+ st.write(f"**Languages:** {', '.join(languages)}")
1181
+
1182
+ # Display topics if available
1183
+ if 'topics' in result and result['topics']:
1184
+ st.write(f"**Topics:** {', '.join(result['topics'])}")
1185
+
1186
+ # Processing time if available
1187
+ if 'processing_time' in result:
1188
+ proc_time = result['processing_time']
1189
+ st.write(f"**Processing Time:** {proc_time:.1f}s")
1190
+
1191
+ # Close the metadata container
1192
+ st.markdown('</div>', unsafe_allow_html=True)
1193
 
 
 
1194
  # Start document content div with consistent styling class
1195
  st.markdown('<div class="document-content">', unsafe_allow_html=True)
1196
  if 'ocr_contents' in result:
 
1641
  else:
1642
  # Empty placeholder - we've moved the upload instruction to the file_uploader
1643
 
1644
+ # Show example images in a simpler layout
1645
  st.subheader("Example Documents")
1646
 
1647
+ # Add a simplified info message about examples
1648
  st.markdown("""
1649
+ This app can process various historical documents:
1650
+ - Historical photographs, maps, and manuscripts
1651
+ - Handwritten letters and documents
1652
+ - Printed books and articles
1653
+ - Multi-page PDFs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1654
 
1655
+ Upload your own document to get started or explore the 'About' tab for more information.
1656
+ """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1657
 
1658
+ # Display a direct message about sample documents
1659
+ st.info("Sample documents are available in the input directory. Upload a document to begin analysis.")# Minor update