Spaces:
Running
Running
Fix document metadata display and simplify example documents section
Browse files
app.py
CHANGED
@@ -1153,40 +1153,44 @@ with main_tab1:
|
|
1153 |
# Standard processing without custom prompt
|
1154 |
result = process_file(uploaded_file, use_vision, preprocessing_options, progress_container=progress_placeholder)
|
1155 |
|
1156 |
-
# Document
|
1157 |
with right_col:
|
1158 |
-
#
|
1159 |
-
st.
|
1160 |
-
# Create a clean metadata container
|
1161 |
-
st.markdown('<div class="metadata-container">', unsafe_allow_html=True)
|
1162 |
|
1163 |
-
# Display
|
1164 |
-
st.
|
1165 |
-
|
1166 |
-
# Display info if only limited pages were processed
|
1167 |
-
if 'limited_pages' in result:
|
1168 |
-
st.info(f"Processed {result['limited_pages']['processed']} of {result['limited_pages']['total']} pages")
|
1169 |
-
|
1170 |
-
# Display languages if available
|
1171 |
-
if 'languages' in result:
|
1172 |
-
languages = [lang for lang in result['languages'] if lang is not None]
|
1173 |
-
if languages:
|
1174 |
-
st.write(f"**Languages:** {', '.join(languages)}")
|
1175 |
-
|
1176 |
-
# Display topics if available
|
1177 |
-
if 'topics' in result and result['topics']:
|
1178 |
-
st.write(f"**Topics:** {', '.join(result['topics'])}")
|
1179 |
-
|
1180 |
-
# Processing time if available
|
1181 |
-
if 'processing_time' in result:
|
1182 |
-
proc_time = result['processing_time']
|
1183 |
-
st.write(f"**Processing Time:** {proc_time:.1f}s")
|
1184 |
|
1185 |
-
#
|
1186 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1187 |
|
1188 |
-
# Display Document Contents below the metadata in the right column
|
1189 |
-
st.subheader("Document Contents")
|
1190 |
# Start document content div with consistent styling class
|
1191 |
st.markdown('<div class="document-content">', unsafe_allow_html=True)
|
1192 |
if 'ocr_contents' in result:
|
@@ -1637,156 +1641,19 @@ with main_tab1:
|
|
1637 |
else:
|
1638 |
# Empty placeholder - we've moved the upload instruction to the file_uploader
|
1639 |
|
1640 |
-
# Show example images in a
|
1641 |
st.subheader("Example Documents")
|
1642 |
|
1643 |
-
# Add a
|
1644 |
st.markdown("""
|
1645 |
-
|
1646 |
-
|
1647 |
-
|
1648 |
-
|
1649 |
-
|
1650 |
-
margin-top: 20px;
|
1651 |
-
}
|
1652 |
-
.example-item {
|
1653 |
-
border-radius: 8px;
|
1654 |
-
overflow: hidden;
|
1655 |
-
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
1656 |
-
background: white;
|
1657 |
-
transition: transform 0.2s;
|
1658 |
-
height: 100%;
|
1659 |
-
}
|
1660 |
-
.example-item:hover {
|
1661 |
-
transform: translateY(-5px);
|
1662 |
-
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
|
1663 |
-
}
|
1664 |
-
.example-image {
|
1665 |
-
width: 100%;
|
1666 |
-
aspect-ratio: 4/3;
|
1667 |
-
object-fit: cover;
|
1668 |
-
}
|
1669 |
-
.example-caption {
|
1670 |
-
padding: 10px;
|
1671 |
-
font-size: 14px;
|
1672 |
-
text-align: center;
|
1673 |
-
color: #333;
|
1674 |
-
}
|
1675 |
-
@media (max-width: 768px) {
|
1676 |
-
.example-grid {
|
1677 |
-
grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
|
1678 |
-
}
|
1679 |
-
}
|
1680 |
-
@media (max-width: 640px) {
|
1681 |
-
.example-grid {
|
1682 |
-
grid-template-columns: repeat(auto-fill, minmax(130px, 1fr));
|
1683 |
-
}
|
1684 |
-
}
|
1685 |
-
</style>
|
1686 |
-
""", unsafe_allow_html=True)
|
1687 |
|
1688 |
-
|
1689 |
-
|
1690 |
-
sample_images = []
|
1691 |
-
backup_dir = Path(__file__).parent / "backup" / "input"
|
1692 |
-
|
1693 |
-
if input_dir.exists():
|
1694 |
-
# Define images in specific order per requirements
|
1695 |
-
ordered_image_names = [
|
1696 |
-
"magellan-travels.jpg",
|
1697 |
-
"americae-retectio.jpg",
|
1698 |
-
"handwritten-letter.jpg",
|
1699 |
-
"milgram-flier.png",
|
1700 |
-
"recipe.jpg",
|
1701 |
-
"The Magician, or Bottle Cungerer.jpeg"
|
1702 |
-
]
|
1703 |
-
|
1704 |
-
# Create the image list in the desired order
|
1705 |
-
ordered_sample_images = []
|
1706 |
-
for img_name in ordered_image_names:
|
1707 |
-
img_path = input_dir / img_name
|
1708 |
-
if img_path.exists():
|
1709 |
-
ordered_sample_images.append(img_path)
|
1710 |
-
|
1711 |
-
# Use ordered images
|
1712 |
-
sample_images = ordered_sample_images
|
1713 |
-
|
1714 |
-
# Fill in with additional images if needed
|
1715 |
-
if len(sample_images) < 6:
|
1716 |
-
# Get all remaining images from input directory
|
1717 |
-
all_images = set(
|
1718 |
-
list(input_dir.glob("*.jpg")) +
|
1719 |
-
list(input_dir.glob("*.jpeg")) +
|
1720 |
-
list(input_dir.glob("*.png")) +
|
1721 |
-
list(input_dir.glob("*.tif"))
|
1722 |
-
)
|
1723 |
-
|
1724 |
-
# Remove the already selected images
|
1725 |
-
remaining_images = [img for img in all_images if img not in sample_images]
|
1726 |
-
|
1727 |
-
# Add remaining images to fill the grid
|
1728 |
-
sample_images.extend(remaining_images[:6-len(sample_images)])
|
1729 |
-
|
1730 |
-
# Try backup directory if still not enough
|
1731 |
-
if len(sample_images) < 6 and backup_dir.exists():
|
1732 |
-
remaining = 6 - len(sample_images)
|
1733 |
-
backup_samples = (
|
1734 |
-
list(backup_dir.glob("*.jpg")) +
|
1735 |
-
list(backup_dir.glob("*.jpeg")) +
|
1736 |
-
list(backup_dir.glob("*.png"))
|
1737 |
-
)[:remaining]
|
1738 |
-
sample_images.extend(backup_samples)
|
1739 |
|
1740 |
-
#
|
1741 |
-
|
1742 |
-
# Start the grid container
|
1743 |
-
grid_html = '<div class="example-grid">'
|
1744 |
-
|
1745 |
-
# Add each example to the grid
|
1746 |
-
for img_path in sample_images:
|
1747 |
-
if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
|
1748 |
-
try:
|
1749 |
-
# Convert image to base64 for embedding
|
1750 |
-
with open(img_path, "rb") as img_file:
|
1751 |
-
img_data = base64.b64encode(img_file.read()).decode()
|
1752 |
-
|
1753 |
-
# Create item HTML with optimized responsive layout
|
1754 |
-
img_name = img_path.name
|
1755 |
-
# Trim long names
|
1756 |
-
display_name = img_name[:18] + "..." if len(img_name) > 20 else img_name
|
1757 |
-
|
1758 |
-
grid_html += f'''
|
1759 |
-
<div class="example-item">
|
1760 |
-
<img src="data:image/{img_path.suffix.lower().replace('.', '')};base64,{img_data}"
|
1761 |
-
alt="{img_name}" class="example-image">
|
1762 |
-
<div class="example-caption">{display_name}</div>
|
1763 |
-
</div>
|
1764 |
-
'''
|
1765 |
-
except Exception as e:
|
1766 |
-
# Fallback for any loading errors
|
1767 |
-
grid_html += f'''
|
1768 |
-
<div class="example-item">
|
1769 |
-
<div style="height:150px;display:flex;align-items:center;justify-content:center;background:#f8f9fa;">
|
1770 |
-
<span style="color:#666;">Image Error</span>
|
1771 |
-
</div>
|
1772 |
-
<div class="example-caption">{img_path.name}</div>
|
1773 |
-
</div>
|
1774 |
-
'''
|
1775 |
-
else:
|
1776 |
-
# For PDFs, show placeholder
|
1777 |
-
grid_html += f'''
|
1778 |
-
<div class="example-item">
|
1779 |
-
<div style="height:150px;display:flex;align-items:center;justify-content:center;background:#f8f9fa;">
|
1780 |
-
<span style="font-size:32px;">📄</span>
|
1781 |
-
</div>
|
1782 |
-
<div class="example-caption">{img_path.name}</div>
|
1783 |
-
</div>
|
1784 |
-
'''
|
1785 |
-
|
1786 |
-
# Close the grid container
|
1787 |
-
grid_html += '</div>'
|
1788 |
-
|
1789 |
-
# Render the grid with unsafe_allow_html
|
1790 |
-
st.markdown(grid_html, unsafe_allow_html=True)
|
1791 |
-
else:
|
1792 |
-
st.info("No example documents found. Upload your own document to get started.")# Minor update
|
|
|
1153 |
# Standard processing without custom prompt
|
1154 |
result = process_file(uploaded_file, use_vision, preprocessing_options, progress_container=progress_placeholder)
|
1155 |
|
1156 |
+
# Document results will be shown in the right column
|
1157 |
with right_col:
|
1158 |
+
# Initial placeholder for the document title/heading
|
1159 |
+
st.markdown(f"## Document: {result.get('file_name', uploaded_file.name)}")
|
|
|
|
|
1160 |
|
1161 |
+
# Display Document Contents section
|
1162 |
+
st.subheader("Document Contents")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1163 |
|
1164 |
+
# Add Document Metadata section inside the content area
|
1165 |
+
with st.expander("Document Metadata", expanded=True):
|
1166 |
+
# Create a clean metadata container
|
1167 |
+
st.markdown('<div class="metadata-container">', unsafe_allow_html=True)
|
1168 |
+
|
1169 |
+
# Display file info
|
1170 |
+
st.write(f"**File Name:** {result.get('file_name', uploaded_file.name)}")
|
1171 |
+
|
1172 |
+
# Display info if only limited pages were processed
|
1173 |
+
if 'limited_pages' in result:
|
1174 |
+
st.info(f"Processed {result['limited_pages']['processed']} of {result['limited_pages']['total']} pages")
|
1175 |
+
|
1176 |
+
# Display languages if available
|
1177 |
+
if 'languages' in result:
|
1178 |
+
languages = [lang for lang in result['languages'] if lang is not None]
|
1179 |
+
if languages:
|
1180 |
+
st.write(f"**Languages:** {', '.join(languages)}")
|
1181 |
+
|
1182 |
+
# Display topics if available
|
1183 |
+
if 'topics' in result and result['topics']:
|
1184 |
+
st.write(f"**Topics:** {', '.join(result['topics'])}")
|
1185 |
+
|
1186 |
+
# Processing time if available
|
1187 |
+
if 'processing_time' in result:
|
1188 |
+
proc_time = result['processing_time']
|
1189 |
+
st.write(f"**Processing Time:** {proc_time:.1f}s")
|
1190 |
+
|
1191 |
+
# Close the metadata container
|
1192 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
1193 |
|
|
|
|
|
1194 |
# Start document content div with consistent styling class
|
1195 |
st.markdown('<div class="document-content">', unsafe_allow_html=True)
|
1196 |
if 'ocr_contents' in result:
|
|
|
1641 |
else:
|
1642 |
# Empty placeholder - we've moved the upload instruction to the file_uploader
|
1643 |
|
1644 |
+
# Show example images in a simpler layout
|
1645 |
st.subheader("Example Documents")
|
1646 |
|
1647 |
+
# Add a simplified info message about examples
|
1648 |
st.markdown("""
|
1649 |
+
This app can process various historical documents:
|
1650 |
+
- Historical photographs, maps, and manuscripts
|
1651 |
+
- Handwritten letters and documents
|
1652 |
+
- Printed books and articles
|
1653 |
+
- Multi-page PDFs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1654 |
|
1655 |
+
Upload your own document to get started or explore the 'About' tab for more information.
|
1656 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1657 |
|
1658 |
+
# Display a direct message about sample documents
|
1659 |
+
st.info("Sample documents are available in the input directory. Upload a document to begin analysis.")# Minor update
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|