Spaces:
Running
Running
Fix syntax error and responsive layout issues
Browse files
- Fix unterminated string literal in structured_ocr.py line 1672
- Redesign example documents section with CSS grid for proper responsive layout
- Fix narrow viewport issues to prevent column 1 and 2 overlap
- Enhance preprocessing preview with responsive design
- app.py +152 -99
- structured_ocr.py +1 -1
- ui/custom.css +30 -0
app.py
CHANGED
@@ -964,11 +964,13 @@ with main_tab1:
|
|
964 |
if any(preprocessing_options.values()) and uploaded_file.type.startswith('image/'):
|
965 |
st.markdown("**Preprocessed Preview**")
|
966 |
try:
|
967 |
-
|
968 |
-
|
969 |
-
|
|
|
|
|
970 |
|
971 |
-
# Show preprocessing metadata
|
972 |
meta_items = []
|
973 |
if preprocessing_options.get("document_type", "standard") != "standard":
|
974 |
meta_items.append(f"Document type ({preprocessing_options['document_type']})")
|
@@ -1635,105 +1637,156 @@ with main_tab1:
|
|
1635 |
else:
|
1636 |
# Empty placeholder - we've moved the upload instruction to the file_uploader
|
1637 |
|
1638 |
-
# Show example images in a grid
|
1639 |
st.subheader("Example Documents")
|
1640 |
|
1641 |
-
# Add a
|
1642 |
-
|
1643 |
-
|
1644 |
-
|
1645 |
-
|
1646 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1647 |
|
1648 |
-
|
1649 |
-
|
1650 |
-
|
1651 |
-
|
1652 |
-
|
1653 |
-
|
1654 |
-
|
1655 |
-
|
1656 |
-
|
1657 |
-
|
1658 |
-
|
1659 |
-
|
1660 |
-
|
1661 |
-
|
1662 |
-
|
1663 |
-
|
1664 |
-
|
1665 |
-
|
1666 |
-
|
1667 |
|
1668 |
-
#
|
1669 |
-
|
1670 |
|
1671 |
-
#
|
1672 |
-
|
1673 |
-
# Get all remaining images from input directory
|
1674 |
-
all_images = set(
|
1675 |
-
list(input_dir.glob("*.jpg")) +
|
1676 |
-
list(input_dir.glob("*.jpeg")) +
|
1677 |
-
list(input_dir.glob("*.png")) +
|
1678 |
-
list(input_dir.glob("*.tif"))
|
1679 |
-
)
|
1680 |
-
|
1681 |
-
# Remove the already selected images
|
1682 |
-
remaining_images = [img for img in all_images if img not in sample_images]
|
1683 |
-
|
1684 |
-
# Add remaining images to fill the grid
|
1685 |
-
sample_images.extend(remaining_images[:6-len(sample_images)])
|
1686 |
-
|
1687 |
-
# If still not enough, try backup directory
|
1688 |
-
if len(sample_images) < 6 and backup_dir.exists():
|
1689 |
-
remaining = 6 - len(sample_images)
|
1690 |
-
backup_samples = (
|
1691 |
-
list(backup_dir.glob("*.jpg")) +
|
1692 |
-
list(backup_dir.glob("*.jpeg")) +
|
1693 |
-
list(backup_dir.glob("*.png"))
|
1694 |
-
)[:remaining]
|
1695 |
-
sample_images.extend(backup_samples)
|
1696 |
|
1697 |
-
if
|
1698 |
-
|
1699 |
-
|
1700 |
-
|
1701 |
-
|
1702 |
-
|
1703 |
-
|
1704 |
-
|
1705 |
-
|
1706 |
-
|
1707 |
-
|
1708 |
-
|
1709 |
-
|
1710 |
-
|
1711 |
-
|
1712 |
-
|
1713 |
-
|
1714 |
-
|
1715 |
-
|
1716 |
-
|
1717 |
-
|
1718 |
-
|
1719 |
-
|
1720 |
-
|
1721 |
-
|
1722 |
-
|
1723 |
-
|
1724 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1725 |
else:
|
1726 |
-
#
|
1727 |
-
|
1728 |
-
|
1729 |
-
|
1730 |
-
|
1731 |
-
|
1732 |
-
|
1733 |
-
|
1734 |
-
|
1735 |
-
|
1736 |
-
|
1737 |
-
|
1738 |
-
|
1739 |
-
|
|
|
|
|
|
|
|
964 |
if any(preprocessing_options.values()) and uploaded_file.type.startswith('image/'):
|
965 |
st.markdown("**Preprocessed Preview**")
|
966 |
try:
|
967 |
+
# Create a container for the preview to better control layout
|
968 |
+
with st.container():
|
969 |
+
processed_bytes = preprocess_image(uploaded_file.getvalue(), preprocessing_options)
|
970 |
+
# Use use_column_width=True for responsive design
|
971 |
+
st.image(io.BytesIO(processed_bytes), use_column_width=True)
|
972 |
|
973 |
+
# Show preprocessing metadata in a well-formatted caption
|
974 |
meta_items = []
|
975 |
if preprocessing_options.get("document_type", "standard") != "standard":
|
976 |
meta_items.append(f"Document type ({preprocessing_options['document_type']})")
|
|
|
1637 |
else:
|
1638 |
# Empty placeholder - we've moved the upload instruction to the file_uploader
|
1639 |
|
1640 |
+
# Show example images in a responsive grid
|
1641 |
st.subheader("Example Documents")
|
1642 |
|
1643 |
+
# Add a dedicated container with custom styling for the examples
|
1644 |
+
st.markdown("""
|
1645 |
+
<style>
|
1646 |
+
.example-grid {
|
1647 |
+
display: grid;
|
1648 |
+
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
1649 |
+
gap: 20px;
|
1650 |
+
margin-top: 20px;
|
1651 |
+
}
|
1652 |
+
.example-item {
|
1653 |
+
border-radius: 8px;
|
1654 |
+
overflow: hidden;
|
1655 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
1656 |
+
background: white;
|
1657 |
+
transition: transform 0.2s;
|
1658 |
+
height: 100%;
|
1659 |
+
}
|
1660 |
+
.example-item:hover {
|
1661 |
+
transform: translateY(-5px);
|
1662 |
+
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
|
1663 |
+
}
|
1664 |
+
.example-image {
|
1665 |
+
width: 100%;
|
1666 |
+
aspect-ratio: 4/3;
|
1667 |
+
object-fit: cover;
|
1668 |
+
}
|
1669 |
+
.example-caption {
|
1670 |
+
padding: 10px;
|
1671 |
+
font-size: 14px;
|
1672 |
+
text-align: center;
|
1673 |
+
color: #333;
|
1674 |
+
}
|
1675 |
+
@media (max-width: 768px) {
|
1676 |
+
.example-grid {
|
1677 |
+
grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
|
1678 |
+
}
|
1679 |
+
}
|
1680 |
+
@media (max-width: 640px) {
|
1681 |
+
.example-grid {
|
1682 |
+
grid-template-columns: repeat(auto-fill, minmax(130px, 1fr));
|
1683 |
+
}
|
1684 |
+
}
|
1685 |
+
</style>
|
1686 |
+
""", unsafe_allow_html=True)
|
1687 |
+
|
1688 |
+
# Find sample images from the input directory
|
1689 |
+
input_dir = Path(__file__).parent / "input"
|
1690 |
+
sample_images = []
|
1691 |
+
backup_dir = Path(__file__).parent / "backup" / "input"
|
1692 |
+
|
1693 |
+
if input_dir.exists():
|
1694 |
+
# Define images in specific order per requirements
|
1695 |
+
ordered_image_names = [
|
1696 |
+
"magellan-travels.jpg",
|
1697 |
+
"americae-retectio.jpg",
|
1698 |
+
"handwritten-letter.jpg",
|
1699 |
+
"milgram-flier.png",
|
1700 |
+
"recipe.jpg",
|
1701 |
+
"The Magician, or Bottle Cungerer.jpeg"
|
1702 |
+
]
|
1703 |
|
1704 |
+
# Create the image list in the desired order
|
1705 |
+
ordered_sample_images = []
|
1706 |
+
for img_name in ordered_image_names:
|
1707 |
+
img_path = input_dir / img_name
|
1708 |
+
if img_path.exists():
|
1709 |
+
ordered_sample_images.append(img_path)
|
1710 |
+
|
1711 |
+
# Use ordered images
|
1712 |
+
sample_images = ordered_sample_images
|
1713 |
+
|
1714 |
+
# Fill in with additional images if needed
|
1715 |
+
if len(sample_images) < 6:
|
1716 |
+
# Get all remaining images from input directory
|
1717 |
+
all_images = set(
|
1718 |
+
list(input_dir.glob("*.jpg")) +
|
1719 |
+
list(input_dir.glob("*.jpeg")) +
|
1720 |
+
list(input_dir.glob("*.png")) +
|
1721 |
+
list(input_dir.glob("*.tif"))
|
1722 |
+
)
|
1723 |
|
1724 |
+
# Remove the already selected images
|
1725 |
+
remaining_images = [img for img in all_images if img not in sample_images]
|
1726 |
|
1727 |
+
# Add remaining images to fill the grid
|
1728 |
+
sample_images.extend(remaining_images[:6-len(sample_images)])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1729 |
|
1730 |
+
# Try backup directory if still not enough
|
1731 |
+
if len(sample_images) < 6 and backup_dir.exists():
|
1732 |
+
remaining = 6 - len(sample_images)
|
1733 |
+
backup_samples = (
|
1734 |
+
list(backup_dir.glob("*.jpg")) +
|
1735 |
+
list(backup_dir.glob("*.jpeg")) +
|
1736 |
+
list(backup_dir.glob("*.png"))
|
1737 |
+
)[:remaining]
|
1738 |
+
sample_images.extend(backup_samples)
|
1739 |
+
|
1740 |
+
# Render the examples using custom HTML for better layout control
|
1741 |
+
if sample_images:
|
1742 |
+
# Start the grid container
|
1743 |
+
grid_html = '<div class="example-grid">'
|
1744 |
+
|
1745 |
+
# Add each example to the grid
|
1746 |
+
for img_path in sample_images:
|
1747 |
+
if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
|
1748 |
+
try:
|
1749 |
+
# Convert image to base64 for embedding
|
1750 |
+
with open(img_path, "rb") as img_file:
|
1751 |
+
img_data = base64.b64encode(img_file.read()).decode()
|
1752 |
+
|
1753 |
+
# Create item HTML with optimized responsive layout
|
1754 |
+
img_name = img_path.name
|
1755 |
+
# Trim long names
|
1756 |
+
display_name = img_name[:18] + "..." if len(img_name) > 20 else img_name
|
1757 |
+
|
1758 |
+
grid_html += f'''
|
1759 |
+
<div class="example-item">
|
1760 |
+
<img src="data:image/{img_path.suffix.lower().replace('.', '')};base64,{img_data}"
|
1761 |
+
alt="{img_name}" class="example-image">
|
1762 |
+
<div class="example-caption">{display_name}</div>
|
1763 |
+
</div>
|
1764 |
+
'''
|
1765 |
+
except Exception as e:
|
1766 |
+
# Fallback for any loading errors
|
1767 |
+
grid_html += f'''
|
1768 |
+
<div class="example-item">
|
1769 |
+
<div style="height:150px;display:flex;align-items:center;justify-content:center;background:#f8f9fa;">
|
1770 |
+
<span style="color:#666;">Image Error</span>
|
1771 |
+
</div>
|
1772 |
+
<div class="example-caption">{img_path.name}</div>
|
1773 |
+
</div>
|
1774 |
+
'''
|
1775 |
else:
|
1776 |
+
# For PDFs, show placeholder
|
1777 |
+
grid_html += f'''
|
1778 |
+
<div class="example-item">
|
1779 |
+
<div style="height:150px;display:flex;align-items:center;justify-content:center;background:#f8f9fa;">
|
1780 |
+
<span style="font-size:32px;">📄</span>
|
1781 |
+
</div>
|
1782 |
+
<div class="example-caption">{img_path.name}</div>
|
1783 |
+
</div>
|
1784 |
+
'''
|
1785 |
+
|
1786 |
+
# Close the grid container
|
1787 |
+
grid_html += '</div>'
|
1788 |
+
|
1789 |
+
# Render the grid with unsafe_allow_html
|
1790 |
+
st.markdown(grid_html, unsafe_allow_html=True)
|
1791 |
+
else:
|
1792 |
+
st.info("No example documents found. Upload your own document to get started.")# Minor update
|
structured_ocr.py
CHANGED
@@ -1669,7 +1669,7 @@ class StructuredOCR:
|
|
1669 |
"languages": ["English"],
|
1670 |
"ocr_contents": {
|
1671 |
"raw_text": ocr_markdown[:50000] if ocr_markdown else "No text could be extracted",
|
1672 |
-
"error": f"AI processing failed: {str(e).replace('\"', '
|
1673 |
},
|
1674 |
"processing_method": "fallback",
|
1675 |
"processing_error": str(e),
|
|
|
1669 |
"languages": ["English"],
|
1670 |
"ocr_contents": {
|
1671 |
"raw_text": ocr_markdown[:50000] if ocr_markdown else "No text could be extracted",
|
1672 |
+
"error": f"AI processing failed: {str(e).replace('\"', '\\\"')}"
|
1673 |
},
|
1674 |
"processing_method": "fallback",
|
1675 |
"processing_error": str(e),
|
ui/custom.css
CHANGED
@@ -265,6 +265,16 @@ button.streamlit-expanderHeader {
|
|
265 |
gap: 20px !important;
|
266 |
margin-bottom: 20px !important;
|
267 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
}
|
269 |
|
270 |
/* Modern Streamlit styling - better responsive behavior */
|
@@ -282,10 +292,22 @@ button.streamlit-expanderHeader {
|
|
282 |
width: 100% !important;
|
283 |
}
|
284 |
|
|
|
285 |
[data-testid="column"] {
|
286 |
width: 100% !important;
|
287 |
flex: 1 1 100% !important;
|
288 |
padding: 0 !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
289 |
}
|
290 |
|
291 |
/* Make images more visible on small screens */
|
@@ -295,6 +317,14 @@ button.streamlit-expanderHeader {
|
|
295 |
width: 100% !important;
|
296 |
margin-bottom: 15px !important;
|
297 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
}
|
299 |
|
300 |
/* Fix image display in grid layout */
|
|
|
265 |
gap: 20px !important;
|
266 |
margin-bottom: 20px !important;
|
267 |
}
|
268 |
+
|
269 |
+
/* Force separate columns on mid-sized screens */
|
270 |
+
[data-testid="stHorizontalBlock"] {
|
271 |
+
flex-wrap: wrap !important;
|
272 |
+
}
|
273 |
+
|
274 |
+
[data-testid="stHorizontalBlock"] > div {
|
275 |
+
min-width: 45% !important;
|
276 |
+
flex: 1 1 45% !important;
|
277 |
+
}
|
278 |
}
|
279 |
|
280 |
/* Modern Streamlit styling - better responsive behavior */
|
|
|
292 |
width: 100% !important;
|
293 |
}
|
294 |
|
295 |
+
/* Critical fix for column display to prevent overlapping */
|
296 |
[data-testid="column"] {
|
297 |
width: 100% !important;
|
298 |
flex: 1 1 100% !important;
|
299 |
padding: 0 !important;
|
300 |
+
min-width: 100% !important;
|
301 |
+
max-width: 100% !important;
|
302 |
+
float: none !important;
|
303 |
+
clear: both !important;
|
304 |
+
display: block !important;
|
305 |
+
}
|
306 |
+
|
307 |
+
/* Enforce correct column layout for Streamlit's container elements */
|
308 |
+
div[data-testid="stHorizontalBlock"] {
|
309 |
+
flex-direction: column !important;
|
310 |
+
display: block !important;
|
311 |
}
|
312 |
|
313 |
/* Make images more visible on small screens */
|
|
|
317 |
width: 100% !important;
|
318 |
margin-bottom: 15px !important;
|
319 |
}
|
320 |
+
|
321 |
+
/* Fix example documents grid layout */
|
322 |
+
.stImage {
|
323 |
+
display: block !important;
|
324 |
+
margin-left: auto !important;
|
325 |
+
margin-right: auto !important;
|
326 |
+
width: 100% !important;
|
327 |
+
}
|
328 |
}
|
329 |
|
330 |
/* Fix image display in grid layout */
|