Spaces:
Running
Running
Fix document metadata white bar issue and JSON parsing error
Browse files
- app.py +50 -15
- structured_ocr.py +1 -1
- ui/custom.css +12 -3
app.py
CHANGED
@@ -800,17 +800,32 @@ with main_tab2:
|
|
800 |
|
801 |
with view_tab2:
|
802 |
# Show the raw JSON with an option to download it
|
803 |
-
|
|
|
|
|
|
|
|
|
|
|
804 |
|
805 |
# Add JSON download button
|
806 |
-
|
807 |
-
|
808 |
-
|
809 |
-
|
810 |
-
|
811 |
-
|
812 |
-
|
813 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
814 |
|
815 |
if has_images and 'pages_data' in selected_result:
|
816 |
with view_tab3:
|
@@ -1138,8 +1153,10 @@ with main_tab1:
|
|
1138 |
|
1139 |
# Document Metadata in the top right of the right column
|
1140 |
with right_col:
|
1141 |
-
#
|
1142 |
-
st.
|
|
|
|
|
1143 |
|
1144 |
# Display file info
|
1145 |
st.write(f"**File Name:** {result.get('file_name', uploaded_file.name)}")
|
@@ -1240,14 +1257,22 @@ with main_tab1:
|
|
1240 |
html_list += f"<li>{item}</li>"
|
1241 |
st.write(f"- {item}")
|
1242 |
elif isinstance(item, dict):
|
1243 |
-
|
|
|
|
|
|
|
|
|
1244 |
else:
|
1245 |
for item in content:
|
1246 |
if isinstance(item, str):
|
1247 |
html_list += f"<li>{item}</li>"
|
1248 |
st.write(f"- {item}")
|
1249 |
elif isinstance(item, dict):
|
1250 |
-
|
|
|
|
|
|
|
|
|
1251 |
html_list += "</ul>"
|
1252 |
html_content += html_list
|
1253 |
elif isinstance(content, dict):
|
@@ -1276,9 +1301,19 @@ with main_tab1:
|
|
1276 |
# Show the raw JSON for developers, with an expander for large results
|
1277 |
if len(json.dumps(result)) > 5000:
|
1278 |
with st.expander("View full JSON"):
|
1279 |
-
|
|
|
|
|
|
|
|
|
|
|
1280 |
else:
|
1281 |
-
|
|
|
|
|
|
|
|
|
|
|
1282 |
|
1283 |
if has_images and 'pages_data' in result:
|
1284 |
with view_tab3:
|
|
|
800 |
|
801 |
with view_tab2:
|
802 |
# Show the raw JSON with an option to download it
|
803 |
+
try:
|
804 |
+
st.json(selected_result)
|
805 |
+
except Exception as e:
|
806 |
+
st.error(f"Error displaying JSON: {str(e)}")
|
807 |
+
# Try a safer approach with string representation
|
808 |
+
st.code(str(selected_result))
|
809 |
|
810 |
# Add JSON download button
|
811 |
+
try:
|
812 |
+
json_str = json.dumps(selected_result, indent=2)
|
813 |
+
filename = selected_result.get('file_name', 'document').split('.')[0]
|
814 |
+
st.download_button(
|
815 |
+
label="Download JSON",
|
816 |
+
data=json_str,
|
817 |
+
file_name=f"{filename}_data.json",
|
818 |
+
mime="application/json"
|
819 |
+
)
|
820 |
+
except Exception as e:
|
821 |
+
st.error(f"Error creating JSON download: {str(e)}")
|
822 |
+
# Fallback to string representation for download
|
823 |
+
st.download_button(
|
824 |
+
label="Download as Text",
|
825 |
+
data=str(selected_result),
|
826 |
+
file_name=f"{filename}_data.txt",
|
827 |
+
mime="text/plain"
|
828 |
+
)
|
829 |
|
830 |
if has_images and 'pages_data' in selected_result:
|
831 |
with view_tab3:
|
|
|
1153 |
|
1154 |
# Document Metadata in the top right of the right column
|
1155 |
with right_col:
|
1156 |
+
# Add the subheader separately to avoid the white bar
|
1157 |
+
st.subheader("Document Metadata")
|
1158 |
+
# Create a clean metadata container
|
1159 |
+
st.markdown('<div class="metadata-container">', unsafe_allow_html=True)
|
1160 |
|
1161 |
# Display file info
|
1162 |
st.write(f"**File Name:** {result.get('file_name', uploaded_file.name)}")
|
|
|
1257 |
html_list += f"<li>{item}</li>"
|
1258 |
st.write(f"- {item}")
|
1259 |
elif isinstance(item, dict):
|
1260 |
+
try:
|
1261 |
+
st.json(item)
|
1262 |
+
except Exception as e:
|
1263 |
+
st.error(f"Error displaying JSON: {str(e)}")
|
1264 |
+
st.code(str(item))
|
1265 |
else:
|
1266 |
for item in content:
|
1267 |
if isinstance(item, str):
|
1268 |
html_list += f"<li>{item}</li>"
|
1269 |
st.write(f"- {item}")
|
1270 |
elif isinstance(item, dict):
|
1271 |
+
try:
|
1272 |
+
st.json(item)
|
1273 |
+
except Exception as e:
|
1274 |
+
st.error(f"Error displaying JSON: {str(e)}")
|
1275 |
+
st.code(str(item))
|
1276 |
html_list += "</ul>"
|
1277 |
html_content += html_list
|
1278 |
elif isinstance(content, dict):
|
|
|
1301 |
# Show the raw JSON for developers, with an expander for large results
|
1302 |
if len(json.dumps(result)) > 5000:
|
1303 |
with st.expander("View full JSON"):
|
1304 |
+
try:
|
1305 |
+
st.json(result)
|
1306 |
+
except Exception as e:
|
1307 |
+
st.error(f"Error displaying JSON: {str(e)}")
|
1308 |
+
# Fallback to string representation
|
1309 |
+
st.code(str(result))
|
1310 |
else:
|
1311 |
+
try:
|
1312 |
+
st.json(result)
|
1313 |
+
except Exception as e:
|
1314 |
+
st.error(f"Error displaying JSON: {str(e)}")
|
1315 |
+
# Fallback to string representation
|
1316 |
+
st.code(str(result))
|
1317 |
|
1318 |
if has_images and 'pages_data' in result:
|
1319 |
with view_tab3:
|
structured_ocr.py
CHANGED
@@ -1669,7 +1669,7 @@ class StructuredOCR:
|
|
1669 |
"languages": ["English"],
|
1670 |
"ocr_contents": {
|
1671 |
"raw_text": ocr_markdown[:50000] if ocr_markdown else "No text could be extracted",
|
1672 |
-
"error": f"AI processing failed: {str(e)}"
|
1673 |
},
|
1674 |
"processing_method": "fallback",
|
1675 |
"processing_error": str(e),
|
|
|
1669 |
"languages": ["English"],
|
1670 |
"ocr_contents": {
|
1671 |
"raw_text": ocr_markdown[:50000] if ocr_markdown else "No text could be extracted",
|
1672 |
+
"error": f"AI processing failed: {str(e).replace('\"', '\\"')}"
|
1673 |
},
|
1674 |
"processing_method": "fallback",
|
1675 |
"processing_error": str(e),
|
ui/custom.css
CHANGED
@@ -99,7 +99,7 @@
|
|
99 |
border-radius: 4px;
|
100 |
padding: 12px;
|
101 |
margin-bottom: 20px;
|
102 |
-
margin-top:
|
103 |
border-left: 3px solid #4285f4;
|
104 |
}
|
105 |
|
@@ -111,15 +111,24 @@
|
|
111 |
/* Fix spacing for headings above metadata container */
|
112 |
.element-container h3 + div .metadata-container,
|
113 |
.element-container h1 + div .metadata-container,
|
114 |
-
.element-container h2 + div .metadata-container
|
|
|
115 |
margin-top: 0 !important;
|
116 |
}
|
117 |
|
|
|
|
|
|
|
|
|
|
|
118 |
/* Remove excess space between metadata heading and content */
|
119 |
.stMarkdown + div div.element-container,
|
120 |
.stMarkdown + div,
|
121 |
.stHeading + div,
|
122 |
-
.stHeading + div div.element-container
|
|
|
|
|
|
|
123 |
margin-top: 0 !important;
|
124 |
padding-top: 0 !important;
|
125 |
}
|
|
|
99 |
border-radius: 4px;
|
100 |
padding: 12px;
|
101 |
margin-bottom: 20px;
|
102 |
+
margin-top: -10px !important; /* Negative margin to reduce gap with header */
|
103 |
border-left: 3px solid #4285f4;
|
104 |
}
|
105 |
|
|
|
111 |
/* Fix spacing for headings above metadata container */
|
112 |
.element-container h3 + div .metadata-container,
|
113 |
.element-container h1 + div .metadata-container,
|
114 |
+
.element-container h2 + div .metadata-container,
|
115 |
+
.stHeading + div div {
|
116 |
margin-top: 0 !important;
|
117 |
}
|
118 |
|
119 |
+
/* Fix for subheader and metadata container spacing */
|
120 |
+
.stHeading ~ div {
|
121 |
+
margin-top: -10px !important;
|
122 |
+
}
|
123 |
+
|
124 |
/* Remove excess space between metadata heading and content */
|
125 |
.stMarkdown + div div.element-container,
|
126 |
.stMarkdown + div,
|
127 |
.stHeading + div,
|
128 |
+
.stHeading + div div.element-container,
|
129 |
+
header + div.stMarkdown + div,
|
130 |
+
[data-testid="stHeader"] + div,
|
131 |
+
.stHeading + * {
|
132 |
margin-top: 0 !important;
|
133 |
padding-top: 0 !important;
|
134 |
}
|