milwright committed on
Commit
2a692ed
·
1 Parent(s): 7397882

Fix document metadata white bar issue and JSON parsing error

Browse files
Files changed (3) hide show
  1. app.py +50 -15
  2. structured_ocr.py +1 -1
  3. ui/custom.css +12 -3
app.py CHANGED
@@ -800,17 +800,32 @@ with main_tab2:
800
 
801
  with view_tab2:
802
  # Show the raw JSON with an option to download it
803
- st.json(selected_result)
 
 
 
 
 
804
 
805
  # Add JSON download button
806
- json_str = json.dumps(selected_result, indent=2)
807
- filename = selected_result.get('file_name', 'document').split('.')[0]
808
- st.download_button(
809
- label="Download JSON",
810
- data=json_str,
811
- file_name=f"{filename}_data.json",
812
- mime="application/json"
813
- )
 
 
 
 
 
 
 
 
 
 
814
 
815
  if has_images and 'pages_data' in selected_result:
816
  with view_tab3:
@@ -1138,8 +1153,10 @@ with main_tab1:
1138
 
1139
  # Document Metadata in the top right of the right column
1140
  with right_col:
1141
- # Directly add the subheader and container without nesting - fix white container
1142
- st.markdown('<h3 style="margin-bottom:0; padding-bottom:0;">Document Metadata</h3><div class="metadata-container" style="margin-top:0;">', unsafe_allow_html=True)
 
 
1143
 
1144
  # Display file info
1145
  st.write(f"**File Name:** {result.get('file_name', uploaded_file.name)}")
@@ -1240,14 +1257,22 @@ with main_tab1:
1240
  html_list += f"<li>{item}</li>"
1241
  st.write(f"- {item}")
1242
  elif isinstance(item, dict):
1243
- st.json(item)
 
 
 
 
1244
  else:
1245
  for item in content:
1246
  if isinstance(item, str):
1247
  html_list += f"<li>{item}</li>"
1248
  st.write(f"- {item}")
1249
  elif isinstance(item, dict):
1250
- st.json(item)
 
 
 
 
1251
  html_list += "</ul>"
1252
  html_content += html_list
1253
  elif isinstance(content, dict):
@@ -1276,9 +1301,19 @@ with main_tab1:
1276
  # Show the raw JSON for developers, with an expander for large results
1277
  if len(json.dumps(result)) > 5000:
1278
  with st.expander("View full JSON"):
1279
- st.json(result)
 
 
 
 
 
1280
  else:
1281
- st.json(result)
 
 
 
 
 
1282
 
1283
  if has_images and 'pages_data' in result:
1284
  with view_tab3:
 
800
 
801
  with view_tab2:
802
  # Show the raw JSON with an option to download it
803
+ try:
804
+ st.json(selected_result)
805
+ except Exception as e:
806
+ st.error(f"Error displaying JSON: {str(e)}")
807
+ # Try a safer approach with string representation
808
+ st.code(str(selected_result))
809
 
810
  # Add JSON download button
811
+ try:
812
+ json_str = json.dumps(selected_result, indent=2)
813
+ filename = selected_result.get('file_name', 'document').split('.')[0]
814
+ st.download_button(
815
+ label="Download JSON",
816
+ data=json_str,
817
+ file_name=f"{filename}_data.json",
818
+ mime="application/json"
819
+ )
820
+ except Exception as e:
821
+ st.error(f"Error creating JSON download: {str(e)}")
822
+ # Fallback to string representation for download
823
+ st.download_button(
824
+ label="Download as Text",
825
+ data=str(selected_result),
826
+ file_name=f"{filename}_data.txt",
827
+ mime="text/plain"
828
+ )
829
 
830
  if has_images and 'pages_data' in selected_result:
831
  with view_tab3:
 
1153
 
1154
  # Document Metadata in the top right of the right column
1155
  with right_col:
1156
+ # Add the subheader separately to avoid the white bar
1157
+ st.subheader("Document Metadata")
1158
+ # Create a clean metadata container
1159
+ st.markdown('<div class="metadata-container">', unsafe_allow_html=True)
1160
 
1161
  # Display file info
1162
  st.write(f"**File Name:** {result.get('file_name', uploaded_file.name)}")
 
1257
  html_list += f"<li>{item}</li>"
1258
  st.write(f"- {item}")
1259
  elif isinstance(item, dict):
1260
+ try:
1261
+ st.json(item)
1262
+ except Exception as e:
1263
+ st.error(f"Error displaying JSON: {str(e)}")
1264
+ st.code(str(item))
1265
  else:
1266
  for item in content:
1267
  if isinstance(item, str):
1268
  html_list += f"<li>{item}</li>"
1269
  st.write(f"- {item}")
1270
  elif isinstance(item, dict):
1271
+ try:
1272
+ st.json(item)
1273
+ except Exception as e:
1274
+ st.error(f"Error displaying JSON: {str(e)}")
1275
+ st.code(str(item))
1276
  html_list += "</ul>"
1277
  html_content += html_list
1278
  elif isinstance(content, dict):
 
1301
  # Show the raw JSON for developers, with an expander for large results
1302
  if len(json.dumps(result)) > 5000:
1303
  with st.expander("View full JSON"):
1304
+ try:
1305
+ st.json(result)
1306
+ except Exception as e:
1307
+ st.error(f"Error displaying JSON: {str(e)}")
1308
+ # Fallback to string representation
1309
+ st.code(str(result))
1310
  else:
1311
+ try:
1312
+ st.json(result)
1313
+ except Exception as e:
1314
+ st.error(f"Error displaying JSON: {str(e)}")
1315
+ # Fallback to string representation
1316
+ st.code(str(result))
1317
 
1318
  if has_images and 'pages_data' in result:
1319
  with view_tab3:
structured_ocr.py CHANGED
@@ -1669,7 +1669,7 @@ class StructuredOCR:
1669
  "languages": ["English"],
1670
  "ocr_contents": {
1671
  "raw_text": ocr_markdown[:50000] if ocr_markdown else "No text could be extracted",
1672
- "error": f"AI processing failed: {str(e)}"
1673
  },
1674
  "processing_method": "fallback",
1675
  "processing_error": str(e),
 
1669
  "languages": ["English"],
1670
  "ocr_contents": {
1671
  "raw_text": ocr_markdown[:50000] if ocr_markdown else "No text could be extracted",
1672
+ "error": f"AI processing failed: {str(e).replace('\"', '\\"')}"
1673
  },
1674
  "processing_method": "fallback",
1675
  "processing_error": str(e),
ui/custom.css CHANGED
@@ -99,7 +99,7 @@
99
  border-radius: 4px;
100
  padding: 12px;
101
  margin-bottom: 20px;
102
- margin-top: 0px !important;
103
  border-left: 3px solid #4285f4;
104
  }
105
 
@@ -111,15 +111,24 @@
111
  /* Fix spacing for headings above metadata container */
112
  .element-container h3 + div .metadata-container,
113
  .element-container h1 + div .metadata-container,
114
- .element-container h2 + div .metadata-container {
 
115
  margin-top: 0 !important;
116
  }
117
 
 
 
 
 
 
118
  /* Remove excess space between metadata heading and content */
119
  .stMarkdown + div div.element-container,
120
  .stMarkdown + div,
121
  .stHeading + div,
122
- .stHeading + div div.element-container {
 
 
 
123
  margin-top: 0 !important;
124
  padding-top: 0 !important;
125
  }
 
99
  border-radius: 4px;
100
  padding: 12px;
101
  margin-bottom: 20px;
102
+ margin-top: -10px !important; /* Negative margin to reduce gap with header */
103
  border-left: 3px solid #4285f4;
104
  }
105
 
 
111
  /* Fix spacing for headings above metadata container */
112
  .element-container h3 + div .metadata-container,
113
  .element-container h1 + div .metadata-container,
114
+ .element-container h2 + div .metadata-container,
115
+ .stHeading + div div {
116
  margin-top: 0 !important;
117
  }
118
 
119
+ /* Fix for subheader and metadata container spacing */
120
+ .stHeading ~ div {
121
+ margin-top: -10px !important;
122
+ }
123
+
124
  /* Remove excess space between metadata heading and content */
125
  .stMarkdown + div div.element-container,
126
  .stMarkdown + div,
127
  .stHeading + div,
128
+ .stHeading + div div.element-container,
129
+ header + div.stMarkdown + div,
130
+ [data-testid="stHeader"] + div,
131
+ .stHeading + * {
132
  margin-top: 0 !important;
133
  padding-top: 0 !important;
134
  }