CCockrum committed on
Commit
1ce0089
·
verified ·
1 Parent(s): 627b03f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -14
app.py CHANGED
@@ -34,20 +34,20 @@ st.markdown("""
34
  border-radius: 12px;
35
  margin-bottom: 1rem;
36
  }
37
- .stAlert {
38
  background-color: #f0f0f5 !important;
39
- color: #D3D3D3 !important;
40
  padding: 1.25rem !important;
41
  font-size: 1rem !important;
42
  border-radius: 0.5rem !important;
43
  box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05) !important;
44
  }
45
  header[data-testid="stHeader"] {
46
- background-color: gray !important;
47
  }
48
  section[data-testid="stSidebar"] > div:first-child {
49
  background-color: #1A1A1A !important;
50
- color: #D3D3D3 !important;
51
  padding: 2rem 1.5rem 1.5rem 1.5rem !important;
52
  border-radius: 12px;
53
  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
@@ -60,16 +60,18 @@ st.markdown("""
60
  padding-left: 2rem !important;
61
  padding-right: 2rem !important;
62
  box-shadow: none !important;
63
-
64
  }
 
 
 
65
  </style>
66
  """, unsafe_allow_html=True)
67
 
68
  # Optional: Add a banner image (replace with your image URL)
69
- st.markdown('<img src="https://cdn-uploads.huggingface.co/production/uploads/67351c643fe51cb1aa28f2e5/7ThcAOjbuM8ajrP85bGs4.jpeg" class="banner">', unsafe_allow_html=True)
70
 
71
  # Streamlit app header
72
- st.title("LOC MetaDiscovery Agent")
73
  st.markdown("""
74
  This tool connects to the LOC API, retrieves metadata from a selected collection, and performs
75
  an analysis of metadata completeness, suggests enhancements, and identifies authority gaps.
@@ -88,11 +90,22 @@ st.sidebar.markdown("## Settings")
88
  selected = st.sidebar.selectbox("Select a collection", list(collections.keys()))
89
  search_query = collections[selected]
90
 
91
- # Use the main search endpoint (most reliable)
92
- collection_url = f"https://www.loc.gov/search/?q={search_query}&fo=json"
 
 
 
 
 
 
93
  st.sidebar.write(f"Selected Collection: {selected}")
94
  st.sidebar.markdown(f"<span style='color: lightgray;'>API URL: {collection_url}</span>", unsafe_allow_html=True)
95
 
 
 
 
 
 
96
 
97
  # Fetch data from LOC API with spoofed User-Agent header
98
  headers = {
@@ -155,11 +168,11 @@ def is_valid_date(value):
155
  return False
156
 
157
  if not metadata_df.empty:
158
- st.subheader("Retrieved Metadata Sample")
159
  st.dataframe(metadata_df.head())
160
 
161
  # Metadata completeness analysis (enhanced)
162
- st.subheader("Metadata Completeness Analysis")
163
  completeness = metadata_df.map(lambda x: not is_incomplete(x)).mean() * 100
164
  completeness_df = pd.DataFrame({"Field": completeness.index, "Completeness (%)": completeness.values})
165
  fig = px.bar(completeness_df, x="Field", y="Completeness (%)", title="Metadata Completeness by Field")
@@ -169,19 +182,19 @@ if not metadata_df.empty:
169
  incomplete_mask = metadata_df.map(is_incomplete).any(axis=1)
170
  incomplete_records = metadata_df[incomplete_mask]
171
 
172
- st.subheader("Records with Incomplete Metadata")
173
  if not incomplete_records.empty:
174
  st.dataframe(incomplete_records.astype(str))
175
  else:
176
  st.success("All metadata fields are complete in this collection!")
177
 
178
- st.subheader("Identifiers of Items Needing Metadata Updates")
179
  if not incomplete_records.empty:
180
  st.write(incomplete_records[['id', 'title']])
181
  else:
182
  st.success("All records are complete!")
183
 
184
- st.subheader("Suggested Metadata Enhancements")
185
  filled_descriptions = metadata_df[metadata_df['description'].notnull()]['description'].astype(str)
186
  if len(filled_descriptions) > 1:
187
  try:
 
34
  border-radius: 12px;
35
  margin-bottom: 1rem;
36
  }
37
+ .stAlert {
38
  background-color: #f0f0f5 !important;
39
+ color: #333333 !important;
40
  padding: 1.25rem !important;
41
  font-size: 1rem !important;
42
  border-radius: 0.5rem !important;
43
  box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05) !important;
44
  }
45
  header[data-testid="stHeader"] {
46
+ background-color: #D3D3D3 !important;
47
  }
48
  section[data-testid="stSidebar"] > div:first-child {
49
  background-color: #1A1A1A !important;
50
+ color: #FFFFFF !important;
51
  padding: 2rem 1.5rem 1.5rem 1.5rem !important;
52
  border-radius: 12px;
53
  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
 
60
  padding-left: 2rem !important;
61
  padding-right: 2rem !important;
62
  box-shadow: none !important;
 
63
  }
64
+ html, body, [data-testid="stApp"] {
65
+ background-color: #FFFFFF !important;
66
+ }
67
  </style>
68
  """, unsafe_allow_html=True)
69
 
70
  # Optional: Add a banner image (replace with your image URL)
71
+ st.image("banner.jpg", use_column_width=True)
72
 
73
  # Streamlit app header
74
+ st.title("MetaDiscovery Agent for Library of Congress Collections")
75
  st.markdown("""
76
  This tool connects to the LOC API, retrieves metadata from a selected collection, and performs
77
  an analysis of metadata completeness, suggests enhancements, and identifies authority gaps.
 
90
  selected = st.sidebar.selectbox("Select a collection", list(collections.keys()))
91
  search_query = collections[selected]
92
 
93
+ # About / Help
94
+ with st.sidebar.expander("ℹ️ About This Tool"):
95
+ st.markdown(
96
+ "This agent audits and enhances metadata from Library of Congress digital collections. "
97
+ "Select a collection from the dropdown to begin your analysis."
98
+ )
99
+
100
+ # Display API URL
101
  st.sidebar.write(f"Selected Collection: {selected}")
102
  st.sidebar.markdown(f"<span style='color: lightgray;'>API URL: {collection_url}</span>", unsafe_allow_html=True)
103
 
104
+ # Summary Stats (conditionally displayed after data load)
105
+ if 'metadata_df' in locals() and not metadata_df.empty:
106
+ st.sidebar.markdown("### 📊 Quick Stats")
107
+ st.sidebar.write(f"Total Records: {len(metadata_df)}")
108
+ st.sidebar.write(f"Incomplete Records: {incomplete_records.shape[0]}")
109
 
110
  # Fetch data from LOC API with spoofed User-Agent header
111
  headers = {
 
168
  return False
169
 
170
  if not metadata_df.empty:
171
+ st.subheader("📦 Retrieved Metadata Sample")
172
  st.dataframe(metadata_df.head())
173
 
174
  # Metadata completeness analysis (enhanced)
175
+ st.subheader("🧠 Metadata Completeness Analysis")
176
  completeness = metadata_df.map(lambda x: not is_incomplete(x)).mean() * 100
177
  completeness_df = pd.DataFrame({"Field": completeness.index, "Completeness (%)": completeness.values})
178
  fig = px.bar(completeness_df, x="Field", y="Completeness (%)", title="Metadata Completeness by Field")
 
182
  incomplete_mask = metadata_df.map(is_incomplete).any(axis=1)
183
  incomplete_records = metadata_df[incomplete_mask]
184
 
185
+ st.subheader("⚠️ Records with Incomplete Metadata")
186
  if not incomplete_records.empty:
187
  st.dataframe(incomplete_records.astype(str))
188
  else:
189
  st.success("All metadata fields are complete in this collection!")
190
 
191
+ st.subheader("📌 Identifiers of Items Needing Metadata Updates")
192
  if not incomplete_records.empty:
193
  st.write(incomplete_records[['id', 'title']])
194
  else:
195
  st.success("All records are complete!")
196
 
197
+ st.subheader("Suggested Metadata Enhancements")
198
  filled_descriptions = metadata_df[metadata_df['description'].notnull()]['description'].astype(str)
199
  if len(filled_descriptions) > 1:
200
  try: