Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -34,20 +34,20 @@ st.markdown("""
|
|
34 |
border-radius: 12px;
|
35 |
margin-bottom: 1rem;
|
36 |
}
|
37 |
-
|
38 |
background-color: #f0f0f5 !important;
|
39 |
-
color: #
|
40 |
padding: 1.25rem !important;
|
41 |
font-size: 1rem !important;
|
42 |
border-radius: 0.5rem !important;
|
43 |
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05) !important;
|
44 |
}
|
45 |
header[data-testid="stHeader"] {
|
46 |
-
background-color:
|
47 |
}
|
48 |
section[data-testid="stSidebar"] > div:first-child {
|
49 |
background-color: #1A1A1A !important;
|
50 |
-
color: #
|
51 |
padding: 2rem 1.5rem 1.5rem 1.5rem !important;
|
52 |
border-radius: 12px;
|
53 |
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
|
@@ -60,16 +60,18 @@ st.markdown("""
|
|
60 |
padding-left: 2rem !important;
|
61 |
padding-right: 2rem !important;
|
62 |
box-shadow: none !important;
|
63 |
-
|
64 |
}
|
|
|
|
|
|
|
65 |
</style>
|
66 |
""", unsafe_allow_html=True)
|
67 |
|
68 |
# Optional: Add a banner image (replace with your image URL)
|
69 |
-
st.
|
70 |
|
71 |
# Streamlit app header
|
72 |
-
st.title("
|
73 |
st.markdown("""
|
74 |
This tool connects to the LOC API, retrieves metadata from a selected collection, and performs
|
75 |
an analysis of metadata completeness, suggests enhancements, and identifies authority gaps.
|
@@ -88,11 +90,22 @@ st.sidebar.markdown("## Settings")
|
|
88 |
selected = st.sidebar.selectbox("Select a collection", list(collections.keys()))
|
89 |
search_query = collections[selected]
|
90 |
|
91 |
-
#
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
st.sidebar.write(f"Selected Collection: {selected}")
|
94 |
st.sidebar.markdown(f"<span style='color: lightgray;'>API URL: {collection_url}</span>", unsafe_allow_html=True)
|
95 |
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
# Fetch data from LOC API with spoofed User-Agent header
|
98 |
headers = {
|
@@ -155,11 +168,11 @@ def is_valid_date(value):
|
|
155 |
return False
|
156 |
|
157 |
if not metadata_df.empty:
|
158 |
-
st.subheader("Retrieved Metadata Sample")
|
159 |
st.dataframe(metadata_df.head())
|
160 |
|
161 |
# Metadata completeness analysis (enhanced)
|
162 |
-
st.subheader("Metadata Completeness Analysis")
|
163 |
completeness = metadata_df.map(lambda x: not is_incomplete(x)).mean() * 100
|
164 |
completeness_df = pd.DataFrame({"Field": completeness.index, "Completeness (%)": completeness.values})
|
165 |
fig = px.bar(completeness_df, x="Field", y="Completeness (%)", title="Metadata Completeness by Field")
|
@@ -169,19 +182,19 @@ if not metadata_df.empty:
|
|
169 |
incomplete_mask = metadata_df.map(is_incomplete).any(axis=1)
|
170 |
incomplete_records = metadata_df[incomplete_mask]
|
171 |
|
172 |
-
st.subheader("Records with Incomplete Metadata")
|
173 |
if not incomplete_records.empty:
|
174 |
st.dataframe(incomplete_records.astype(str))
|
175 |
else:
|
176 |
st.success("All metadata fields are complete in this collection!")
|
177 |
|
178 |
-
st.subheader("Identifiers of Items Needing Metadata Updates")
|
179 |
if not incomplete_records.empty:
|
180 |
st.write(incomplete_records[['id', 'title']])
|
181 |
else:
|
182 |
st.success("All records are complete!")
|
183 |
|
184 |
-
st.subheader("Suggested Metadata Enhancements")
|
185 |
filled_descriptions = metadata_df[metadata_df['description'].notnull()]['description'].astype(str)
|
186 |
if len(filled_descriptions) > 1:
|
187 |
try:
|
|
|
34 |
border-radius: 12px;
|
35 |
margin-bottom: 1rem;
|
36 |
}
|
37 |
+
.stAlert {
|
38 |
background-color: #f0f0f5 !important;
|
39 |
+
color: #333333 !important;
|
40 |
padding: 1.25rem !important;
|
41 |
font-size: 1rem !important;
|
42 |
border-radius: 0.5rem !important;
|
43 |
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05) !important;
|
44 |
}
|
45 |
header[data-testid="stHeader"] {
|
46 |
+
background-color: #D3D3D3 !important;
|
47 |
}
|
48 |
section[data-testid="stSidebar"] > div:first-child {
|
49 |
background-color: #1A1A1A !important;
|
50 |
+
color: #FFFFFF !important;
|
51 |
padding: 2rem 1.5rem 1.5rem 1.5rem !important;
|
52 |
border-radius: 12px;
|
53 |
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
|
|
|
60 |
padding-left: 2rem !important;
|
61 |
padding-right: 2rem !important;
|
62 |
box-shadow: none !important;
|
|
|
63 |
}
|
64 |
+
html, body, [data-testid="stApp"] {
|
65 |
+
background-color: #FFFFFF !important;
|
66 |
+
}
|
67 |
</style>
|
68 |
""", unsafe_allow_html=True)
|
69 |
|
70 |
# Optional: Add a banner image (replace with your image URL)
|
71 |
+
st.image("banner.jpg", use_column_width=True)
|
72 |
|
73 |
# Streamlit app header
|
74 |
+
st.title("MetaDiscovery Agent for Library of Congress Collections")
|
75 |
st.markdown("""
|
76 |
This tool connects to the LOC API, retrieves metadata from a selected collection, and performs
|
77 |
an analysis of metadata completeness, suggests enhancements, and identifies authority gaps.
|
|
|
90 |
selected = st.sidebar.selectbox("Select a collection", list(collections.keys()))
|
91 |
search_query = collections[selected]
|
92 |
|
93 |
+
# About / Help
|
94 |
+
with st.sidebar.expander("ℹ️ About This Tool"):
|
95 |
+
st.markdown(
|
96 |
+
"This agent audits and enhances metadata from Library of Congress digital collections. "
|
97 |
+
"Select a collection from the dropdown to begin your analysis."
|
98 |
+
)
|
99 |
+
|
100 |
+
# Display API URL
|
101 |
st.sidebar.write(f"Selected Collection: {selected}")
|
102 |
st.sidebar.markdown(f"<span style='color: lightgray;'>API URL: {collection_url}</span>", unsafe_allow_html=True)
|
103 |
|
104 |
+
# Summary Stats (conditionally displayed after data load)
|
105 |
+
if 'metadata_df' in locals() and not metadata_df.empty:
|
106 |
+
st.sidebar.markdown("### 📊 Quick Stats")
|
107 |
+
st.sidebar.write(f"Total Records: {len(metadata_df)}")
|
108 |
+
st.sidebar.write(f"Incomplete Records: {incomplete_records.shape[0]}")
|
109 |
|
110 |
# Fetch data from LOC API with spoofed User-Agent header
|
111 |
headers = {
|
|
|
168 |
return False
|
169 |
|
170 |
if not metadata_df.empty:
|
171 |
+
st.subheader("📦 Retrieved Metadata Sample")
|
172 |
st.dataframe(metadata_df.head())
|
173 |
|
174 |
# Metadata completeness analysis (enhanced)
|
175 |
+
st.subheader("🧠 Metadata Completeness Analysis")
|
176 |
completeness = metadata_df.map(lambda x: not is_incomplete(x)).mean() * 100
|
177 |
completeness_df = pd.DataFrame({"Field": completeness.index, "Completeness (%)": completeness.values})
|
178 |
fig = px.bar(completeness_df, x="Field", y="Completeness (%)", title="Metadata Completeness by Field")
|
|
|
182 |
incomplete_mask = metadata_df.map(is_incomplete).any(axis=1)
|
183 |
incomplete_records = metadata_df[incomplete_mask]
|
184 |
|
185 |
+
st.subheader("⚠️ Records with Incomplete Metadata")
|
186 |
if not incomplete_records.empty:
|
187 |
st.dataframe(incomplete_records.astype(str))
|
188 |
else:
|
189 |
st.success("All metadata fields are complete in this collection!")
|
190 |
|
191 |
+
st.subheader("📌 Identifiers of Items Needing Metadata Updates")
|
192 |
if not incomplete_records.empty:
|
193 |
st.write(incomplete_records[['id', 'title']])
|
194 |
else:
|
195 |
st.success("All records are complete!")
|
196 |
|
197 |
+
st.subheader("✨ Suggested Metadata Enhancements")
|
198 |
filled_descriptions = metadata_df[metadata_df['description'].notnull()]['description'].astype(str)
|
199 |
if len(filled_descriptions) > 1:
|
200 |
try:
|