Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
# MetaDiscovery Agent - LOC API with Enhanced Completeness and Quality Analysis
|
2 |
import requests
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
@@ -8,7 +7,7 @@ import plotly.express as px
|
|
8 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
9 |
from sklearn.metrics.pairwise import cosine_similarity
|
10 |
|
11 |
-
# Custom CSS
|
12 |
st.markdown("""
|
13 |
<style>
|
14 |
|
@@ -119,7 +118,7 @@ st.markdown("""
|
|
119 |
</style>
|
120 |
""", unsafe_allow_html=True)
|
121 |
|
122 |
-
#
|
123 |
st.image("https://cdn-uploads.huggingface.co/production/uploads/67351c643fe51cb1aa28f2e5/7ThcAOjbuM8ajrP85bGs4.jpeg", use_container_width=True)
|
124 |
|
125 |
# Streamlit app header
|
@@ -129,7 +128,7 @@ This tool connects to the LOC API, retrieves metadata from a selected collection
|
|
129 |
an analysis of metadata completeness, suggests enhancements, and identifies authority gaps.
|
130 |
""")
|
131 |
|
132 |
-
# Updated collection URLs using the correct LOC API
|
133 |
collections = {
|
134 |
"American Revolutionary War Maps": "american+revolutionary+war+maps",
|
135 |
"Civil War Maps": "civil+war+maps",
|
@@ -240,12 +239,13 @@ if fetch_data:
|
|
240 |
completeness_df = pd.DataFrame({"Field": completeness.index, "Completeness (%)": completeness.values})
|
241 |
completeness_table = completeness_df.set_index("Field")
|
242 |
|
243 |
-
#
|
244 |
quick_stats_df = pd.DataFrame({
|
245 |
"Metric": ["Total Records", "Incomplete Records", "Overall Completeness (%)"],
|
246 |
"Value": [len(metadata_df), incomplete_count, round(overall_percent, 1)]
|
247 |
})
|
248 |
|
|
|
249 |
st.sidebar.markdown("""
|
250 |
<div style='
|
251 |
background-color: #2b2b2b;
|
@@ -254,17 +254,18 @@ if fetch_data:
|
|
254 |
margin-bottom: 1.5rem;
|
255 |
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
|
256 |
'>
|
257 |
-
<h4 style='color: #
|
258 |
</div>
|
259 |
""", unsafe_allow_html=True)
|
260 |
|
|
|
261 |
st.sidebar.dataframe(
|
262 |
quick_stats_df.style
|
263 |
-
.hide(axis="index") # 🔥 Hide index here!
|
264 |
.background_gradient(cmap="Oranges", subset=["Value"])
|
265 |
.format({"Value": "{:.1f}"}),
|
266 |
use_container_width=True,
|
267 |
-
height=240
|
|
|
268 |
)
|
269 |
|
270 |
# Calculate Top 10 Subjects
|
@@ -272,15 +273,15 @@ if fetch_data:
|
|
272 |
top_subjects = (
|
273 |
metadata_df['subject']
|
274 |
.dropna()
|
275 |
-
.str.split(',')
|
276 |
-
.explode()
|
277 |
-
.str.strip()
|
278 |
.value_counts()
|
279 |
.head(10)
|
280 |
.to_frame(name="Count")
|
281 |
)
|
282 |
|
283 |
-
#
|
284 |
with st.sidebar.expander("Top 10 Most Common Subjects", expanded=True):
|
285 |
st.dataframe(
|
286 |
top_subjects.style.background_gradient(cmap="Greens").format("{:.0f}"),
|
@@ -288,7 +289,7 @@ if fetch_data:
|
|
288 |
height=240
|
289 |
)
|
290 |
|
291 |
-
with st.sidebar.expander("
|
292 |
st.markdown("""
|
293 |
<style>
|
294 |
.sidebar-links a {
|
@@ -326,8 +327,7 @@ if fetch_data:
|
|
326 |
st.dataframe(metadata_df.head())
|
327 |
|
328 |
|
329 |
-
#
|
330 |
-
|
331 |
st.subheader("Field Completeness Breakdown")
|
332 |
|
333 |
st.markdown("""
|
@@ -392,5 +392,5 @@ if fetch_data:
|
|
392 |
</div>
|
393 |
""", unsafe_allow_html=True)
|
394 |
else:
|
395 |
-
st.warning("
|
396 |
|
|
|
|
|
1 |
import requests
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
|
|
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
|
10 |
+
# Custom CSS
|
11 |
st.markdown("""
|
12 |
<style>
|
13 |
|
|
|
118 |
</style>
|
119 |
""", unsafe_allow_html=True)
|
120 |
|
121 |
+
# Use an image from a URL for the banner
|
122 |
st.image("https://cdn-uploads.huggingface.co/production/uploads/67351c643fe51cb1aa28f2e5/7ThcAOjbuM8ajrP85bGs4.jpeg", use_container_width=True)
|
123 |
|
124 |
# Streamlit app header
|
|
|
128 |
an analysis of metadata completeness, suggests enhancements, and identifies authority gaps.
|
129 |
""")
|
130 |
|
131 |
+
# Updated collection URLs using the correct LOC API
|
132 |
collections = {
|
133 |
"American Revolutionary War Maps": "american+revolutionary+war+maps",
|
134 |
"Civil War Maps": "civil+war+maps",
|
|
|
239 |
completeness_df = pd.DataFrame({"Field": completeness.index, "Completeness (%)": completeness.values})
|
240 |
completeness_table = completeness_df.set_index("Field")
|
241 |
|
242 |
+
# Sidebar Quick Stats (fancy card version)
|
243 |
quick_stats_df = pd.DataFrame({
|
244 |
"Metric": ["Total Records", "Incomplete Records", "Overall Completeness (%)"],
|
245 |
"Value": [len(metadata_df), incomplete_count, round(overall_percent, 1)]
|
246 |
})
|
247 |
|
248 |
+
# Card-like background container
|
249 |
st.sidebar.markdown("""
|
250 |
<div style='
|
251 |
background-color: #2b2b2b;
|
|
|
254 |
margin-bottom: 1.5rem;
|
255 |
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
|
256 |
'>
|
257 |
+
<h4 style='color: #FFA500; margin-bottom: 1rem;'>Quick Stats</h4>
|
258 |
</div>
|
259 |
""", unsafe_allow_html=True)
|
260 |
|
261 |
+
# Now display the styled dataframe
|
262 |
st.sidebar.dataframe(
|
263 |
quick_stats_df.style
|
|
|
264 |
.background_gradient(cmap="Oranges", subset=["Value"])
|
265 |
.format({"Value": "{:.1f}"}),
|
266 |
use_container_width=True,
|
267 |
+
height=240,
|
268 |
+
hide_index=True #<<--- add THIS here to hide 0,1,2
|
269 |
)
|
270 |
|
271 |
# Calculate Top 10 Subjects
|
|
|
273 |
top_subjects = (
|
274 |
metadata_df['subject']
|
275 |
.dropna()
|
276 |
+
.str.split(',')
|
277 |
+
.explode()
|
278 |
+
.str.strip()
|
279 |
.value_counts()
|
280 |
.head(10)
|
281 |
.to_frame(name="Count")
|
282 |
)
|
283 |
|
284 |
+
#Most Common Subjects in Sidebar
|
285 |
with st.sidebar.expander("Top 10 Most Common Subjects", expanded=True):
|
286 |
st.dataframe(
|
287 |
top_subjects.style.background_gradient(cmap="Greens").format("{:.0f}"),
|
|
|
289 |
height=240
|
290 |
)
|
291 |
|
292 |
+
with st.sidebar.expander("Helpful Resources", expanded=False):
|
293 |
st.markdown("""
|
294 |
<style>
|
295 |
.sidebar-links a {
|
|
|
327 |
st.dataframe(metadata_df.head())
|
328 |
|
329 |
|
330 |
+
# Fill the placeholder created earlier
|
|
|
331 |
st.subheader("Field Completeness Breakdown")
|
332 |
|
333 |
st.markdown("""
|
|
|
392 |
</div>
|
393 |
""", unsafe_allow_html=True)
|
394 |
else:
|
395 |
+
st.warning("No metadata records found for this collection. Try selecting another one.")
|
396 |
|