Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -27,8 +27,8 @@ st.sidebar.markdown("## Settings")
|
|
27 |
selected = st.sidebar.selectbox("Select a collection", list(collections.keys()))
|
28 |
collection_path = collections[selected]
|
29 |
|
30 |
-
# Updated: Use LOC Search API with partof filter
|
31 |
-
collection_url = f"https://www.loc.gov/search/?q=&fa=partof:{collection_path}&fo=json"
|
32 |
st.sidebar.write(f"Selected Collection: {selected}")
|
33 |
|
34 |
# Fetch data from LOC API
|
@@ -42,6 +42,7 @@ records = data.get("results", [])
|
|
42 |
items = []
|
43 |
for record in records:
|
44 |
items.append({
|
|
|
45 |
"title": record.get("title"),
|
46 |
"date": record.get("date"),
|
47 |
"subject": record.get("subject"),
|
@@ -70,6 +71,13 @@ if not metadata_df.empty:
|
|
70 |
incomplete_records = metadata_df[metadata_df.isnull().any(axis=1)]
|
71 |
st.dataframe(incomplete_records)
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
# Suggest metadata using text similarity (basic example)
|
74 |
st.subheader("✨ Suggested Metadata Enhancements")
|
75 |
filled_descriptions = metadata_df[metadata_df['description'].notnull()]['description'].astype(str)
|
@@ -80,7 +88,6 @@ if not metadata_df.empty:
|
|
80 |
suggestions = []
|
81 |
for idx, row in incomplete_records.iterrows():
|
82 |
if pd.isna(row['subject']) and pd.notna(row['description']):
|
83 |
-
# Find most similar description
|
84 |
desc_vec = tfidf.transform([str(row['description'])])
|
85 |
sims = cosine_similarity(desc_vec, tfidf_matrix).flatten()
|
86 |
top_idx = sims.argmax()
|
|
|
27 |
selected = st.sidebar.selectbox("Select a collection", list(collections.keys()))
|
28 |
collection_path = collections[selected]
|
29 |
|
30 |
+
# Updated: Use LOC Search API with partof filter (URL encoding for colon)
|
31 |
+
collection_url = f"https://www.loc.gov/search/?q=&fa=partof%3A{collection_path}&fo=json"
|
32 |
st.sidebar.write(f"Selected Collection: {selected}")
|
33 |
|
34 |
# Fetch data from LOC API
|
|
|
42 |
items = []
|
43 |
for record in records:
|
44 |
items.append({
|
45 |
+
"id": record.get("id"),
|
46 |
"title": record.get("title"),
|
47 |
"date": record.get("date"),
|
48 |
"subject": record.get("subject"),
|
|
|
71 |
incomplete_records = metadata_df[metadata_df.isnull().any(axis=1)]
|
72 |
st.dataframe(incomplete_records)
|
73 |
|
74 |
+
# Show exact items that need updates
|
75 |
+
st.subheader("📌 Identifiers of Items Needing Metadata Updates")
|
76 |
+
if not incomplete_records.empty:
|
77 |
+
st.write(incomplete_records[['id', 'title']])
|
78 |
+
else:
|
79 |
+
st.success("All records are complete!")
|
80 |
+
|
81 |
# Suggest metadata using text similarity (basic example)
|
82 |
st.subheader("✨ Suggested Metadata Enhancements")
|
83 |
filled_descriptions = metadata_df[metadata_df['description'].notnull()]['description'].astype(str)
|
|
|
88 |
suggestions = []
|
89 |
for idx, row in incomplete_records.iterrows():
|
90 |
if pd.isna(row['subject']) and pd.notna(row['description']):
|
|
|
91 |
desc_vec = tfidf.transform([str(row['description'])])
|
92 |
sims = cosine_similarity(desc_vec, tfidf_matrix).flatten()
|
93 |
top_idx = sims.argmax()
|