# Spaces: Running
# MetaDiscovery Agent - Phase 1: LOC API Integration and Metadata Gap Analysis
import pandas as pd
import plotly.express as px
import requests
import streamlit as st
# Metadata fields inspected for completeness, in display order.
METADATA_FIELDS = ("title", "date", "subject", "creator", "description")

# Seconds to wait for the LOC API before giving up, so the app cannot hang
# forever on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 30

# User selects a collection (predefined for prototype)
collection_url = "https://www.loc.gov/collections/american-revolutionary-war-maps/?fo=json"

# Introductory text rendered under the title. Kept at module level so the
# literal is not indented (leading spaces would change the rendered markdown).
APP_DESCRIPTION = """
This tool connects to the LOC API, retrieves metadata from a selected collection, and performs
an initial analysis of metadata completeness.
"""


def _has_content(value) -> bool:
    """Return True when *value* carries actual metadata.

    ``None``, blank strings, empty containers, and sequences whose elements
    are all empty are treated as missing. Any other value (including ``0``)
    counts as present.
    """
    if value is None:
        return False
    if isinstance(value, str):
        return bool(value.strip())
    if isinstance(value, dict):
        return bool(value)
    if isinstance(value, (list, tuple, set)):
        return any(_has_content(element) for element in value)
    return True


def extract_items(records, fields=METADATA_FIELDS) -> list:
    """Pick the selected metadata fields out of raw API records.

    Args:
        records: The ``results`` value of the API response. Anything that is
            not a list/tuple yields an empty result; entries that are not
            dicts are skipped.
        fields: Names of the fields to extract from each record.

    Returns:
        One dict per usable record, keyed by every name in *fields*. A field
        that is absent or empty (see ``_has_content``) is stored as ``None``
        so that it is counted as missing by the completeness analysis.
    """
    if not isinstance(records, (list, tuple)):
        return []
    return [
        {
            field: (record.get(field) if _has_content(record.get(field)) else None)
            for field in fields
        }
        for record in records
        if isinstance(record, dict)
    ]


def compute_completeness(metadata_df: pd.DataFrame) -> pd.DataFrame:
    """Compute the percentage of non-null values for each column.

    Args:
        metadata_df: One row per record, one column per metadata field.

    Returns:
        A DataFrame with a ``Field`` column (the input's column names) and a
        ``Completeness (%)`` column (share of non-null rows, times 100).
    """
    completeness = metadata_df.notnull().mean() * 100
    return pd.DataFrame(
        {"Field": completeness.index, "Completeness (%)": completeness.values}
    )


# Streamlit executes the script as ``__main__``, so everything below still runs
# under ``streamlit run``; the guard only keeps a plain ``import`` of this file
# free of network and UI side effects. The statements stay at module scope (not
# inside a function) so the names they bind remain module-level globals.
if __name__ == "__main__":
    # Streamlit app header
    st.title("MetaDiscovery Agent for Library of Congress Collections")
    st.markdown(APP_DESCRIPTION)

    st.sidebar.markdown("## Settings")
    st.sidebar.write("Collection: American Revolutionary War Maps")

    # Fetch data from LOC API. Network failures, HTTP error statuses and
    # non-JSON bodies are reported in the UI instead of crashing the app.
    # NOTE(review): only the results contained in this single response are
    # analysed; presumably larger collections are paginated - confirm.
    try:
        response = requests.get(collection_url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        st.error(f"Could not retrieve metadata from the LOC API: {exc}")
        st.stop()

    # Parse metadata records
    records = data.get("results", []) if isinstance(data, dict) else []

    # Extract selected metadata fields
    items = extract_items(records)

    # Create DataFrame; explicit columns keep the layout stable even when
    # there are no rows.
    metadata_df = pd.DataFrame(items, columns=list(METADATA_FIELDS))
    if metadata_df.empty:
        st.warning("The LOC API returned no records for this collection.")
        st.stop()

    st.subheader("📦 Retrieved Metadata Sample")
    st.dataframe(metadata_df.head())

    # Metadata completeness analysis
    st.subheader("🧠 Metadata Completeness Analysis")
    completeness_df = compute_completeness(metadata_df)

    # Plot completeness
    fig = px.bar(
        completeness_df,
        x="Field",
        y="Completeness (%)",
        title="Metadata Completeness by Field",
    )
    st.plotly_chart(fig)

    # List records with missing values
    st.subheader("⚠️ Records with Incomplete Metadata")
    incomplete_records = metadata_df[metadata_df.isnull().any(axis=1)]
    st.dataframe(incomplete_records)