CCockrum committed on
Commit
d707455
·
verified ·
1 Parent(s): 23ac8a9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# MetaDiscovery Agent - Phase 1: LOC API Integration and Metadata Gap Analysis
#
# Streamlit script: fetches one Library of Congress collection as JSON,
# tabulates a fixed set of metadata fields, and reports how completely each
# field is populated. Streamlit re-executes this file top to bottom on every
# interaction, so the main flow intentionally lives at module level.

import requests
import pandas as pd
import streamlit as st
import plotly.express as px

# Metadata fields pulled from every record, in display order.
METADATA_FIELDS = ("title", "date", "subject", "creator", "description")

# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would block the app indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def fetch_records(url, timeout=REQUEST_TIMEOUT_SECONDS):
    """Return the list stored under ``"results"`` in the JSON document at *url*.

    Args:
        url: Address of a JSON endpoint.
        timeout: Seconds to wait for the server before aborting the request.

    Returns:
        The ``"results"`` list, or an empty list when the key is absent or null.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        ValueError: If the body is not valid JSON or is not a JSON object.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    payload = response.json()
    if not isinstance(payload, dict):
        raise ValueError("expected a JSON object at the top level of the response")
    # ``or []`` also covers an explicit ``"results": null`` in the payload.
    return payload.get("results") or []


def extract_metadata(records, fields=METADATA_FIELDS):
    """Build a DataFrame holding the selected *fields* of each record.

    Entries of *records* that are not dicts are skipped. The returned frame
    always has one column per field, even when *records* is empty.
    """
    rows = [
        {field: record.get(field) for field in fields}
        for record in records
        if isinstance(record, dict)
    ]
    return pd.DataFrame(rows, columns=list(fields))


def _is_missing(value):
    """Return True when *value* carries no metadata.

    Besides None/NaN, blank strings and empty containers count as missing;
    ``notnull()`` alone would report them as populated.
    """
    if isinstance(value, str):
        return not value.strip()
    if isinstance(value, (list, tuple, set, dict)):
        return len(value) == 0
    return bool(pd.isna(value))


def missing_value_mask(frame):
    """Return a boolean DataFrame marking the cells of *frame* that are missing."""
    # Column-wise ``Series.map`` is used for compatibility across pandas versions.
    return frame.apply(lambda column: column.map(_is_missing))


# Streamlit app header
st.title("MetaDiscovery Agent for Library of Congress Collections")
st.markdown("""
This tool connects to the LOC API, retrieves metadata from a selected collection, and performs
an initial analysis of metadata completeness.
""")

# User selects a collection (predefined for prototype)
collection_url = "https://www.loc.gov/collections/american-revolutionary-war-maps/?fo=json"
st.sidebar.markdown("## Settings")
st.sidebar.write("Collection: American Revolutionary War Maps")

# Fetch data from LOC API; show a readable error instead of a raw traceback.
try:
    records = fetch_records(collection_url)
except (requests.RequestException, ValueError) as exc:
    st.error(f"Could not retrieve metadata from the LOC API: {exc}")
    st.stop()

# Create DataFrame from the selected metadata fields
metadata_df = extract_metadata(records)
if metadata_df.empty:
    # Nothing to analyse; the charts below would be empty or undefined.
    st.warning("The LOC API returned no records for this collection.")
    st.stop()

st.subheader("📦 Retrieved Metadata Sample")
st.dataframe(metadata_df.head())

# Metadata completeness analysis
st.subheader("🧠 Metadata Completeness Analysis")
missing_mask = missing_value_mask(metadata_df)
completeness = (~missing_mask).mean() * 100
completeness_df = pd.DataFrame({"Field": completeness.index, "Completeness (%)": completeness.values})

# Plot completeness
fig = px.bar(completeness_df, x="Field", y="Completeness (%)", title="Metadata Completeness by Field")
st.plotly_chart(fig)

# List records with missing values
st.subheader("⚠️ Records with Incomplete Metadata")
incomplete_records = metadata_df[missing_mask.any(axis=1)]
st.dataframe(incomplete_records)