Spaces:

CCockrum
/

LOC-Metadate-Analyzer

Running

App Files Community

CCockrum committed on Apr 26

Commit

58fb3b6

verified ·

1 Parent(s): 20d01cc

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -16

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# MetaDiscovery Agent - LOC API with Enhanced Completeness and Quality Analysis
 import requests
 import pandas as pd
 import numpy as np
@@ -8,7 +7,7 @@ import plotly.express as px
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
-# Custom CSS for white background, styled sidebar, banner, and dark grey font
 st.markdown("""
     <style>
@@ -119,7 +118,7 @@ st.markdown("""
 </style>
 """, unsafe_allow_html=True)
-# OPTION 1: Use an image from a URL for the banner
 st.image("https://cdn-uploads.huggingface.co/production/uploads/67351c643fe51cb1aa28f2e5/7ThcAOjbuM8ajrP85bGs4.jpeg", use_container_width=True)
 # Streamlit app header
@@ -129,7 +128,7 @@ This tool connects to the LOC API, retrieves metadata from a selected collection
 an analysis of metadata completeness, suggests enhancements, and identifies authority gaps.
 """)
-# Updated collection URLs using the correct LOC API format
 collections = {
     "American Revolutionary War Maps": "american+revolutionary+war+maps",
     "Civil War Maps": "civil+war+maps",
@@ -240,12 +239,13 @@ if fetch_data:
             completeness_df = pd.DataFrame({"Field": completeness.index, "Completeness (%)": completeness.values})
             completeness_table = completeness_df.set_index("Field")
-            # 📊 Sidebar Quick Stats (fancy card version)
             quick_stats_df = pd.DataFrame({
                 "Metric": ["Total Records", "Incomplete Records", "Overall Completeness (%)"],
                 "Value": [len(metadata_df), incomplete_count, round(overall_percent, 1)]
             })
             st.sidebar.markdown("""
                 <div style='
                     background-color: #2b2b2b;
@@ -254,17 +254,18 @@ if fetch_data:
                     margin-bottom: 1.5rem;
                     box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
                 '>
-                    <h4 style='color: #90EE90; margin-bottom: 1rem;'>📊 Quick Stats</h4>
                 </div>
             """, unsafe_allow_html=True)
             st.sidebar.dataframe(
                 quick_stats_df.style
-                    .hide(axis="index")  # 🔥 Hide index here!
                     .background_gradient(cmap="Oranges", subset=["Value"])
                     .format({"Value": "{:.1f}"}),
                 use_container_width=True,
-                height=240
             )
             # Calculate Top 10 Subjects
@@ -272,15 +273,15 @@ if fetch_data:
                 top_subjects = (
                     metadata_df['subject']
                     .dropna()
-                    .str.split(',')              # Split multiple subjects per record
-                    .explode()                   # Expand into separate rows
-                    .str.strip()                  # Remove whitespace
                     .value_counts()
                     .head(10)
                     .to_frame(name="Count")
                 )
-            # 📚 Most Common Subjects in Sidebar
             with st.sidebar.expander("Top 10 Most Common Subjects", expanded=True):
                 st.dataframe(
                     top_subjects.style.background_gradient(cmap="Greens").format("{:.0f}"),
@@ -288,7 +289,7 @@ if fetch_data:
                     height=240
     )
-        with st.sidebar.expander("🔗 Helpful Resources", expanded=False):
             st.markdown("""
                 <style>
                     .sidebar-links a {
@@ -326,8 +327,7 @@ if fetch_data:
             st.dataframe(metadata_df.head())
-            # FILL THE PLACEHOLDER created earlier
             st.subheader("Field Completeness Breakdown")
             st.markdown("""
@@ -392,5 +392,5 @@ if fetch_data:
                     </div>
                     """, unsafe_allow_html=True)
         else:
-            st.warning("⚠️ No metadata records found for this collection. Try selecting another one.")

 import requests
 import pandas as pd
 import numpy as np
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
+# Custom CSS
 st.markdown("""
     <style>
 </style>
 """, unsafe_allow_html=True)
+# Use an image from a URL for the banner
 st.image("https://cdn-uploads.huggingface.co/production/uploads/67351c643fe51cb1aa28f2e5/7ThcAOjbuM8ajrP85bGs4.jpeg", use_container_width=True)
 # Streamlit app header
 an analysis of metadata completeness, suggests enhancements, and identifies authority gaps.
 """)
+# Updated collection URLs using the correct LOC API
 collections = {
     "American Revolutionary War Maps": "american+revolutionary+war+maps",
     "Civil War Maps": "civil+war+maps",
             completeness_df = pd.DataFrame({"Field": completeness.index, "Completeness (%)": completeness.values})
             completeness_table = completeness_df.set_index("Field")
+            # Sidebar Quick Stats (fancy card version)
             quick_stats_df = pd.DataFrame({
                 "Metric": ["Total Records", "Incomplete Records", "Overall Completeness (%)"],
                 "Value": [len(metadata_df), incomplete_count, round(overall_percent, 1)]
             })
+            # Card-like background container
             st.sidebar.markdown("""
                 <div style='
                     background-color: #2b2b2b;
                     margin-bottom: 1.5rem;
                     box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
                 '>
+                    <h4 style='color: #FFA500; margin-bottom: 1rem;'>Quick Stats</h4>
                 </div>
             """, unsafe_allow_html=True)
+            # Now display the styled dataframe
             st.sidebar.dataframe(
                 quick_stats_df.style
                     .background_gradient(cmap="Oranges", subset=["Value"])
                     .format({"Value": "{:.1f}"}),
                 use_container_width=True,
+                height=240,
+                hide_index=True  # hide the default 0, 1, 2 row index
             )
             # Calculate Top 10 Subjects
                 top_subjects = (
                     metadata_df['subject']
                     .dropna()
+                    .str.split(',')
+                    .explode()
+                    .str.strip()
                     .value_counts()
                     .head(10)
                     .to_frame(name="Count")
                 )
+            # Most Common Subjects in Sidebar
             with st.sidebar.expander("Top 10 Most Common Subjects", expanded=True):
                 st.dataframe(
                     top_subjects.style.background_gradient(cmap="Greens").format("{:.0f}"),
                     height=240
     )
+        with st.sidebar.expander("Helpful Resources", expanded=False):
             st.markdown("""
                 <style>
                     .sidebar-links a {
             st.dataframe(metadata_df.head())
+            # Fill the placeholder created earlier
             st.subheader("Field Completeness Breakdown")
             st.markdown("""
                     </div>
                     """, unsafe_allow_html=True)
         else:
+            st.warning("No metadata records found for this collection. Try selecting another one.")