Spaces:

awacke1
/

Freedom-of-Information-Act-FOIA-Datasets

Sleeping

App Files Files Community

awacke1 committed on Mar 19

Commit

0d8fa25

verified ·

1 Parent(s): d447935

Update app.py

Browse files

Files changed (1) hide show

app.py +152 -166

app.py CHANGED Viewed

@@ -6,32 +6,50 @@ import os
 import xml.etree.ElementTree as ET
 from typing import Dict, List
-# Sample agency data
-AGENCIES = {
-    "Department of Justice": {
-        "name": "Department of Justice",
-        "summary": {
-            "name": "Department of Justice",
-            "description": "The mission of the DOJ is to enforce the law...",
-            "abbreviation": "DOJ",
-            "website": "https://www.justice.gov"
-        },
-        "website": "https://www.justice.gov",
-        "emails": ["[email protected]"],
-        "address": {
-            "address_lines": ["950 Pennsylvania Avenue, NW"],
-            "street": "",
-            "city": "Washington",
-            "state": "DC",
-            "zip": "20530-0001"
-        },
-        "service_center": {"phone": ["202-514-2000"]},
-        "request_form": "https://www.justice.gov/oip/foia-request",
-        "request_time_stats": {
-            "2022": {"simple_median_days": 20, "complex_median_days": 45}
-        }
     }
-}
 class Agency:
     def __init__(self, data: Dict):
@@ -79,77 +97,50 @@ def create_search_url_wikipedia(search_query):
     base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
     return base_url + search_query.replace(' ', '+').replace('–', '%E2%80%93').replace('&', 'and')
-def process_foia_xml_file(filename: str):
-    """Process FOIA XML file and display results with emojis"""
     try:
         tree = ET.parse(filename)
         root = tree.getroot()
-        # Namespace dictionary
         ns = {
             'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
             'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
             'nc': 'http://niem.gov/niem/niem-core/2.0',
             'j': 'http://niem.gov/niem/domains/jxdm/4.1'
         }
-        # Extract basic info
-        org = root.find(".//nc:Organization", ns)
-        org_name = org.find("nc:OrganizationName", ns).text if org is not None else "Unknown"
-        org_abbr = org.find("nc:OrganizationAbbreviationText", ns).text if org is not None else "N/A"
-        fiscal_year = root.find(".//foia:DocumentFiscalYearDate", ns).text if root.find(".//foia:DocumentFiscalYearDate", ns) is not None else "N/A"
-        st.subheader(f"📋 {org_name} ({org_abbr}) FOIA Report {fiscal_year}")
-        st.write(f"[Wikipedia - {org_name}]({create_search_url_wikipedia(org_name)})")
-        # Processing Statistics
-        proc_stats = root.find(".//foia:ProcessedRequestSection/foia:ProcessingStatistics", ns)
-        if proc_stats is not None:
-            st.write("📊 **Request Processing Statistics**")
-            st.write(f"- Pending at Start: {proc_stats.find('foia:ProcessingStatisticsPendingAtStartQuantity', ns).text} ⏳")
-            st.write(f"- Received: {proc_stats.find('foia:ProcessingStatisticsReceivedQuantity', ns).text} 📥")
-            st.write(f"- Processed: {proc_stats.find('foia:ProcessingStatisticsProcessedQuantity', ns).text} ✅")
-            st.write(f"- Pending at End: {proc_stats.find('foia:ProcessingStatisticsPendingAtEndQuantity', ns).text} ⏳")
-        # Request Dispositions
-        disp = root.find(".//foia:RequestDispositionSection/foia:RequestDisposition", ns)
-        if disp is not None:
-            st.write("📑 **Request Dispositions**")
-            st.write(f"- Full Grants: {disp.find('foia:RequestDispositionFullGrantQuantity', ns).text} ✅")
-            st.write(f"- Partial Grants: {disp.find('foia:RequestDispositionPartialGrantQuantity', ns).text} ➕")
-            st.write(f"- Full Denials: {disp.find('foia:RequestDispositionFullExemptionDenialQuantity', ns).text} ❌")
-            denial = disp.find('foia:NonExemptionDenial', ns)
-            if denial is not None:
-                st.write(f"- Non-Exemption Denial ({denial.find('foia:NonExemptionDenialReasonCode', ns).text}): {denial.find('foia:NonExemptionDenialQuantity', ns).text} 📭")
-        # Applied Exemptions
-        exemptions = root.findall(".//foia:RequestDispositionAppliedExemptionsSection/foia:ComponentAppliedExemptions/foia:AppliedExemption", ns)
-        if exemptions:
-            st.write("🚫 **Applied Exemptions**")
-            for ex in exemptions:
-                code = ex.find('foia:AppliedExemptionCode', ns).text
-                qty = ex.find('foia:AppliedExemptionQuantity', ns).text
-                st.write(f"- {code}: {qty} times")
-        # Response Times
-        resp_time = root.find(".//foia:ProcessedResponseTimeSection/foia:ProcessedResponseTime", ns)
-        if resp_time is not None:
-            st.write("⏰ **Response Times**")
-            simple = resp_time.find('foia:SimpleResponseTime', ns)
-            if simple is not None:
-                st.write(f"- Simple Requests:")
-                st.write(f"  - Median: {simple.find('foia:ResponseTimeMedianDaysValue', ns).text} days ⏱️")
-                st.write(f"  - Average: {simple.find('foia:ResponseTimeAverageDaysValue', ns).text} days 📈")
     except ET.ParseError:
-        st.error(f"❌ Error parsing XML file: {filename}")
-def list_xml_files():
-    xml_files = [f[:-4] for f in os.listdir('.') if f.endswith('.xml')]
     if not xml_files:
         st.markdown("No XML files found in the directory. 📂❓")
-        return []
-    return xml_files
 def search_foia_content(query: str, agency: str = None) -> Dict:
     results = {
@@ -163,94 +154,89 @@ def search_foia_content(query: str, agency: str = None) -> Dict:
     return results
 def main():
     st.title("Freedom of Information Act (FOIA) Explorer 🌍📊")
-    tab1, tab2 = st.tabs(["Agency Browser", "FOIA Datasets & XML Scanner"])
-    with tab1:
-        st.header("Agency Browser")
-        agency_names = sorted(list(AGENCIES.keys()))
-        selected_agency = st.selectbox("Select Agency", [""] + agency_names)
-        if selected_agency:
-            agency = Agency(AGENCIES[selected_agency])
-            st.subheader(f"{agency.name} Details")
-            st.write(f"Description: {agency.summary.get('description', 'N/A')}")
-            st.write(f"Abbreviation: {agency.summary.get('abbreviation', 'N/A')}")
-            if agency.website:
-                st.write(f"Website: [{agency.website}]({agency.website})")
-            st.subheader("Contact Information")
-            contact_info = [
-                "\n".join(agency.address.get("address_lines", [])),
-                agency.address.get("street", ""),
-                f"{agency.address.get('city', '')}, {agency.address.get('state', '')} {agency.address.get('zip', '')}",
-                agency.service_center.get("phone", [""])[0]
-            ]
-            st.write("\n".join([line for line in contact_info if line]))
-            if agency.emails:
-                st.write(f"Email: [{agency.emails[0]}](mailto:{agency.emails[0]})")
-            if agency.hasRequestForm():
-                form_url = "https://foiaonline.regulations.gov/foia/action/public/request/createRequest" if agency.isFOIAonline() else agency.request_form
-                st.write(f"[Submit FOIA Request]({form_url})")
-            st.subheader("Median Processing Times")
-            if agency.request_time_stats:
-                for year, stats in agency.request_time_stats.items():
-                    col1, col2 = st.columns(2)
-                    with col1:
-                        st.write(f"Year: {year}")
-                    with col2:
-                        for key, value in stats.items():
-                            if "median" in key:
-                                st.write(f"{key.replace('_median_days', '').title()}: {value} days")
-        st.subheader("Search FOIA Documents")
-        search_query = st.text_input("Enter search query")
-        if st.button("Search") and search_query:
-            with st.spinner("Searching..."):
-                results = search_foia_content(search_query, selected_agency)
-                st.write(f"Found {len(results['results'])} results for '{search_query}':")
-                for result in results["results"]:
-                    st.write(f"- {result['title']} ({result['date']})")
-    with tab2:
-        st.header("FOIA Datasets & XML Scanner")
-        st.write("""
-        The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. 🎉✨
-        Below is a list of datasets available under FOIA, alongside guessed Wikipedia URLs for more information. 📚🔍
-        """)
-        st.markdown("""
-        - [FOIA.Gov](https://www.foia.gov/foia-dataset-download.html)
-        - [Data.Gov](https://catalog.data.gov/dataset?tags=foia)
-        """)
-        datasets = [
-            "Provider Taxonomy",
-            "Consumer Complaint Database",
-            "Medicare Provider Utilization and Payment Data",
-            "Global Terrorism Database",
-            "National Nutrient Database",
-            "Patent Grant Full Text Data",
-            "Toxic Release Inventory",
-            "Residential Energy Consumption Survey",
         ]
-        st.markdown("### FOIA Datasets and Wikipedia URLs")
-        for dataset in datasets:
-            st.markdown(f"- **{dataset}**: [Wikipedia]({create_search_url_wikipedia(dataset)})")
-        st.markdown("### Available FOIA XML Reports")
-        xml_files = list_xml_files()
-        for xml_file in xml_files:
-            col1, col2 = st.columns([3, 1])
-            with col1:
-                st.markdown(f"- [{xml_file}]({xml_file}.xml)")
-            with col2:
-                if st.button(f"📊 Process {xml_file}", key=f"process_{xml_file}"):
-                    process_foia_xml_file(f"{xml_file}.xml")
 if __name__ == "__main__":
     main()

 import xml.etree.ElementTree as ET
 from typing import Dict, List
+# Function to parse XML and build AGENCIES dictionary
def load_agencies_from_xml() -> Dict:
    """Build the agency lookup table from FOIA XML reports in the working directory.

    Every ``*.xml`` file in the current working directory is parsed. A report
    contributes one record, keyed by its organization name, when it contains
    an ``nc:Organization`` element with a non-empty ``nc:OrganizationName``.
    Files are visited in sorted order, so when two reports share a name the
    alphabetically later file wins deterministically.

    Only the name, abbreviation, fiscal year and simple-request median come
    from the XML. Website, e-mail and request-form values are guesses derived
    from the abbreviation; address and phone are placeholders.

    Returns:
        Dict mapping organization name to a record with the keys ``name``,
        ``summary``, ``website``, ``emails``, ``address``, ``service_center``,
        ``request_form`` and ``request_time_stats``.

    Raises:
        OSError: if the directory or one of the files cannot be read.
    """
    ns = {
        'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
        'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
        'nc': 'http://niem.gov/niem/niem-core/2.0',
        'j': 'http://niem.gov/niem/domains/jxdm/4.1'
    }
    median_path = (
        ".//foia:ProcessedResponseTimeSection/foia:ProcessedResponseTime"
        "/foia:SimpleResponseTime/foia:ResponseTimeMedianDaysValue"
    )
    agencies = {}
    # os.listdir() returns entries in arbitrary order; sort for stable results.
    for xml_file in sorted(f for f in os.listdir('.') if f.endswith('.xml')):
        try:
            root = ET.parse(xml_file).getroot()
        except ET.ParseError:
            st.warning(f"Could not parse {xml_file}")
            continue

        org = root.find(".//nc:Organization", ns)
        if org is None:
            continue

        # findtext() tolerates missing children, unlike find(...).text.
        name = (org.findtext("nc:OrganizationName", default="", namespaces=ns) or "").strip()
        if not name:
            # Without a name there is no key to file the record under.
            continue
        abbr = (
            org.findtext("nc:OrganizationAbbreviationText", default="", namespaces=ns) or ""
        ).strip()
        fiscal_year = (
            root.findtext(".//foia:DocumentFiscalYearDate", default="", namespaces=ns) or ""
        ).strip() or "N/A"

        # The median may be absent or non-numeric (e.g. "<1", "N/A"); fall back
        # to 0, the same value used when the element is missing altogether.
        try:
            simple_median = float(root.findtext(median_path, default="", namespaces=ns))
        except (TypeError, ValueError):
            simple_median = 0

        if abbr:
            domain = abbr.lower()
            website = f"https://www.{domain}.gov"  # Guessed URL, adjust as needed
            emails = [f"foia@{domain}.gov"]  # Guessed email
            request_form = f"https://www.{domain}.gov/foia"  # Guessed form URL
        else:
            # No abbreviation means nothing to guess from; leave contact data empty.
            # NOTE(review): assumes Agency treats ""/[] as "not available" - confirm.
            website, emails, request_form = "", [], ""

        # Build minimal agency data (can be expanded with more XML data)
        agencies[name] = {
            "name": name,
            "summary": {
                "name": name,
                "description": f"FOIA data for {name} ({fiscal_year})",
                "abbreviation": abbr or "N/A",
                "website": website,
            },
            "website": website,
            "emails": emails,
            "address": {"address_lines": [], "street": "", "city": "", "state": "", "zip": ""},  # Placeholder
            "service_center": {"phone": ["N/A"]},  # Placeholder
            "request_form": request_form,
            "request_time_stats": {
                fiscal_year: {"simple_median_days": simple_median}
            },
        }
    return agencies
 class Agency:
     def __init__(self, data: Dict):
     base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
     return base_url + search_query.replace(' ', '+').replace('–', '%E2%80%93').replace('&', 'and')
def parse_foia_xml(filename: str) -> Dict:
    """Parse a FOIA annual-report XML file and return its headline figures.

    Args:
        filename: Path of the XML report to read.

    Returns:
        Dict with the string-valued keys ``name``, ``abbr``, ``fiscal_year``,
        ``pending_start``, ``received``, ``processed``, ``pending_end`` and
        ``full_grants``. A value that is missing or empty in the report is
        returned as ``"N/A"`` (``"Unknown"`` for ``name``). If the file is not
        well-formed XML, ``name`` is ``"Error"`` and every other value is
        ``"N/A"``.

    Raises:
        OSError: if the file cannot be opened.
    """
    try:
        root = ET.parse(filename).getroot()
    except ET.ParseError:
        return {"name": "Error", "abbr": "N/A", "fiscal_year": "N/A", "pending_start": "N/A", "received": "N/A", "processed": "N/A", "pending_end": "N/A", "full_grants": "N/A"}

    ns = {
        'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
        'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
        'nc': 'http://niem.gov/niem/niem-core/2.0',
        'j': 'http://niem.gov/niem/domains/jxdm/4.1'
    }

    def text_of(node, path, fallback="N/A"):
        # findtext() returns the default when the element is absent and ""
        # when it is present but empty; both cases map to the fallback.
        if node is None:
            return fallback
        return node.findtext(path, default="", namespaces=ns) or fallback

    org = root.find(".//nc:Organization", ns)
    stats = ".//foia:ProcessedRequestSection/foia:ProcessingStatistics"
    disposition = ".//foia:RequestDispositionSection/foia:RequestDisposition"
    return {
        "name": text_of(org, "nc:OrganizationName", "Unknown"),
        "abbr": text_of(org, "nc:OrganizationAbbreviationText"),
        "fiscal_year": text_of(root, ".//foia:DocumentFiscalYearDate"),
        "pending_start": text_of(root, f"{stats}/foia:ProcessingStatisticsPendingAtStartQuantity"),
        "received": text_of(root, f"{stats}/foia:ProcessingStatisticsReceivedQuantity"),
        "processed": text_of(root, f"{stats}/foia:ProcessingStatisticsProcessedQuantity"),
        "pending_end": text_of(root, f"{stats}/foia:ProcessingStatisticsPendingAtEndQuantity"),
        "full_grants": text_of(root, f"{disposition}/foia:RequestDispositionFullGrantQuantity"),
    }
def list_and_display_xml_files():
    """Render one Streamlit expander per FOIA XML report in the working directory.

    Each ``*.xml`` file is parsed with ``parse_foia_xml`` and shown under a
    header made of the file name (without extension), the organization name
    and its abbreviation. The expander lists the fiscal year, the request
    counts and a Wikipedia search link for the organization. When no XML
    files exist, a single notice is rendered instead.
    """
    # os.listdir() returns entries in arbitrary order; sort so the reports
    # appear in the same order on every rerun and on every host.
    xml_files = sorted(f for f in os.listdir('.') if f.endswith('.xml'))
    if not xml_files:
        st.markdown("No XML files found in the directory. 📂❓")
        return
    for xml_file in xml_files:
        file_name = xml_file[:-4]  # Remove .xml extension
        data = parse_foia_xml(xml_file)
        with st.expander(f"📋 {file_name} - {data['name']} ({data['abbr']})"):
            st.write(f"📅 Fiscal Year: {data['fiscal_year']}")
            st.write(f"⏳ Pending at Start: {data['pending_start']}")
            st.write(f"📥 Received: {data['received']}")
            st.write(f"✅ Processed: {data['processed']}")
            st.write(f"⏳ Pending at End: {data['pending_end']}")
            st.write(f"👍 Full Grants: {data['full_grants']}")
            st.write(f"[Wikipedia - {data['name']}]({create_search_url_wikipedia(data['name'])})")
 def search_foia_content(query: str, agency: str = None) -> Dict:
     results = {
     return results
 def main():
+    # Load agencies from XML files
+    AGENCIES = load_agencies_from_xml()
     st.title("Freedom of Information Act (FOIA) Explorer 🌍📊")
+    st.write("""
+    The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. 🎉✨
+    Below is a list of datasets available under FOIA, alongside guessed Wikipedia URLs for more information. 📚🔍
+    """)
+    st.markdown("""
+    - [FOIA.Gov](https://www.foia.gov/foia-dataset-download.html)
+    - [Data.Gov](https://catalog.data.gov/dataset?tags=foia)
+    """)
+    # FOIA Datasets with Emojis
+    datasets = [
+        ("Provider Taxonomy", "🩺"),
+        ("Consumer Complaint Database", "📞"),
+        ("Medicare Provider Utilization and Payment Data", "💊"),
+        ("Global Terrorism Database", "🌍"),
+        ("National Nutrient Database", "🍎"),
+        ("Patent Grant Full Text Data", "📜"),
+        ("Toxic Release Inventory", "☣️"),
+        ("Residential Energy Consumption Survey", "🏠")
+    ]
+    st.markdown("### FOIA Datasets and Wikipedia URLs")
+    for dataset, emoji in datasets:
+        st.markdown(f"- {emoji} **{dataset}**: [Wikipedia]({create_search_url_wikipedia(dataset)})")
+    # Agency Browser
+    st.header("Agency Browser")
+    agency_names = sorted(list(AGENCIES.keys()))
+    selected_agency = st.selectbox("Select Agency", [""] + agency_names)
+    if selected_agency:
+        agency = Agency(AGENCIES[selected_agency])
+        st.subheader(f"{agency.name} Details")
+        st.write(f"Description: {agency.summary.get('description', 'N/A')}")
+        st.write(f"Abbreviation: {agency.summary.get('abbreviation', 'N/A')}")
+        if agency.website:
+            st.write(f"Website: [{agency.website}]({agency.website})")
+        st.subheader("Contact Information")
+        contact_info = [
+            "\n".join(agency.address.get("address_lines", [])),
+            agency.address.get("street", ""),
+            f"{agency.address.get('city', '')}, {agency.address.get('state', '')} {agency.address.get('zip', '')}",
+            agency.service_center.get("phone", [""])[0]
         ]
+        st.write("\n".join([line for line in contact_info if line]))
+        if agency.emails:
+            st.write(f"Email: [{agency.emails[0]}](mailto:{agency.emails[0]})")
+        if agency.hasRequestForm():
+            form_url = "https://foiaonline.regulations.gov/foia/action/public/request/createRequest" if agency.isFOIAonline() else agency.request_form
+            st.write(f"[Submit FOIA Request]({form_url})")
+        st.subheader("Median Processing Times")
+        if agency.request_time_stats:
+            for year, stats in agency.request_time_stats.items():
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.write(f"Year: {year}")
+                with col2:
+                    for key, value in stats.items():
+                        if "median" in key:
+                            st.write(f"{key.replace('_median_days', '').title()}: {value} days")
+    # FOIA Document Search
+    st.subheader("Search FOIA Documents")
+    search_query = st.text_input("Enter search query")
+    if st.button("Search") and search_query:
+        with st.spinner("Searching..."):
+            results = search_foia_content(search_query, selected_agency)
+            st.write(f"Found {len(results['results'])} results for '{search_query}':")
+            for result in results["results"]:
+                st.write(f"- {result['title']} ({result['date']})")
+    # XML Files Display
+    st.header("FOIA XML Reports")
+    list_and_display_xml_files()
 if __name__ == "__main__":
     main()