# Hugging Face Space: awacke1/Freedom-of-Information-Act-FOIA-Datasets
# (Space status when captured: Sleeping; file size: 14,350 bytes)

import streamlit as st
import pandas as pd
import requests
import json
import os
import xml.etree.ElementTree as ET
from typing import Dict, List

def _agency_text(parent, path: str, ns: Dict, default=None):
    """Return the stripped text of the first element matching *path*.

    *default* is returned when *parent* is None, when nothing matches, or when
    the matched element has no text (or only whitespace).
    """
    if parent is None:
        return default
    node = parent.find(path, ns)
    if node is None or node.text is None:
        return default
    text = node.text.strip()
    return text if text else default


def _median_days(raw):
    """Convert median-days text to a float; return 0 when absent or not numeric."""
    if raw is None:
        return 0
    try:
        return float(raw)
    except ValueError:
        # Text that float() cannot parse is treated like a missing value
        # instead of aborting the whole load.
        return 0


def load_agencies_from_xml() -> Dict:
    """Build the agency directory from every ``*.xml`` file in the current directory.

    Files are processed in sorted name order so the result does not depend on
    the order returned by ``os.listdir``. A file is skipped when it cannot be
    read or parsed (a Streamlit warning is shown) or when it contains no
    ``nc:Organization`` element with a non-empty ``nc:OrganizationName``.

    Returns:
        A dict keyed by organization name. Each value holds the keys ``name``,
        ``summary``, ``website``, ``emails``, ``address``, ``service_center``,
        ``request_form`` and ``request_time_stats``. The website, e-mail and
        request-form values are derived from the lower-cased abbreviation and
        are left empty when the report has no abbreviation.
        ``request_time_stats`` maps fiscal year to
        ``{"simple_median_days": value}``; when several files describe the
        same organization their fiscal years are merged, and the remaining
        fields come from the last file processed.
    """
    ns = {
        'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
        'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
        'nc': 'http://niem.gov/niem/niem-core/2.0',
        'j': 'http://niem.gov/niem/domains/jxdm/4.1'
    }
    median_path = (
        ".//foia:ProcessedResponseTimeSection/foia:ProcessedResponseTime"
        "/foia:SimpleResponseTime/foia:ResponseTimeMedianDaysValue"
    )

    agencies = {}
    for xml_file in sorted(f for f in os.listdir('.') if f.endswith('.xml')):
        # Keep the try body to the only call expected to fail on bad input.
        try:
            root = ET.parse(xml_file).getroot()
        except (ET.ParseError, OSError):
            st.warning(f"Could not parse {xml_file}")
            continue

        org = root.find(".//nc:Organization", ns)
        name = _agency_text(org, "nc:OrganizationName", ns)
        if name is None:
            # Without a name there is no key to store the agency under.
            continue

        abbr = _agency_text(org, "nc:OrganizationAbbreviationText", ns, default="")
        fiscal_year = _agency_text(root, ".//foia:DocumentFiscalYearDate", ns, default="N/A")

        # Only derive URLs and addresses when there is an abbreviation to
        # build them from; otherwise leave them empty so callers' truthiness
        # checks hide them.
        domain = abbr.lower()
        website = f"https://www.{domain}.gov" if domain else ""
        emails = [f"foia@{domain}.gov"] if domain else []
        request_form = f"https://www.{domain}.gov/foia" if domain else ""

        # Carry over fiscal years already collected for this organization so a
        # second report does not erase the first one's statistics.
        previous = agencies.get(name)
        time_stats = dict(previous["request_time_stats"]) if previous else {}
        time_stats[fiscal_year] = {
            "simple_median_days": _median_days(_agency_text(root, median_path, ns))
        }

        agencies[name] = {
            "name": name,
            "summary": {
                "name": name,
                "description": f"FOIA data for {name} ({fiscal_year})",
                "abbreviation": abbr,
                "website": website
            },
            "website": website,
            "emails": emails,
            "address": {"address_lines": [], "street": "", "city": "", "state": "", "zip": ""},
            "service_center": {"phone": ["N/A"]},
            "request_form": request_form,
            "request_time_stats": time_stats
        }
    return agencies

class Agency:
    """Read-only view over one agency record stored as a plain dict.

    Every property looks its key up in ``self.data`` and falls back to an
    empty value of the matching type when the key is absent.
    """

    def __init__(self, data: Dict):
        self.data = data

    def _field(self, key, fallback):
        """Return ``self.data[key]`` if present, otherwise *fallback*."""
        return self.data.get(key, fallback)

    @property
    def name(self) -> str:
        return self._field("name", "")

    @property
    def summary(self) -> Dict:
        return self._field("summary", {})

    @property
    def website(self) -> str:
        return self._field("website", "")

    @property
    def emails(self) -> List[str]:
        return self._field("emails", [])

    @property
    def address(self) -> Dict:
        return self._field("address", {})

    @property
    def service_center(self) -> Dict:
        return self._field("service_center", {})

    @property
    def request_form(self) -> str:
        return self._field("request_form", "")

    @property
    def request_time_stats(self) -> Dict:
        return self._field("request_time_stats", {})

    def isFOIAonline(self) -> bool:
        """Report whether the request-form URL mentions "foiaonline" (case-insensitive)."""
        lowered_form = self.request_form.lower()
        return lowered_form.find("foiaonline") != -1

    def hasRequestForm(self) -> bool:
        """Report whether a non-empty request-form value is present."""
        return True if self.request_form else False

# Logo/seal image URLs keyed by agency abbreviation. Every image is hosted
# under the same Wikimedia upload root, so only the path below it is listed.
_WIKIMEDIA_UPLOAD_ROOT = "https://upload.wikimedia.org/wikipedia/"

AGENCY_LOGOS = {
    abbreviation: _WIKIMEDIA_UPLOAD_ROOT + image_path
    for abbreviation, image_path in (
        ("CFA", "en/e/e8/U.S._Commission_of_Fine_Arts_logo.png"),
        ("CEQ", "commons/5/58/CEQ-Seal.png"),
        ("DOJ", "commons/f/f8/Seal_of_the_United_States_Department_of_Justice.svg"),
        ("EPA", "commons/7/78/Environmental_Protection_Agency_logo.svg"),
        ("FTC", "commons/5/5b/US-FederalTradeCommission-Seal.svg"),
        ("ABMC", "commons/2/2f/ABMC_seal.png"),
        ("AFRH", "commons/2/2e/Armed_Forces_Retirement_Home_logo.png"),
        ("DHS", "commons/5/52/Seal_of_the_United_States_Department_of_Homeland_Security.svg"),
        ("DOC", "commons/e/e0/Seal_of_the_United_States_Department_of_Commerce.svg"),
        ("DOD", "commons/e/e0/Seal_of_the_United_States_Department_of_Defense_%282004%29.svg"),
        ("DOE", "commons/e/e0/United_States_Department_of_Energy_Seal.svg"),
        ("DOI", "commons/e/e3/Seal_of_the_United_States_Department_of_the_Interior.svg"),
        ("DOL", "commons/4/47/Seal_of_the_United_States_Department_of_Labor.svg"),
        ("DOS", "commons/1/1b/Seal_of_the_United_States_Department_of_State.svg"),
        ("DOT", "commons/8/88/Seal_of_the_United_States_Department_of_Transportation.svg"),
        ("HHS", "commons/0/03/Seal_of_the_U.S._Department_of_Health_and_Human_Services.svg"),
        ("HUD", "commons/6/6e/Seal_of_the_United_States_Department_of_Housing_and_Urban_Development.svg"),
        ("NASA", "commons/e/e5/NASA_logo.svg"),
        ("NRC", "commons/8/85/U.S._Nuclear_Regulatory_Commission_seal.svg"),
        ("SSA", "commons/6/6e/Social_Security_Administration_logo_%282019%29.svg"),
        ("USDA", "commons/0/0f/Seal_of_the_United_States_Department_of_Agriculture.svg"),
        ("VA", "commons/8/87/Seal_of_the_U.S._Department_of_Veterans_Affairs.svg"),
    )
}

def create_search_url_wikipedia(search_query):
    """Return a Wikipedia search-redirect URL for *search_query*.

    ``&`` is spelled out as ``and`` first; the text is then form-encoded, so
    spaces become ``+`` and every other reserved or non-ASCII character
    (parentheses, ``#``, ``?``, ``+``, ``%``, the en dash, ...) is
    percent-encoded. Encoding parentheses also keeps the URL intact when it is
    embedded in a Markdown ``[text](url)`` link.

    Args:
        search_query: Free-text search terms.

    Returns:
        The full URL as a string.
    """
    # Imported locally so this function does not rely on a file-level import.
    from urllib.parse import quote_plus

    base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
    return base_url + quote_plus(search_query.replace('&', 'and'))

def _foia_text(node, path: str, ns: Dict, default: str) -> str:
    """Return the stripped text at *path* under *node*, or *default*.

    *default* is used when *node* is None, when nothing matches *path*, or
    when the matched element has no text (or only whitespace).
    """
    if node is None:
        return default
    text = node.findtext(path, None, ns)
    if text is None:
        return default
    text = text.strip()
    return text or default


def parse_foia_xml(filename: str) -> Dict:
    """Extract the headline figures from one FOIA annual-report XML file.

    Args:
        filename: Path of the XML file to read.

    Returns:
        A dict with the string-valued keys ``name``, ``abbr``, ``fiscal_year``,
        ``pending_start``, ``received``, ``processed``, ``pending_end`` and
        ``full_grants``. A value that is absent or empty in the document is
        reported as ``"N/A"`` (``"Unknown"`` for ``name``). When the file
        cannot be read or is not well-formed XML, ``name`` is ``"Error"`` and
        every other value is ``"N/A"``.
    """
    ns = {
        'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
        'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
        'nc': 'http://niem.gov/niem/niem-core/2.0',
        'j': 'http://niem.gov/niem/domains/jxdm/4.1'
    }
    try:
        root = ET.parse(filename).getroot()
    except (ET.ParseError, OSError):
        return {"name": "Error", "abbr": "N/A", "fiscal_year": "N/A", "pending_start": "N/A", "received": "N/A", "processed": "N/A", "pending_end": "N/A", "full_grants": "N/A"}

    org = root.find(".//nc:Organization", ns)
    # Shared prefix of the four processing-statistics quantity paths.
    stats_prefix = ".//foia:ProcessedRequestSection/foia:ProcessingStatistics/foia:ProcessingStatistics"
    grants_path = ".//foia:RequestDispositionSection/foia:RequestDisposition/foia:RequestDispositionFullGrantQuantity"

    return {
        "name": _foia_text(org, "nc:OrganizationName", ns, "Unknown"),
        "abbr": _foia_text(org, "nc:OrganizationAbbreviationText", ns, "N/A"),
        "fiscal_year": _foia_text(root, ".//foia:DocumentFiscalYearDate", ns, "N/A"),
        "pending_start": _foia_text(root, stats_prefix + "PendingAtStartQuantity", ns, "N/A"),
        "received": _foia_text(root, stats_prefix + "ReceivedQuantity", ns, "N/A"),
        "processed": _foia_text(root, stats_prefix + "ProcessedQuantity", ns, "N/A"),
        "pending_end": _foia_text(root, stats_prefix + "PendingAtEndQuantity", ns, "N/A"),
        "full_grants": _foia_text(root, grants_path, ns, "N/A"),
    }

def list_and_display_xml_files():
    """Show one Streamlit expander per ``.xml`` file in the current directory.

    Each expander is titled with the file name (minus its extension) and the
    agency name and abbreviation from ``parse_foia_xml``. Inside it shows the
    agency logo when ``AGENCY_LOGOS`` has one, the parsed report figures, and
    a Wikipedia search link. A short notice is shown instead when the
    directory contains no XML files.
    """
    report_files = [entry for entry in os.listdir('.') if entry.endswith('.xml')]
    if len(report_files) == 0:
        st.markdown("No XML files found in the directory. 📂❓")
        return

    # (label, key in the parsed record) pairs, in display order.
    figure_rows = (
        ("📅 Fiscal Year", "fiscal_year"),
        ("⏳ Pending at Start", "pending_start"),
        ("📥 Received", "received"),
        ("✅ Processed", "processed"),
        ("⏳ Pending at End", "pending_end"),
        ("👍 Full Grants", "full_grants"),
    )

    for report_file in report_files:
        stem = report_file[:-len('.xml')]
        report = parse_foia_xml(report_file)
        agency_name = report['name']
        agency_abbr = report['abbr']

        with st.expander(f"📋 {stem} - {agency_name} ({agency_abbr})"):
            if agency_abbr not in AGENCY_LOGOS:
                st.write("🖼️ No logo available for this agency yet.")
            else:
                st.image(
                    AGENCY_LOGOS[agency_abbr],
                    caption=f"{agency_name} Seal",
                    width=100
                )

            for label, key in figure_rows:
                st.write(f"{label}: {report[key]}")

            wikipedia_url = create_search_url_wikipedia(report['name'])
            st.write(f"[Wikipedia - {report['name']}]({wikipedia_url})")

def search_foia_content(query: str, agency: str = None) -> Dict:
    """Return placeholder search results for *query*.

    No real search is performed: the result always contains two canned
    entries whose titles embed *query*. *agency* is echoed back unchanged.

    Returns:
        A dict with the keys ``query``, ``agency`` and ``results``; each item
        in ``results`` has a ``title`` and a ``date``.
    """
    sample_dates = ("2023-01-01", "2023-02-01")
    hits = [
        {"title": f"Sample FOIA Response {position} for {query}", "date": sample_date}
        for position, sample_date in enumerate(sample_dates, start=1)
    ]
    return {"query": query, "agency": agency, "results": hits}

def _contact_lines(address: Dict, service_center: Dict) -> List[str]:
    """Return the non-empty contact lines (address lines, street, city line, phone)."""
    # Join only the parts that are present so an empty address does not yield
    # a stray ",  " line.
    locality = ", ".join(
        part for part in (address.get("city", ""), address.get("state", "")) if part
    )
    city_line = " ".join(part for part in (locality, address.get("zip", "")) if part)
    # Tolerate a missing or empty phone list instead of raising IndexError.
    phones = service_center.get("phone") or [""]
    candidates = [
        "\n".join(address.get("address_lines", [])),
        address.get("street", ""),
        city_line,
        phones[0],
    ]
    return [line for line in candidates if line]


def main():
    """Render the FOIA Explorer Streamlit page.

    The page shows, in order: an introduction with dataset links, an agency
    browser backed by ``load_agencies_from_xml``, a document search box backed
    by ``search_foia_content``, and the per-file XML report list from
    ``list_and_display_xml_files``.
    """
    AGENCIES = load_agencies_from_xml()
    
    st.title("Freedom of Information Act (FOIA) Explorer 🌍📊")
    
    st.image(
        "https://upload.wikimedia.org/wikipedia/en/e/e8/U.S._Commission_of_Fine_Arts_logo.png",
        caption="Logo of the United States Commission of Fine Arts - Representing U.S. Government Transparency",
        width=200
    )
    
    st.write("""
    The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. 🎉✨
    Below is a list of datasets available under FOIA, alongside guessed Wikipedia URLs for more information. 📚🔍
    """)
    
    st.markdown("""
    - [FOIA.Gov](https://www.foia.gov/foia-dataset-download.html)
    - [Data.Gov](https://catalog.data.gov/dataset?tags=foia)
    """)

    datasets = [
        ("Provider Taxonomy", "🩺"),
        ("Consumer Complaint Database", "📞"),
        ("Medicare Provider Utilization and Payment Data", "💊"),
        ("Global Terrorism Database", "🌍"),
        ("National Nutrient Database", "🍎"),
        ("Patent Grant Full Text Data", "📜"),
        ("Toxic Release Inventory", "☣️"),
        ("Residential Energy Consumption Survey", "🏠")
    ]

    st.markdown("### FOIA Datasets and Wikipedia URLs")
    for dataset, emoji in datasets:
        st.markdown(f"- {emoji} **{dataset}**: [Wikipedia]({create_search_url_wikipedia(dataset)})")

    st.header("Agency Browser")
    agency_names = sorted(AGENCIES)
    # The leading empty option means "no agency selected".
    selected_agency = st.selectbox("Select Agency", [""] + agency_names)

    if selected_agency:
        agency = Agency(AGENCIES[selected_agency])
        st.subheader(f"{agency.name} Details")
        if agency.summary.get('abbreviation') in AGENCY_LOGOS:
            st.image(
                AGENCY_LOGOS[agency.summary.get('abbreviation')],
                caption=f"{agency.name} Seal",
                width=100
            )
        st.write(f"Description: {agency.summary.get('description', 'N/A')}")
        st.write(f"Abbreviation: {agency.summary.get('abbreviation', 'N/A')}")
        if agency.website:
            st.write(f"Website: [{agency.website}]({agency.website})")
        
        st.subheader("Contact Information")
        st.write("\n".join(_contact_lines(agency.address, agency.service_center)))
        if agency.emails:
            st.write(f"Email: [{agency.emails[0]}](mailto:{agency.emails[0]})")

        if agency.hasRequestForm():
            form_url = "https://foiaonline.regulations.gov/foia/action/public/request/createRequest" if agency.isFOIAonline() else agency.request_form
            st.write(f"[Submit FOIA Request]({form_url})")

        st.subheader("Median Processing Times")
        if agency.request_time_stats:
            for year, stats in agency.request_time_stats.items():
                col1, col2 = st.columns(2)
                with col1:
                    st.write(f"Year: {year}")
                with col2:
                    for key, value in stats.items():
                        if "median" in key:
                            st.write(f"{key.replace('_median_days', '').title()}: {value} days")

    st.subheader("Search FOIA Documents")
    search_query = st.text_input("Enter search query")
    if st.button("Search") and search_query:
        with st.spinner("Searching..."):
            results = search_foia_content(search_query, selected_agency)
            st.write(f"Found {len(results['results'])} results for '{search_query}':")
            for result in results["results"]:
                st.write(f"- {result['title']} ({result['date']})")

    st.header("FOIA XML Reports")
    list_and_display_xml_files()

# Build the page only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()