"""Streamlit FOIA Explorer.

Scans the current directory for DOJ FOIA annual-report XML files (NIEM 1.03
IEPD schema), builds per-agency summaries, and renders a browsable Streamlit
UI with agency details, logos, and report statistics.
"""

import streamlit as st
import pandas as pd
import requests
import json
import os
import xml.etree.ElementTree as ET
from typing import Dict, List

# XML namespaces used by DOJ FOIA annual-report documents (shared by every
# parser in this module; previously duplicated in two functions).
NS = {
    'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
    'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
    'nc': 'http://niem.gov/niem/niem-core/2.0',
    'j': 'http://niem.gov/niem/domains/jxdm/4.1'
}

# Deep path to the simple-track median processing time, used by
# load_agencies_from_xml (hoisted so it is written — and found — only once).
_SIMPLE_MEDIAN_PATH = (
    ".//foia:ProcessedResponseTimeSection/foia:ProcessedResponseTime"
    "/foia:SimpleResponseTime/foia:ResponseTimeMedianDaysValue"
)


def _find_text(root, path: str, default: str = "N/A") -> str:
    """Return the text of the first element matching *path*, or *default*.

    Guards both "element absent" and "element present but empty" — the
    original code dereferenced ``.text`` after a second ``find`` call and
    could raise AttributeError when a section existed but a leaf did not.
    """
    node = root.find(path, NS)
    if node is None or node.text is None:
        return default
    return node.text


def load_agencies_from_xml() -> Dict:
    """Build the AGENCIES mapping from every ``*.xml`` file in the CWD.

    Returns a dict keyed by agency name; each value carries the summary,
    contact placeholders, and median processing-time stats consumed by
    ``main``. Unparseable or structurally incomplete files are skipped
    with a Streamlit warning instead of crashing.
    """
    agencies: Dict[str, Dict] = {}
    for xml_file in (f for f in os.listdir('.') if f.endswith('.xml')):
        try:
            root = ET.parse(xml_file).getroot()
        except ET.ParseError:
            st.warning(f"Could not parse {xml_file}")
            continue

        org = root.find(".//nc:Organization", NS)
        if org is None:
            continue
        name_node = org.find("nc:OrganizationName", NS)
        abbr_node = org.find("nc:OrganizationAbbreviationText", NS)
        if name_node is None or abbr_node is None or not name_node.text or not abbr_node.text:
            # Original raised an uncaught AttributeError here; skip instead.
            st.warning(f"Could not parse {xml_file}")
            continue
        name = name_node.text
        abbr = abbr_node.text
        fiscal_year = _find_text(root, ".//foia:DocumentFiscalYearDate")

        # Median days for the "simple" request track; 0 when absent or
        # non-numeric (original defaulted to 0 only for the absent case).
        try:
            simple_median = float(_find_text(root, _SIMPLE_MEDIAN_PATH, "0"))
        except ValueError:
            simple_median = 0.0

        site = f"https://www.{abbr.lower()}.gov"  # guessed from abbreviation
        agencies[name] = {
            "name": name,
            "summary": {
                "name": name,
                "description": f"FOIA data for {name} ({fiscal_year})",
                "abbreviation": abbr,
                "website": site
            },
            "website": site,
            "emails": [f"foia@{abbr.lower()}.gov"],
            "address": {"address_lines": [], "street": "", "city": "", "state": "", "zip": ""},
            "service_center": {"phone": ["N/A"]},
            "request_form": f"{site}/foia",
            "request_time_stats": {
                fiscal_year: {"simple_median_days": simple_median}
            }
        }
    return agencies


class Agency:
    """Read-only view over one agency's entry in the AGENCIES dict."""

    def __init__(self, data: Dict):
        self.data = data

    @property
    def name(self) -> str:
        return self.data.get("name", "")

    @property
    def summary(self) -> Dict:
        return self.data.get("summary", {})

    @property
    def website(self) -> str:
        return self.data.get("website", "")

    @property
    def emails(self) -> List[str]:
        return self.data.get("emails", [])

    @property
    def address(self) -> Dict:
        return self.data.get("address", {})

    @property
    def service_center(self) -> Dict:
        return self.data.get("service_center", {})

    @property
    def request_form(self) -> str:
        return self.data.get("request_form", "")

    @property
    def request_time_stats(self) -> Dict:
        return self.data.get("request_time_stats", {})

    def isFOIAonline(self) -> bool:
        """True when the request form is hosted on FOIAonline."""
        return "foiaonline" in self.request_form.lower()

    def hasRequestForm(self) -> bool:
        """True when any request-form URL is recorded."""
        return bool(self.request_form)


# Wikipedia/Wikimedia seal images keyed by agency abbreviation.
AGENCY_LOGOS = {
    "CFA": "https://upload.wikimedia.org/wikipedia/en/e/e8/U.S._Commission_of_Fine_Arts_logo.png",
    "CEQ": "https://upload.wikimedia.org/wikipedia/commons/5/58/CEQ-Seal.png",
    "DOJ": "https://upload.wikimedia.org/wikipedia/commons/f/f8/Seal_of_the_United_States_Department_of_Justice.svg",
    "EPA": "https://upload.wikimedia.org/wikipedia/commons/7/78/Environmental_Protection_Agency_logo.svg",
    "FTC": "https://upload.wikimedia.org/wikipedia/commons/5/5b/US-FederalTradeCommission-Seal.svg",
    "ABMC": "https://upload.wikimedia.org/wikipedia/commons/2/2f/ABMC_seal.png",
    "AFRH": "https://upload.wikimedia.org/wikipedia/commons/2/2e/Armed_Forces_Retirement_Home_logo.png",
    "DHS": "https://upload.wikimedia.org/wikipedia/commons/5/52/Seal_of_the_United_States_Department_of_Homeland_Security.svg",
    "DOC": "https://upload.wikimedia.org/wikipedia/commons/e/e0/Seal_of_the_United_States_Department_of_Commerce.svg",
    "DOD": "https://upload.wikimedia.org/wikipedia/commons/e/e0/Seal_of_the_United_States_Department_of_Defense_%282004%29.svg",
    "DOE": "https://upload.wikimedia.org/wikipedia/commons/e/e0/United_States_Department_of_Energy_Seal.svg",
    "DOI": "https://upload.wikimedia.org/wikipedia/commons/e/e3/Seal_of_the_United_States_Department_of_the_Interior.svg",
    "DOL": "https://upload.wikimedia.org/wikipedia/commons/4/47/Seal_of_the_United_States_Department_of_Labor.svg",
    "DOS": "https://upload.wikimedia.org/wikipedia/commons/1/1b/Seal_of_the_United_States_Department_of_State.svg",
    "DOT": "https://upload.wikimedia.org/wikipedia/commons/8/88/Seal_of_the_United_States_Department_of_Transportation.svg",
    "HHS": "https://upload.wikimedia.org/wikipedia/commons/0/03/Seal_of_the_U.S._Department_of_Health_and_Human_Services.svg",
    "HUD": "https://upload.wikimedia.org/wikipedia/commons/6/6e/Seal_of_the_United_States_Department_of_Housing_and_Urban_Development.svg",
    "NASA": "https://upload.wikimedia.org/wikipedia/commons/e/e5/NASA_logo.svg",
    "NRC": "https://upload.wikimedia.org/wikipedia/commons/8/85/U.S._Nuclear_Regulatory_Commission_seal.svg",
    "SSA": "https://upload.wikimedia.org/wikipedia/commons/6/6e/Social_Security_Administration_logo_%282019%29.svg",
    "USDA": "https://upload.wikimedia.org/wikipedia/commons/0/0f/Seal_of_the_United_States_Department_of_Agriculture.svg",
    "VA": "https://upload.wikimedia.org/wikipedia/commons/8/87/Seal_of_the_U.S._Department_of_Veterans_Affairs.svg",
}


def create_search_url_wikipedia(search_query):
    """Return a Wikipedia search-redirect URL for *search_query*.

    Replacement order matters and matches the original: spaces become '+',
    en-dashes are percent-encoded, and '&' is spelled out as 'and'.
    """
    base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
    return base_url + search_query.replace(' ', '+').replace('–', '%E2%80%93').replace('&', 'and')


def parse_foia_xml(filename: str) -> Dict:
    """Extract summary statistics from one FOIA annual-report XML file.

    Returns a flat dict of display strings; any missing element yields
    "N/A" (name defaults to "Unknown"), and a parse failure yields an
    all-placeholder dict with name "Error" — same contract as before, but
    a present-section/missing-leaf no longer raises AttributeError.
    """
    error_row = {
        "name": "Error", "abbr": "N/A", "fiscal_year": "N/A",
        "pending_start": "N/A", "received": "N/A", "processed": "N/A",
        "pending_end": "N/A", "full_grants": "N/A"
    }
    try:
        root = ET.parse(filename).getroot()
    except ET.ParseError:
        return error_row

    org = root.find(".//nc:Organization", NS)
    name = "Unknown"
    abbr = "N/A"
    if org is not None:
        name = _find_text(org, "nc:OrganizationName", "Unknown")
        abbr = _find_text(org, "nc:OrganizationAbbreviationText", "N/A")

    stats = ".//foia:ProcessedRequestSection/foia:ProcessingStatistics"
    return {
        "name": name,
        "abbr": abbr,
        "fiscal_year": _find_text(root, ".//foia:DocumentFiscalYearDate"),
        "pending_start": _find_text(root, stats + "/foia:ProcessingStatisticsPendingAtStartQuantity"),
        "received": _find_text(root, stats + "/foia:ProcessingStatisticsReceivedQuantity"),
        "processed": _find_text(root, stats + "/foia:ProcessingStatisticsProcessedQuantity"),
        "pending_end": _find_text(root, stats + "/foia:ProcessingStatisticsPendingAtEndQuantity"),
        "full_grants": _find_text(
            root,
            ".//foia:RequestDispositionSection/foia:RequestDisposition"
            "/foia:RequestDispositionFullGrantQuantity"
        )
    }


def list_and_display_xml_files():
    """Render an expander per XML report file with its parsed statistics."""
    xml_files = [f for f in os.listdir('.') if f.endswith('.xml')]
    if not xml_files:
        # Original source had this literal broken across a physical line.
        st.markdown("No XML files found in the directory. 📂❓")
        return
    for xml_file in xml_files:
        file_name = xml_file[:-4]  # strip trailing ".xml"
        data = parse_foia_xml(xml_file)
        with st.expander(f"📋 {file_name} - {data['name']} ({data['abbr']})"):
            if data['abbr'] in AGENCY_LOGOS:
                st.image(
                    AGENCY_LOGOS[data['abbr']],
                    caption=f"{data['name']} Seal",
                    width=100
                )
            else:
                st.write("🖼️ No logo available for this agency yet.")
            st.write(f"📅 Fiscal Year: {data['fiscal_year']}")
            st.write(f"⏳ Pending at Start: {data['pending_start']}")
            st.write(f"📥 Received: {data['received']}")
            st.write(f"✅ Processed: {data['processed']}")
            st.write(f"⏳ Pending at End: {data['pending_end']}")
            st.write(f"👍 Full Grants: {data['full_grants']}")
            st.write(f"[Wikipedia - {data['name']}]({create_search_url_wikipedia(data['name'])})")


def search_foia_content(query: str, agency: str = None) -> Dict:
    """Placeholder search: returns canned results for *query*.

    NOTE(review): stub implementation — no real backend is queried yet.
    """
    return {
        "query": query,
        "agency": agency,
        "results": [
            {"title": f"Sample FOIA Response 1 for {query}", "date": "2023-01-01"},
            {"title": f"Sample FOIA Response 2 for {query}", "date": "2023-02-01"}
        ]
    }


def main():
    """Entry point: build agency data and render the full Streamlit page."""
    AGENCIES = load_agencies_from_xml()
    st.title("Freedom of Information Act (FOIA) Explorer 🌍📊")
    st.image(
        "https://upload.wikimedia.org/wikipedia/en/e/e8/U.S._Commission_of_Fine_Arts_logo.png",
        caption="Logo of the United States Commission of Fine Arts - Representing U.S. Government Transparency",
        width=200
    )
    st.write("""
    The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. 🎉✨ Below is a list of datasets available under FOIA, alongside guessed Wikipedia URLs for more information. 📚🔍
    """)
    st.markdown("""
    - [FOIA.Gov](https://www.foia.gov/foia-dataset-download.html)
    - [Data.Gov](https://catalog.data.gov/dataset?tags=foia)
    """)

    datasets = [
        ("Provider Taxonomy", "🩺"),
        ("Consumer Complaint Database", "📞"),
        ("Medicare Provider Utilization and Payment Data", "💊"),
        ("Global Terrorism Database", "🌍"),
        ("National Nutrient Database", "🍎"),
        ("Patent Grant Full Text Data", "📜"),
        ("Toxic Release Inventory", "☣️"),
        ("Residential Energy Consumption Survey", "🏠")
    ]
    st.markdown("### FOIA Datasets and Wikipedia URLs")
    for dataset, emoji in datasets:
        st.markdown(f"- {emoji} **{dataset}**: [Wikipedia]({create_search_url_wikipedia(dataset)})")

    st.header("Agency Browser")
    agency_names = sorted(AGENCIES.keys())
    selected_agency = st.selectbox("Select Agency", [""] + agency_names)
    if selected_agency:
        agency = Agency(AGENCIES[selected_agency])
        st.subheader(f"{agency.name} Details")
        if agency.summary.get('abbreviation') in AGENCY_LOGOS:
            st.image(
                AGENCY_LOGOS[agency.summary.get('abbreviation')],
                caption=f"{agency.name} Seal",
                width=100
            )
        st.write(f"Description: {agency.summary.get('description', 'N/A')}")
        st.write(f"Abbreviation: {agency.summary.get('abbreviation', 'N/A')}")
        if agency.website:
            st.write(f"Website: [{agency.website}]({agency.website})")

        st.subheader("Contact Information")
        contact_info = [
            "\n".join(agency.address.get("address_lines", [])),
            agency.address.get("street", ""),
            f"{agency.address.get('city', '')}, {agency.address.get('state', '')} {agency.address.get('zip', '')}",
            agency.service_center.get("phone", [""])[0]
        ]
        st.write("\n".join([line for line in contact_info if line]))
        if agency.emails:
            st.write(f"Email: [{agency.emails[0]}](mailto:{agency.emails[0]})")
        if agency.hasRequestForm():
            # FOIAonline agencies share one central request-creation URL.
            form_url = (
                "https://foiaonline.regulations.gov/foia/action/public/request/createRequest"
                if agency.isFOIAonline() else agency.request_form
            )
            st.write(f"[Submit FOIA Request]({form_url})")

        st.subheader("Median Processing Times")
        if agency.request_time_stats:
            for year, stats in agency.request_time_stats.items():
                col1, col2 = st.columns(2)
                with col1:
                    st.write(f"Year: {year}")
                with col2:
                    for key, value in stats.items():
                        if "median" in key:
                            st.write(f"{key.replace('_median_days', '').title()}: {value} days")

        st.subheader("Search FOIA Documents")
        search_query = st.text_input("Enter search query")
        if st.button("Search") and search_query:
            with st.spinner("Searching..."):
                results = search_foia_content(search_query, selected_agency)
                st.write(f"Found {len(results['results'])} results for '{search_query}':")
                for result in results["results"]:
                    st.write(f"- {result['title']} ({result['date']})")

    st.header("FOIA XML Reports")
    list_and_display_xml_files()


if __name__ == "__main__":
    main()