"""FOIA Explorer: a small Streamlit app for browsing agency FOIA contact
information and scanning FOIA annual-report XML files (run with
`streamlit run` on this file)."""

import streamlit as st
import pandas as pd
import requests
import json
import os
import xml.etree.ElementTree as ET
from typing import Dict, List, Optional


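# Hard-coded sample of agency metadata for the Agency Browser tab. Only a DOJ
# record is included; the field layout (summary, address, service_center,
# request_form, request_time_stats) is what the Agency wrapper class below expects.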
AGENCIES = {
    "Department of Justice": {
        "name": "Department of Justice",
        "summary": {
            "name": "Department of Justice",
            "description": "The mission of the DOJ is to enforce the law...",
            "abbreviation": "DOJ",
            "website": "https://www.justice.gov"
        },
        "website": "https://www.justice.gov",
        "emails": ["[email protected]"],
        "address": {
            "address_lines": ["950 Pennsylvania Avenue, NW"],
            "street": "",
            "city": "Washington",
            "state": "DC",
            "zip": "20530-0001"
        },
        "service_center": {"phone": ["202-514-2000"]},
        "request_form": "https://www.justice.gov/oip/foia-request",
        "request_time_stats": {
            "2022": {"simple_median_days": 20, "complex_median_days": 45}
        }
    }
}


class Agency:
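    """Read-only convenience wrapper around a single AGENCIES record."""
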
    def __init__(self, data: Dict):
        self.data = data

    @property
    def name(self) -> str:
        return self.data.get("name", "")

    @property
    def summary(self) -> Dict:
        return self.data.get("summary", {})

    @property
    def website(self) -> str:
        return self.data.get("website", "")

    @property
    def emails(self) -> List[str]:
        return self.data.get("emails", [])

    @property
    def address(self) -> Dict:
        return self.data.get("address", {})

    @property
    def service_center(self) -> Dict:
        return self.data.get("service_center", {})

    @property
    def request_form(self) -> str:
        return self.data.get("request_form", "")

    @property
    def request_time_stats(self) -> Dict:
        return self.data.get("request_time_stats", {})

    def isFOIAonline(self) -> bool:
        return "foiaonline" in self.request_form.lower()

    def hasRequestForm(self) -> bool:
        return bool(self.request_form)


def create_search_url_wikipedia(search_query: str) -> str:
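    """Build an English-Wikipedia search-redirect URL for a free-text query."""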
    base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
    # Hand-encode a few characters (space, en dash, ampersand) that appear in dataset names.
    return base_url + search_query.replace(' ', '+').replace('–', '%E2%80%93').replace('&', 'and')
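
# Note: urllib.parse.quote_plus would handle arbitrary characters more robustly
# than the manual replacements above, e.g.
#   from urllib.parse import quote_plus
#   url = base_url + quote_plus(search_query)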


def process_foia_xml_file(filename: str):
    """Process a FOIA annual-report XML file and display the results with emojis."""
    try:
        tree = ET.parse(filename)
        root = tree.getroot()

        # NIEM namespaces used by DOJ FOIA annual-report XML documents.
        ns = {
            'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
            'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
            'nc': 'http://niem.gov/niem/niem-core/2.0',
            'j': 'http://niem.gov/niem/domains/jxdm/4.1'
        }

        org = root.find(".//nc:Organization", ns)
        org_name = org.find("nc:OrganizationName", ns).text if org is not None else "Unknown"
        org_abbr = org.find("nc:OrganizationAbbreviationText", ns).text if org is not None else "N/A"
        fy_elem = root.find(".//foia:DocumentFiscalYearDate", ns)
        fiscal_year = fy_elem.text if fy_elem is not None else "N/A"

        st.subheader(f"📄 {org_name} ({org_abbr}) FOIA Report {fiscal_year}")
        st.write(f"[Wikipedia - {org_name}]({create_search_url_wikipedia(org_name)})")

        proc_stats = root.find(".//foia:ProcessedRequestSection/foia:ProcessingStatistics", ns)
        if proc_stats is not None:
            st.write("📊 **Request Processing Statistics**")
            st.write(f"- Pending at Start: {proc_stats.find('foia:ProcessingStatisticsPendingAtStartQuantity', ns).text} ⏳")
            st.write(f"- Received: {proc_stats.find('foia:ProcessingStatisticsReceivedQuantity', ns).text} 📥")
            st.write(f"- Processed: {proc_stats.find('foia:ProcessingStatisticsProcessedQuantity', ns).text} ✅")
            st.write(f"- Pending at End: {proc_stats.find('foia:ProcessingStatisticsPendingAtEndQuantity', ns).text} ⏳")

        disp = root.find(".//foia:RequestDispositionSection/foia:RequestDisposition", ns)
        if disp is not None:
            st.write("📋 **Request Dispositions**")
            st.write(f"- Full Grants: {disp.find('foia:RequestDispositionFullGrantQuantity', ns).text} ✅")
            st.write(f"- Partial Grants: {disp.find('foia:RequestDispositionPartialGrantQuantity', ns).text} ⚠️")
            st.write(f"- Full Denials: {disp.find('foia:RequestDispositionFullExemptionDenialQuantity', ns).text} ❌")
            denial = disp.find('foia:NonExemptionDenial', ns)
            if denial is not None:
                st.write(f"- Non-Exemption Denial ({denial.find('foia:NonExemptionDenialReasonCode', ns).text}): {denial.find('foia:NonExemptionDenialQuantity', ns).text} 📝")

        exemptions = root.findall(".//foia:RequestDispositionAppliedExemptionsSection/foia:ComponentAppliedExemptions/foia:AppliedExemption", ns)
        if exemptions:
            st.write("🚫 **Applied Exemptions**")
            for ex in exemptions:
                code = ex.find('foia:AppliedExemptionCode', ns).text
                qty = ex.find('foia:AppliedExemptionQuantity', ns).text
                st.write(f"- {code}: {qty} times")

        resp_time = root.find(".//foia:ProcessedResponseTimeSection/foia:ProcessedResponseTime", ns)
        if resp_time is not None:
            st.write("⏰ **Response Times**")
            simple = resp_time.find('foia:SimpleResponseTime', ns)
            if simple is not None:
                st.write("- Simple Requests:")
                st.write(f"  - Median: {simple.find('foia:ResponseTimeMedianDaysValue', ns).text} days ⏱️")
                st.write(f"  - Average: {simple.find('foia:ResponseTimeAverageDaysValue', ns).text} days 📈")

    except (ET.ParseError, FileNotFoundError):
        st.error(f"❌ Could not read or parse XML file: {filename}")
    except AttributeError:
        st.error(f"❌ Unexpected or incomplete XML structure in: {filename}")


def list_xml_files() -> List[str]:
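    """Return base names (without the .xml extension) of XML files in the current directory."""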
    xml_files = [f[:-4] for f in os.listdir('.') if f.endswith('.xml')]
    if not xml_files:
        st.markdown("No XML files found in the directory. 📭")
        return []
    return xml_files


def search_foia_content(query: str, agency: Optional[str] = None) -> Dict:
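    """Placeholder search that returns canned sample results.

    There is no real backend here; the sketch after this function shows one way
    a live search could be wired up.
    """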
    results = {
        "query": query,
        "agency": agency,
        "results": [
            {"title": f"Sample FOIA Response 1 for {query}", "date": "2023-01-01"},
            {"title": f"Sample FOIA Response 2 for {query}", "date": "2023-02-01"}
        ]
    }
    return results
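

# A minimal sketch of how search_foia_content() could be backed by a real HTTP
# search service instead of canned data. The endpoint URL, query parameters, and
# response shape are assumptions for illustration, not a documented FOIA.gov API.
def search_foia_content_live(query: str, endpoint: str, api_key: str) -> Dict:
    """Illustrative only: query an assumed FOIA search endpoint over HTTP."""
    resp = requests.get(endpoint, params={"q": query, "api_key": api_key}, timeout=10)
    resp.raise_for_status()
    # Assumes the service returns JSON shaped like search_foia_content()'s output.
    return resp.json()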


def main():
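    """Render the two-tab Streamlit UI: an agency browser and a FOIA dataset/XML scanner."""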
    st.title("Freedom of Information Act (FOIA) Explorer 🌐📊")

    tab1, tab2 = st.tabs(["Agency Browser", "FOIA Datasets & XML Scanner"])

    with tab1:
        st.header("Agency Browser")
        agency_names = sorted(AGENCIES.keys())
        selected_agency = st.selectbox("Select Agency", [""] + agency_names)

        if selected_agency:
            agency = Agency(AGENCIES[selected_agency])
            st.subheader(f"{agency.name} Details")
            st.write(f"Description: {agency.summary.get('description', 'N/A')}")
            st.write(f"Abbreviation: {agency.summary.get('abbreviation', 'N/A')}")
            if agency.website:
                st.write(f"Website: [{agency.website}]({agency.website})")

            st.subheader("Contact Information")
            contact_info = [
                "\n".join(agency.address.get("address_lines", [])),
                agency.address.get("street", ""),
                f"{agency.address.get('city', '')}, {agency.address.get('state', '')} {agency.address.get('zip', '')}",
                agency.service_center.get("phone", [""])[0]
            ]
            st.write("\n".join([line for line in contact_info if line]))
            if agency.emails:
                st.write(f"Email: [{agency.emails[0]}](mailto:{agency.emails[0]})")

            if agency.hasRequestForm():
                form_url = (
                    "https://foiaonline.regulations.gov/foia/action/public/request/createRequest"
                    if agency.isFOIAonline()
                    else agency.request_form
                )
                st.write(f"[Submit FOIA Request]({form_url})")

            st.subheader("Median Processing Times")
            if agency.request_time_stats:
                for year, stats in agency.request_time_stats.items():
                    col1, col2 = st.columns(2)
                    with col1:
                        st.write(f"Year: {year}")
                    with col2:
                        for key, value in stats.items():
                            if "median" in key:
                                st.write(f"{key.replace('_median_days', '').title()}: {value} days")

            st.subheader("Search FOIA Documents")
            search_query = st.text_input("Enter search query")
            if st.button("Search") and search_query:
                with st.spinner("Searching..."):
                    results = search_foia_content(search_query, selected_agency)
                    st.write(f"Found {len(results['results'])} results for '{search_query}':")
                    for result in results["results"]:
                        st.write(f"- {result['title']} ({result['date']})")

    with tab2:
        st.header("FOIA Datasets & XML Scanner")
        st.write("""
The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. 🌍✨

Below is a list of datasets available under FOIA, along with Wikipedia search links for more information. 📚🔍
        """)

        st.markdown("""
- [FOIA.Gov](https://www.foia.gov/foia-dataset-download.html)
- [Data.Gov](https://catalog.data.gov/dataset?tags=foia)
        """)

        datasets = [
            "Provider Taxonomy",
            "Consumer Complaint Database",
            "Medicare Provider Utilization and Payment Data",
            "Global Terrorism Database",
            "National Nutrient Database",
            "Patent Grant Full Text Data",
            "Toxic Release Inventory",
            "Residential Energy Consumption Survey",
        ]

        st.markdown("### FOIA Datasets and Wikipedia URLs")
        for dataset in datasets:
            st.markdown(f"- **{dataset}**: [Wikipedia]({create_search_url_wikipedia(dataset)})")

        st.markdown("### Available FOIA XML Reports")
        xml_files = list_xml_files()
        for xml_file in xml_files:
            col1, col2 = st.columns([3, 1])
            with col1:
                st.markdown(f"- [{xml_file}]({xml_file}.xml)")
            with col2:
                if st.button(f"🔍 Process {xml_file}", key=f"process_{xml_file}"):
                    process_foia_xml_file(f"{xml_file}.xml")


if __name__ == "__main__":
    main()