Spaces:

awacke1
/

Freedom-of-Information-Act-FOIA-Datasets

Sleeping

App Files Files Community

Freedom-of-Information-Act-FOIA-Datasets / app.py

awacke1

Update app.py

0d8fa25 verified 3 months ago

raw

history blame

11.5 kB

	import streamlit as st
	import pandas as pd
	import requests
	import json
	import os
	import xml.etree.ElementTree as ET
	from typing import Dict, List

	def load_agencies_from_xml() -> Dict:
	    """Build the agency catalogue from FOIA annual-report XML files.

	    Every ``*.xml`` file in the current working directory is parsed and the
	    first ``nc:Organization`` element found in it becomes one catalogue
	    entry. Contact details are placeholders or guesses derived from the
	    agency abbreviation; only the name, abbreviation, fiscal year and the
	    median response time for simple requests are read from the report.

	    Handling of incomplete input:
	      * A file that is not well-formed XML produces a Streamlit warning and
	        is skipped.
	      * A report with no organization element, or with no organization
	        name, is skipped silently.
	      * A report with no abbreviation gets ``"N/A"`` as abbreviation and
	        empty website / e-mail / request-form values instead of guessed
	        ones.
	      * A median value that is missing or not numeric (for example
	        ``"N/A"`` or ``"<1"``) is stored as ``0``.

	    Returns:
	        Dict mapping agency name to its record. When several files name the
	        same agency, the file that sorts last by filename wins.
	    """
	    ns = {
	        'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
	        'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
	        'nc': 'http://niem.gov/niem/niem-core/2.0',
	        'j': 'http://niem.gov/niem/domains/jxdm/4.1'
	    }
	    median_path = (
	        ".//foia:ProcessedResponseTimeSection/foia:ProcessedResponseTime"
	        "/foia:SimpleResponseTime/foia:ResponseTimeMedianDaysValue"
	    )

	    def element_text(node, path):
	        """Return the stripped text at *path* below *node*, or None if absent or blank."""
	        if node is None:
	            return None
	        found = node.find(path, ns)
	        if found is None or found.text is None:
	            return None
	        return found.text.strip() or None

	    agencies = {}
	    # os.listdir() returns names in arbitrary order; sorting makes the
	    # "later file overwrites earlier one" rule deterministic.
	    for xml_file in sorted(f for f in os.listdir('.') if f.endswith('.xml')):
	        try:
	            root = ET.parse(xml_file).getroot()
	        except ET.ParseError:
	            st.warning(f"Could not parse {xml_file}")
	            continue

	        org = root.find(".//nc:Organization", ns)
	        name = element_text(org, "nc:OrganizationName")
	        if name is None:
	            # Nothing to key the record on; ignore the report.
	            continue

	        abbr = element_text(org, "nc:OrganizationAbbreviationText")
	        fiscal_year = element_text(root, ".//foia:DocumentFiscalYearDate") or "N/A"

	        median_text = element_text(root, median_path)
	        try:
	            simple_median_days = float(median_text) if median_text is not None else 0
	        except ValueError:
	            # Non-numeric placeholder in the report; treat like a missing value.
	            simple_median_days = 0

	        if abbr:
	            slug = abbr.lower()
	            website = f"https://www.{slug}.gov"  # Guessed URL, adjust as needed
	            emails = [f"foia@{slug}.gov"]  # Guessed email
	            request_form = f"https://www.{slug}.gov/foia"  # Guessed form URL
	        else:
	            # Without an abbreviation there is nothing to guess from.
	            website, emails, request_form = "", [], ""

	        # Build minimal agency data (can be expanded with more XML data)
	        agencies[name] = {
	            "name": name,
	            "summary": {
	                "name": name,
	                "description": f"FOIA data for {name} ({fiscal_year})",
	                "abbreviation": abbr or "N/A",
	                "website": website,
	            },
	            "website": website,
	            "emails": emails,
	            "address": {"address_lines": [], "street": "", "city": "", "state": "", "zip": ""},  # Placeholder
	            "service_center": {"phone": ["N/A"]},  # Placeholder
	            "request_form": request_form,
	            "request_time_stats": {
	                fiscal_year: {
	                    "simple_median_days": simple_median_days,
	                }
	            },
	        }
	    return agencies

	class Agency:
	    """Thin read-only wrapper around one agency record (a plain dict).

	    Each property reads a single key from the wrapped dict and falls back to
	    an empty value of the matching type when the key is absent.
	    """

	    def __init__(self, data: Dict):
	        # The record is stored by reference, not copied.
	        self.data = data

	    def _lookup(self, key, fallback):
	        """Return ``self.data[key]`` or *fallback* when the key is missing."""
	        return self.data.get(key, fallback)

	    @property
	    def name(self) -> str:
	        """Agency name, ``""`` when missing."""
	        return self._lookup("name", "")

	    @property
	    def summary(self) -> Dict:
	        """Summary sub-record, ``{}`` when missing."""
	        return self._lookup("summary", {})

	    @property
	    def website(self) -> str:
	        """Website URL, ``""`` when missing."""
	        return self._lookup("website", "")

	    @property
	    def emails(self) -> List[str]:
	        """Contact e-mail addresses, ``[]`` when missing."""
	        return self._lookup("emails", [])

	    @property
	    def address(self) -> Dict:
	        """Postal address sub-record, ``{}`` when missing."""
	        return self._lookup("address", {})

	    @property
	    def service_center(self) -> Dict:
	        """Service-center sub-record, ``{}`` when missing."""
	        return self._lookup("service_center", {})

	    @property
	    def request_form(self) -> str:
	        """Request-form URL, ``""`` when missing."""
	        return self._lookup("request_form", "")

	    @property
	    def request_time_stats(self) -> Dict:
	        """Per-year processing-time statistics, ``{}`` when missing."""
	        return self._lookup("request_time_stats", {})

	    def isFOIAonline(self) -> bool:
	        """Tell whether the request-form URL mentions "foiaonline" (case-insensitive)."""
	        form_url = self.request_form.lower()
	        return "foiaonline" in form_url

	    def hasRequestForm(self) -> bool:
	        """Tell whether a non-empty request-form value is present."""
	        if self.request_form:
	            return True
	        return False

	def create_search_url_wikipedia(search_query):
	    """Return a Wikipedia search-redirect URL for *search_query*.

	    The query is percent-encoded as a form value, so spaces become ``+`` and
	    every reserved or non-ASCII character (``#``, ``?``, ``%``, ``+``,
	    parentheses, dashes, ...) is escaped. Encoding parentheses also keeps
	    the URL usable inside a markdown ``[text](url)`` link. As before, ``&``
	    is spelled out as ``and`` rather than escaped.

	    Args:
	        search_query: Text to search for (a ``str``).

	    Returns:
	        The complete URL as a string.
	    """
	    # Imported here so the function does not depend on a file-level import.
	    from urllib.parse import quote_plus

	    base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
	    return base_url + quote_plus(search_query.replace('&', 'and'))

	def parse_foia_xml(filename: str) -> Dict:
	    """Parse a FOIA annual-report XML file and return its key figures.

	    Args:
	        filename: Path of the XML file to read.

	    Returns:
	        Dict with the string fields ``name``, ``abbr``, ``fiscal_year``,
	        ``pending_start``, ``received``, ``processed``, ``pending_end`` and
	        ``full_grants``. A field whose element is missing or has no text is
	        reported as ``"N/A"`` (``"Unknown"`` for ``name``). If the file is
	        not well-formed XML, ``name`` is ``"Error"`` and every other field
	        is ``"N/A"``.
	    """
	    ns = {
	        'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
	        'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
	        'nc': 'http://niem.gov/niem/niem-core/2.0',
	        'j': 'http://niem.gov/niem/domains/jxdm/4.1'
	    }

	    try:
	        root = ET.parse(filename).getroot()
	    except ET.ParseError:
	        return {"name": "Error", "abbr": "N/A", "fiscal_year": "N/A", "pending_start": "N/A", "received": "N/A", "processed": "N/A", "pending_end": "N/A", "full_grants": "N/A"}

	    def text_at(node, path, default="N/A"):
	        """Return the text of the element at *path* below *node*, or *default*."""
	        if node is None:
	            return default
	        found = node.find(path, ns)
	        # Check the element itself, not just its parent: a report may carry
	        # the section but omit an individual value.
	        if found is None or found.text is None:
	            return default
	        return found.text

	    org = root.find(".//nc:Organization", ns)
	    stats_prefix = ".//foia:ProcessedRequestSection/foia:ProcessingStatistics/foia:ProcessingStatistics"
	    grants_path = ".//foia:RequestDispositionSection/foia:RequestDisposition/foia:RequestDispositionFullGrantQuantity"

	    return {
	        "name": text_at(org, "nc:OrganizationName", "Unknown"),
	        "abbr": text_at(org, "nc:OrganizationAbbreviationText"),
	        "fiscal_year": text_at(root, ".//foia:DocumentFiscalYearDate"),
	        "pending_start": text_at(root, stats_prefix + "PendingAtStartQuantity"),
	        "received": text_at(root, stats_prefix + "ReceivedQuantity"),
	        "processed": text_at(root, stats_prefix + "ProcessedQuantity"),
	        "pending_end": text_at(root, stats_prefix + "PendingAtEndQuantity"),
	        "full_grants": text_at(root, grants_path),
	    }

	def list_and_display_xml_files():
	    """Render one expander per FOIA XML report in the working directory.

	    Each ``*.xml`` file in the current directory is parsed with
	    ``parse_foia_xml`` and shown as a Streamlit expander listing the fiscal
	    year, request counts and a Wikipedia search link for the agency name.
	    Files are shown in filename order. When no XML file exists, a single
	    notice is written instead.
	    """
	    # os.listdir() yields names in arbitrary order; sort so the page layout
	    # is stable between reruns.
	    xml_files = sorted(f for f in os.listdir('.') if f.endswith('.xml'))
	    if not xml_files:
	        st.markdown("No XML files found in the directory. 📂❓")
	        return

	    for xml_file in xml_files:
	        file_name = xml_file[:-4]  # Remove .xml extension
	        data = parse_foia_xml(xml_file)
	        with st.expander(f"📋 {file_name} - {data['name']} ({data['abbr']})"):
	            st.write(f"📅 Fiscal Year: {data['fiscal_year']}")
	            st.write(f"⏳ Pending at Start: {data['pending_start']}")
	            st.write(f"📥 Received: {data['received']}")
	            st.write(f"✅ Processed: {data['processed']}")
	            st.write(f"⏳ Pending at End: {data['pending_end']}")
	            st.write(f"👍 Full Grants: {data['full_grants']}")
	            st.write(f"[Wikipedia - {data['name']}]({create_search_url_wikipedia(data['name'])})")

	def search_foia_content(query: str, agency: str = None) -> Dict:
	    """Return placeholder search results for *query*.

	    No search is performed: the inputs are echoed back together with two
	    hard-coded sample hits whose titles embed the query.

	    Args:
	        query: Search text.
	        agency: Agency the search is scoped to; echoed back unchanged.

	    Returns:
	        Dict with the keys ``query``, ``agency`` and ``results`` (a list of
	        ``{"title", "date"}`` dicts).
	    """
	    sample_hits = [
	        {"title": f"Sample FOIA Response 1 for {query}", "date": "2023-01-01"},
	        {"title": f"Sample FOIA Response 2 for {query}", "date": "2023-02-01"},
	    ]
	    return dict(query=query, agency=agency, results=sample_hits)

	def _contact_lines(agency) -> List[str]:
	    """Return the non-empty postal and phone lines to display for *agency*."""
	    address = agency.address
	    # Join only the parts that are present, so a blank address yields no
	    # line at all instead of a stray ",  ".
	    state_zip = " ".join(
	        str(part) for part in (address.get("state", ""), address.get("zip", "")) if part
	    )
	    locality = ", ".join(
	        str(part) for part in (address.get("city", ""), state_zip) if part
	    )
	    # Tolerate a missing or empty phone list.
	    phones = agency.service_center.get("phone") or [""]
	    candidates = [
	        "\n".join(address.get("address_lines", [])),
	        address.get("street", ""),
	        locality,
	        phones[0],
	    ]
	    return [line for line in candidates if line]


	def main():
	    """Render the FOIA Explorer page.

	    Sections, in order: introduction and dataset links, the agency browser
	    (details, contact information, request link, median processing times and
	    a document search for the selected agency), and the list of XML reports
	    found in the working directory.
	    """
	    # Load agencies from XML files
	    agencies = load_agencies_from_xml()

	    st.title("Freedom of Information Act (FOIA) Explorer 🌍📊")

	    # NOTE: the text lines of the two multi-line literals below carry only
	    # the file-wide leading tab; their whitespace is part of the string, so
	    # they are not indented further.
	    st.write("""
	The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. 🎉✨
	Below is a list of datasets available under FOIA, alongside guessed Wikipedia URLs for more information. 📚🔍
	""")

	    st.markdown("""
	- [FOIA.Gov](https://www.foia.gov/foia-dataset-download.html)
	- [Data.Gov](https://catalog.data.gov/dataset?tags=foia)
	""")

	    # FOIA Datasets with Emojis
	    datasets = [
	        ("Provider Taxonomy", "🩺"),
	        ("Consumer Complaint Database", "📞"),
	        ("Medicare Provider Utilization and Payment Data", "💊"),
	        ("Global Terrorism Database", "🌍"),
	        ("National Nutrient Database", "🍎"),
	        ("Patent Grant Full Text Data", "📜"),
	        ("Toxic Release Inventory", "☣️"),
	        ("Residential Energy Consumption Survey", "🏠")
	    ]

	    st.markdown("### FOIA Datasets and Wikipedia URLs")
	    for dataset, emoji in datasets:
	        st.markdown(f"- {emoji} {dataset}: [Wikipedia]({create_search_url_wikipedia(dataset)})")

	    # Agency Browser
	    st.header("Agency Browser")
	    agency_names = sorted(agencies)
	    # The leading empty option means "nothing selected".
	    selected_agency = st.selectbox("Select Agency", [""] + agency_names)

	    if selected_agency:
	        agency = Agency(agencies[selected_agency])
	        st.subheader(f"{agency.name} Details")
	        st.write(f"Description: {agency.summary.get('description', 'N/A')}")
	        st.write(f"Abbreviation: {agency.summary.get('abbreviation', 'N/A')}")
	        if agency.website:
	            st.write(f"Website: [{agency.website}]({agency.website})")

	        st.subheader("Contact Information")
	        st.write("\n".join(_contact_lines(agency)))
	        if agency.emails:
	            st.write(f"Email: [{agency.emails[0]}](mailto:{agency.emails[0]})")

	        if agency.hasRequestForm():
	            form_url = "https://foiaonline.regulations.gov/foia/action/public/request/createRequest" if agency.isFOIAonline() else agency.request_form
	            st.write(f"[Submit FOIA Request]({form_url})")

	        st.subheader("Median Processing Times")
	        if agency.request_time_stats:
	            for year, stats in agency.request_time_stats.items():
	                col1, col2 = st.columns(2)
	                with col1:
	                    st.write(f"Year: {year}")
	                with col2:
	                    for key, value in stats.items():
	                        if "median" in key:
	                            st.write(f"{key.replace('_median_days', '').title()}: {value} days")

	        # FOIA Document Search
	        # NOTE(review): the source this was reconstructed from had lost its
	        # indentation; the search section is placed under the selected
	        # agency because it is a subheader like the sections above. Confirm.
	        st.subheader("Search FOIA Documents")
	        search_query = st.text_input("Enter search query")
	        if st.button("Search") and search_query:
	            with st.spinner("Searching..."):
	                results = search_foia_content(search_query, selected_agency)
	                st.write(f"Found {len(results['results'])} results for '{search_query}':")
	                for result in results["results"]:
	                    st.write(f"- {result['title']} ({result['date']})")

	    # XML Files Display
	    st.header("FOIA XML Reports")
	    list_and_display_xml_files()

	# Start the app only when this file is executed directly, not when imported.
	if __name__ == "__main__":
	    main()