Spaces:

awacke1
/

Freedom-of-Information-Act-FOIA-Datasets

Sleeping

App Files Files Community

awacke1 committed on Mar 19

Commit

fdaad4e

verified ·

1 Parent(s): cbe41a4

Create app.py

Browse files

Files changed (1) hide show

app.py +199 -0

app.py ADDED Viewed

	@@ -0,0 +1,199 @@

import json
import os
import xml.etree.ElementTree as ET
from typing import Dict, List
from urllib.parse import quote_plus

import pandas as pd
import requests
import streamlit as st
# Sample agency data.
# Maps an agency's display name to its record; the record layout is the one
# the Agency wrapper and the "Agency Browser" tab read from.
_DOJ_NAME = "Department of Justice"
_DOJ_WEBSITE = "https://www.justice.gov"

AGENCIES = {
    _DOJ_NAME: {
        "name": _DOJ_NAME,
        "summary": {
            "name": _DOJ_NAME,
            "description": "The mission of the DOJ is to enforce the law...",
            "abbreviation": "DOJ",
            "website": _DOJ_WEBSITE,
        },
        "website": _DOJ_WEBSITE,
        # NOTE(review): this value looks like an e-mail-obfuscation
        # placeholder rather than a real address -- confirm and replace.
        "emails": ["[email protected]"],
        "address": {
            "address_lines": ["950 Pennsylvania Avenue, NW"],
            "street": "",
            "city": "Washington",
            "state": "DC",
            "zip": "20530-0001",
        },
        "service_center": {
            "phone": ["202-514-2000"],
        },
        "request_form": "https://www.justice.gov/oip/foia-request",
        # Keyed by year (as a string); values are median processing days.
        "request_time_stats": {
            "2022": {
                "simple_median_days": 20,
                "complex_median_days": 45,
            },
        },
    },
}
class Agency:
    """Read-only accessor wrapper around one agency record.

    The record is a plain dict shaped like the entries of ``AGENCIES``.
    Every accessor returns an empty value of its documented type when the
    key is missing *or* explicitly ``None``, so callers can index, iterate
    or call string methods on the result without a prior ``None`` check.
    """

    def __init__(self, data: Dict):
        """Wrap *data*; ``None`` is treated as an empty record."""
        self.data = {} if data is None else data

    def __repr__(self) -> str:
        return f"{type(self).__name__}(name={self.name!r})"

    def _get(self, key: str, default):
        """Return ``self.data[key]``, or *default* if absent or ``None``.

        ``dict.get(key, default)`` alone is not enough: a key that is
        present with a ``None`` value would be returned as ``None``.
        """
        value = self.data.get(key)
        return default if value is None else value

    @property
    def name(self) -> str:
        """Display name of the agency."""
        return self._get("name", "")

    @property
    def summary(self) -> Dict:
        """Nested summary dict (description, abbreviation, ...)."""
        return self._get("summary", {})

    @property
    def website(self) -> str:
        """Agency website URL."""
        return self._get("website", "")

    @property
    def emails(self) -> List[str]:
        """Contact e-mail addresses."""
        return self._get("emails", [])

    @property
    def address(self) -> Dict:
        """Postal address dict (address_lines, street, city, state, zip)."""
        return self._get("address", {})

    @property
    def service_center(self) -> Dict:
        """Service-center contact dict (e.g. a ``phone`` list)."""
        return self._get("service_center", {})

    @property
    def request_form(self) -> str:
        """URL of the agency's FOIA request form, or ``""`` if none."""
        return self._get("request_form", "")

    @property
    def request_time_stats(self) -> Dict:
        """Per-year processing-time statistics."""
        return self._get("request_time_stats", {})

    def isFOIAonline(self) -> bool:
        """Return True if the request-form URL mentions "foiaonline"."""
        return "foiaonline" in self.request_form.lower()

    def hasRequestForm(self) -> bool:
        """Return True if a non-empty request-form URL is present."""
        return bool(self.request_form)
def create_search_url_wikipedia(search_query):
    """Return a Wikipedia search-redirect URL for *search_query*.

    The query is percent-encoded for use as a URL query value: spaces
    become ``+`` and every reserved or non-ASCII character (``#``, ``?``,
    ``%``, ``/``, parentheses, the en dash, ...) is escaped, so the result
    is safe to embed in a Markdown link target.  ``&`` is spelled out as
    ``and`` before encoding.

    Args:
        search_query: Text to search for.  ``None`` or ``""`` yields the
            bare search URL with an empty query.

    Returns:
        The full search URL as a string.
    """
    base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
    # Spell out "&" instead of encoding it as %26, so searches read
    # "Research and Development" rather than carrying a literal ampersand.
    query_text = (search_query or "").replace("&", "and")
    return base_url + quote_plus(query_text)
def _escape_markdown_table_cell(text):
    """Escape pipes so *text* cannot split a Markdown table cell."""
    return text.replace("|", "\\|")


def scan_for_xml_files_and_generate_links():
    """Render a Markdown table of organizations found in local XML files.

    Scans the current working directory for ``*.xml`` files, looks in each
    for ``nc:Organization`` elements (NIEM core 2.0 namespace) and, for
    every organization that has both an abbreviation and a full name,
    emits one table row with Wikipedia search links for both.

    Files that cannot be read or parsed produce a Streamlit warning and are
    skipped.  Organizations whose abbreviation or name element is missing
    or empty are skipped.  Returns ``None``; all output goes to Streamlit.
    """
    niem_ns = {'nc': 'http://niem.gov/niem/niem-core/2.0'}

    # Sorted so the table order does not depend on directory listing order.
    xml_files = sorted(f for f in os.listdir('.') if f.endswith('.xml'))
    if not xml_files:
        st.markdown("No XML files found in the directory.")
        return

    table_lines = [
        "Filename | Abbreviation | Full Name | Links",
        "--- | --- | --- | ---",
    ]
    for xml_file in xml_files:
        try:
            root = ET.parse(xml_file).getroot()
        except (ET.ParseError, OSError):
            st.warning(f"Could not parse XML file: {xml_file}")
            continue

        for org in root.findall(".//nc:Organization", namespaces=niem_ns):
            # findtext() gives "" for a missing element (via default) and
            # for an element without text, so empty values are uniform.
            short_name = (org.findtext("nc:OrganizationAbbreviationText", default="", namespaces=niem_ns) or "").strip()
            long_name = (org.findtext("nc:OrganizationName", default="", namespaces=niem_ns) or "").strip()
            if not short_name or not long_name:
                continue

            # The two links share ONE cell: separating them with " | "
            # would create a fifth cell in a four-column table, and the
            # extra cell is ignored when the table is rendered.
            links = (
                f"[Abbreviation Wikipedia]({create_search_url_wikipedia(short_name)})"
                " · "
                f"[Full Name Wikipedia]({create_search_url_wikipedia(long_name)})"
            )
            cells = (
                _escape_markdown_table_cell(xml_file),
                _escape_markdown_table_cell(short_name),
                _escape_markdown_table_cell(long_name),
                links,
            )
            table_lines.append(" | ".join(cells))

    st.markdown("\n".join(table_lines) + "\n")
def search_foia_content(query: str, agency: str = None) -> Dict:
    """Return placeholder FOIA search results for *query*.

    No real search is performed: the function always produces the same two
    sample hits, with *query* interpolated into their titles.

    Args:
        query: The search text; echoed back under ``"query"``.
        agency: Optional agency filter; echoed back under ``"agency"``
            and otherwise unused.

    Returns:
        A dict with the keys ``"query"``, ``"agency"`` and ``"results"``
        (a list of ``{"title", "date"}`` dicts).
    """
    sample_hits = []
    sample_hits.append({"title": f"Sample FOIA Response 1 for {query}", "date": "2023-01-01"})
    sample_hits.append({"title": f"Sample FOIA Response 2 for {query}", "date": "2023-02-01"})
    return {"query": query, "agency": agency, "results": sample_hits}
def _format_contact_info(agency) -> str:
    """Return the agency's postal address and first phone number as Markdown.

    Empty parts are dropped.  Lines are joined with two trailing spaces
    plus a newline, which Markdown renders as a hard line break; a bare
    newline would be collapsed into a space and run the lines together.
    """
    address = agency.address
    address_lines = address.get("address_lines") or []

    # "City, ST ZIP", omitting the comma and padding when parts are missing.
    state_zip = f"{address.get('state', '')} {address.get('zip', '')}".strip()
    locality = ", ".join(part for part in (address.get("city", ""), state_zip) if part)

    # An empty phone list must not raise IndexError.
    phones = agency.service_center.get("phone") or []
    first_phone = phones[0] if phones else ""

    candidates = [*address_lines, address.get("street", ""), locality, first_phone]
    return "  \n".join(line for line in candidates if line)


def _render_agency_details(agency) -> None:
    """Render description, contact data, request link and timing stats."""
    st.subheader(f"{agency.name} Details")
    st.write(f"Description: {agency.summary.get('description', 'N/A')}")
    st.write(f"Abbreviation: {agency.summary.get('abbreviation', 'N/A')}")
    if agency.website:
        st.write(f"Website: [{agency.website}]({agency.website})")

    st.subheader("Contact Information")
    st.write(_format_contact_info(agency))
    if agency.emails:
        st.write(f"Email: [{agency.emails[0]}](mailto:{agency.emails[0]})")
    if agency.hasRequestForm():
        # Agencies whose form URL mentions FOIAonline are sent to the
        # shared FOIAonline request page instead of their own form.
        if agency.isFOIAonline():
            form_url = "https://foiaonline.regulations.gov/foia/action/public/request/createRequest"
        else:
            form_url = agency.request_form
        st.write(f"[Submit FOIA Request]({form_url})")

    st.subheader("Median Processing Times")
    for year, stats in agency.request_time_stats.items():
        year_col, stats_col = st.columns(2)
        with year_col:
            st.write(f"Year: {year}")
        with stats_col:
            for key, value in stats.items():
                if "median" in key:
                    st.write(f"{key.replace('_median_days', '').title()}: {value} days")


def _render_agency_tab(tab) -> None:
    """Render the agency picker, its details and the document search."""
    with tab:
        st.header("Agency Browser")
        agency_names = sorted(AGENCIES)
        # The leading "" entry means "no agency selected".
        selected_agency = st.selectbox("Select Agency", [""] + agency_names)
        if selected_agency:
            _render_agency_details(Agency(AGENCIES[selected_agency]))

        st.subheader("Search FOIA Documents")
        search_query = st.text_input("Enter search query")
        if st.button("Search") and search_query:
            with st.spinner("Searching..."):
                # Pass None (not "") when no agency is selected.
                results = search_foia_content(search_query, selected_agency or None)
                st.write(f"Found {len(results['results'])} results for '{search_query}':")
                for result in results["results"]:
                    st.write(f"- {result['title']} ({result['date']})")


def _render_datasets_tab(tab) -> None:
    """Render the FOIA intro, dataset links and the XML organization table."""
    with tab:
        st.header("FOIA Datasets & XML Scanner")
        st.write("""
        The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. 🎉✨
        """)
        st.markdown("""
        - [FOIA.Gov](https://www.foia.gov/foia-dataset-download.html)
        - [Data.Gov](https://catalog.data.gov/dataset?tags=foia)
        """)
        datasets = [
            "Provider Taxonomy",
            "Consumer Complaint Database",
            "Medicare Provider Utilization and Payment Data",
            "Global Terrorism Database",
            "National Nutrient Database",
            "Patent Grant Full Text Data",
            "Toxic Release Inventory",
            "Residential Energy Consumption Survey",
        ]
        st.markdown("### FOIA Datasets and Wikipedia URLs")
        for dataset in datasets:
            st.markdown(f"- **{dataset}**: [Wikipedia]({create_search_url_wikipedia(dataset)})")
        st.markdown("### Organizations in Found XML Files")
        scan_for_xml_files_and_generate_links()


def main():
    """Build the Streamlit page: a title and the two application tabs."""
    st.title("Freedom of Information Act (FOIA) Explorer 🌍📊")
    # Tabs for different views
    agency_tab, datasets_tab = st.tabs(["Agency Browser", "FOIA Datasets & XML Scanner"])
    _render_agency_tab(agency_tab)
    _render_datasets_tab(datasets_tab)


if __name__ == "__main__":
    main()