awacke1 committed on
Commit
fdaad4e
·
verified ·
1 Parent(s): cbe41a4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +199 -0
app.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import requests
4
+ import json
5
+ import os
6
+ import xml.etree.ElementTree as ET
7
+ from typing import Dict, List
8
+
9
# Sample agency data, keyed by the agency's display name. Each record carries
# the fields that the Agency wrapper exposes (name, summary, website, emails,
# address, service_center, request_form, request_time_stats).
_DOJ_NAME = "Department of Justice"
_DOJ_WEBSITE = "https://www.justice.gov"

AGENCIES = {
    _DOJ_NAME: dict(
        name=_DOJ_NAME,
        summary=dict(
            name=_DOJ_NAME,
            description="The mission of the DOJ is to enforce the law...",
            abbreviation="DOJ",
            website=_DOJ_WEBSITE,
        ),
        website=_DOJ_WEBSITE,
        # NOTE(review): this value looks like an e-mail obfuscation placeholder
        # rather than a deliverable address -- TODO confirm the real contact.
        emails=["[email protected]"],
        address=dict(
            address_lines=["950 Pennsylvania Avenue, NW"],
            street="",
            city="Washington",
            state="DC",
            zip="20530-0001",
        ),
        service_center=dict(phone=["202-514-2000"]),
        request_form="https://www.justice.gov/oip/foia-request",
        # Keyed by year (as a string); values are median processing days.
        request_time_stats={
            "2022": dict(simple_median_days=20, complex_median_days=45),
        },
    ),
}
35
+
36
class Agency:
    """Read-only convenience wrapper around one agency record (a plain dict).

    Every accessor returns a value of its annotated type even when the
    underlying key is missing *or* explicitly set to ``None``, so callers can
    chain ``.get(...)`` / ``.lower()`` / indexing without extra guards.
    """

    def __init__(self, data: Dict):
        # Tolerate ``None`` so that an absent record behaves like an empty one.
        self.data = {} if data is None else data

    def _field(self, key: str, default):
        """Return ``self.data[key]``, or *default* if it is missing or ``None``.

        ``dict.get(key, default)`` alone is not enough: it returns ``None``
        when the key is present with a null value.
        """
        value = self.data.get(key)
        return default if value is None else value

    @property
    def name(self) -> str:
        """Agency display name, or ``""`` when unavailable."""
        return self._field("name", "")

    @property
    def summary(self) -> Dict:
        """The ``summary`` sub-record, or ``{}`` when unavailable."""
        return self._field("summary", {})

    @property
    def website(self) -> str:
        """Agency website URL, or ``""`` when unavailable."""
        return self._field("website", "")

    @property
    def emails(self) -> List[str]:
        """List of contact e-mail addresses, or ``[]`` when unavailable."""
        return self._field("emails", [])

    @property
    def address(self) -> Dict:
        """The ``address`` sub-record, or ``{}`` when unavailable."""
        return self._field("address", {})

    @property
    def service_center(self) -> Dict:
        """The ``service_center`` sub-record, or ``{}`` when unavailable."""
        return self._field("service_center", {})

    @property
    def request_form(self) -> str:
        """URL of the agency's request form, or ``""`` when unavailable."""
        return self._field("request_form", "")

    @property
    def request_time_stats(self) -> Dict:
        """The ``request_time_stats`` sub-record, or ``{}`` when unavailable."""
        return self._field("request_time_stats", {})

    def isFOIAonline(self) -> bool:
        """Return True if the request-form URL contains ``foiaonline`` (case-insensitive)."""
        return "foiaonline" in self.request_form.lower()

    def hasRequestForm(self) -> bool:
        """Return True if the record has a non-empty request-form URL."""
        return bool(self.request_form)
77
+
78
def create_search_url_wikipedia(search_query):
    """Return an English-Wikipedia search-redirect URL for *search_query*.

    The query is percent-encoded with ``urllib.parse.quote_plus`` so that
    characters such as ``#``, ``?``, ``(`` and ``)`` cannot corrupt the URL or
    the Markdown link it is embedded in. Spaces become ``+`` and ``&`` is
    spelled out as ``and``, exactly as before. ``None`` is treated as an
    empty query.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
    # Preserve the historical behavior of replacing '&' with the word 'and'
    # before encoding, rather than emitting '%26'.
    text = (search_query or "").replace("&", "and")
    return base_url + quote_plus(text)
81
+
82
def scan_for_xml_files_and_generate_links():
    """Render a Markdown table of organizations found in ``*.xml`` files.

    Scans the current working directory for files ending in ``.xml``, parses
    each one, and for every ``nc:Organization`` element that has both an
    ``nc:OrganizationAbbreviationText`` and an ``nc:OrganizationName`` child,
    adds a table row with Wikipedia search links for both names. Files that
    cannot be read or parsed produce a Streamlit warning and are skipped.
    The result is written to the page via ``st.markdown``; nothing is returned.
    """
    xml_files = [f for f in os.listdir('.') if f.endswith('.xml')]
    if not xml_files:
        st.markdown("No XML files found in the directory.")
        return

    # Built once instead of once per lookup.
    namespaces = {'nc': 'http://niem.gov/niem/niem-core/2.0'}

    def table_cell(text):
        # A raw '|' inside a value would be read as a column separator.
        return (text or "").replace("|", "\\|")

    rows = [
        "Filename | Abbreviation | Full Name | Links",
        "--- | --- | --- | ---",
    ]
    # os.listdir order is arbitrary; sort so the table is stable across runs.
    for xml_file in sorted(xml_files):
        try:
            root = ET.parse(xml_file).getroot()
        except ET.ParseError:
            st.warning(f"Could not parse XML file: {xml_file}")
            continue
        except OSError:
            st.warning(f"Could not read XML file: {xml_file}")
            continue

        for org in root.findall(".//nc:Organization", namespaces=namespaces):
            short_name = org.find("nc:OrganizationAbbreviationText", namespaces=namespaces)
            long_name = org.find("nc:OrganizationName", namespaces=namespaces)
            if short_name is None or long_name is None:
                continue
            # An element with no text content has ``.text is None``.
            abbreviation = short_name.text or ""
            full_name = long_name.text or ""
            # The two links share ONE cell, so they must not be separated by
            # '|': that would create a fifth cell under a four-column header
            # and the second link would be dropped when the table is rendered.
            links = (
                f"[Abbreviation Wikipedia]({create_search_url_wikipedia(abbreviation)})"
                " · "
                f"[Full Name Wikipedia]({create_search_url_wikipedia(full_name)})"
            )
            rows.append(
                f"{table_cell(xml_file)} | {table_cell(abbreviation)} | "
                f"{table_cell(full_name)} | {links}"
            )

    st.markdown("\n".join(rows) + "\n")
103
+
104
def search_foia_content(query: str, agency: str = None) -> Dict:
    """Return placeholder FOIA search results for *query*.

    No real search is performed: the function echoes the query and agency
    back and attaches two fixed sample hits whose titles embed the query.

    Returns:
        A dict with keys ``"query"``, ``"agency"`` and ``"results"``, where
        ``"results"`` is a list of ``{"title", "date"}`` dicts.
    """
    sample_dates = ("2023-01-01", "2023-02-01")
    hits = [
        {"title": f"Sample FOIA Response {position} for {query}", "date": sample_date}
        for position, sample_date in enumerate(sample_dates, start=1)
    ]
    return {"query": query, "agency": agency, "results": hits}
114
+
115
def _format_contact_info(agency) -> str:
    """Build the Markdown contact block (address lines, locality, first phone)."""
    address = agency.address or {}
    lines = [line for line in (address.get("address_lines") or []) if line]

    street = address.get("street") or ""
    if street:
        lines.append(street)

    # Assemble "City, ST ZIP" from whichever parts exist, so an address with
    # no locality does not render a stray ", ".
    region = " ".join(
        part for part in (address.get("state") or "", address.get("zip") or "") if part
    )
    locality = ", ".join(part for part in (address.get("city") or "", region) if part)
    if locality:
        lines.append(locality)

    # The phone list may be missing, None, or empty; never index blindly.
    phones = (agency.service_center or {}).get("phone") or []
    if phones and phones[0]:
        lines.append(phones[0])

    # Two trailing spaces force a Markdown hard line break; a bare "\n" would
    # collapse all contact lines into a single paragraph.
    return "  \n".join(lines)


def _render_agency_tab():
    """Render the agency selector, details, contact info, stats and search box."""
    st.header("Agency Browser")
    # The leading "" entry means "nothing selected yet".
    selected_agency = st.selectbox("Select Agency", [""] + sorted(AGENCIES))
    if not selected_agency:
        return

    agency = Agency(AGENCIES[selected_agency])
    st.subheader(f"{agency.name} Details")
    st.write(f"Description: {agency.summary.get('description', 'N/A')}")
    st.write(f"Abbreviation: {agency.summary.get('abbreviation', 'N/A')}")
    if agency.website:
        st.write(f"Website: [{agency.website}]({agency.website})")

    st.subheader("Contact Information")
    contact_block = _format_contact_info(agency)
    if contact_block:
        st.write(contact_block)
    if agency.emails:
        st.write(f"Email: [{agency.emails[0]}](mailto:{agency.emails[0]})")

    if agency.hasRequestForm():
        # Agencies whose form URL mentions FOIAonline are sent to the shared
        # FOIAonline request page instead of their own form URL.
        # NOTE(review): confirm this FOIAonline URL still resolves.
        if agency.isFOIAonline():
            form_url = "https://foiaonline.regulations.gov/foia/action/public/request/createRequest"
        else:
            form_url = agency.request_form
        st.write(f"[Submit FOIA Request]({form_url})")

    st.subheader("Median Processing Times")
    if agency.request_time_stats:
        for year, stats in agency.request_time_stats.items():
            year_column, stats_column = st.columns(2)
            with year_column:
                st.write(f"Year: {year}")
            with stats_column:
                for key, value in stats.items():
                    if "median" in key:
                        # e.g. "simple_median_days" -> "Simple"
                        st.write(f"{key.replace('_median_days', '').title()}: {value} days")

    st.subheader("Search FOIA Documents")
    search_query = st.text_input("Enter search query")
    if st.button("Search") and search_query:
        with st.spinner("Searching..."):
            results = search_foia_content(search_query, selected_agency)
            st.write(f"Found {len(results['results'])} results for '{search_query}':")
            for result in results["results"]:
                st.write(f"- {result['title']} ({result['date']})")


def _render_datasets_tab():
    """Render the FOIA overview text, dataset links and the XML organization table."""
    st.header("FOIA Datasets & XML Scanner")
    st.write("""
        The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. 🎉✨
        """)

    st.markdown("""
        - [FOIA.Gov](https://www.foia.gov/foia-dataset-download.html)
        - [Data.Gov](https://catalog.data.gov/dataset?tags=foia)
        """)

    datasets = [
        "Provider Taxonomy",
        "Consumer Complaint Database",
        "Medicare Provider Utilization and Payment Data",
        "Global Terrorism Database",
        "National Nutrient Database",
        "Patent Grant Full Text Data",
        "Toxic Release Inventory",
        "Residential Energy Consumption Survey",
    ]

    st.markdown("### FOIA Datasets and Wikipedia URLs")
    for dataset in datasets:
        st.markdown(f"- **{dataset}**: [Wikipedia]({create_search_url_wikipedia(dataset)})")

    st.markdown("### Organizations in Found XML Files")
    scan_for_xml_files_and_generate_links()


def main():
    """Streamlit entry point: draw the page title and the two application tabs."""
    st.title("Freedom of Information Act (FOIA) Explorer 🌍📊")

    # Tabs for different views
    tab1, tab2 = st.tabs(["Agency Browser", "FOIA Datasets & XML Scanner"])

    with tab1:
        _render_agency_tab()

    with tab2:
        _render_datasets_tab()


if __name__ == "__main__":
    main()