awacke1 committed on
Commit
0d8fa25
·
verified ·
1 Parent(s): d447935

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -166
app.py CHANGED
@@ -6,32 +6,50 @@ import os
6
  import xml.etree.ElementTree as ET
7
  from typing import Dict, List
8
 
9
- # Sample agency data
10
- AGENCIES = {
11
- "Department of Justice": {
12
- "name": "Department of Justice",
13
- "summary": {
14
- "name": "Department of Justice",
15
- "description": "The mission of the DOJ is to enforce the law...",
16
- "abbreviation": "DOJ",
17
- "website": "https://www.justice.gov"
18
- },
19
- "website": "https://www.justice.gov",
20
- "emails": ["[email protected]"],
21
- "address": {
22
- "address_lines": ["950 Pennsylvania Avenue, NW"],
23
- "street": "",
24
- "city": "Washington",
25
- "state": "DC",
26
- "zip": "20530-0001"
27
- },
28
- "service_center": {"phone": ["202-514-2000"]},
29
- "request_form": "https://www.justice.gov/oip/foia-request",
30
- "request_time_stats": {
31
- "2022": {"simple_median_days": 20, "complex_median_days": 45}
32
- }
33
  }
34
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  class Agency:
37
  def __init__(self, data: Dict):
@@ -79,77 +97,50 @@ def create_search_url_wikipedia(search_query):
79
  base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
80
  return base_url + search_query.replace(' ', '+').replace('–', '%E2%80%93').replace('&', 'and')
81
 
82
- def process_foia_xml_file(filename: str):
83
- """Process FOIA XML file and display results with emojis"""
84
  try:
85
  tree = ET.parse(filename)
86
  root = tree.getroot()
87
-
88
- # Namespace dictionary
89
  ns = {
90
  'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
91
  'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
92
  'nc': 'http://niem.gov/niem/niem-core/2.0',
93
  'j': 'http://niem.gov/niem/domains/jxdm/4.1'
94
  }
95
-
96
- # Extract basic info
97
- org = root.find(".//nc:Organization", ns)
98
- org_name = org.find("nc:OrganizationName", ns).text if org is not None else "Unknown"
99
- org_abbr = org.find("nc:OrganizationAbbreviationText", ns).text if org is not None else "N/A"
100
- fiscal_year = root.find(".//foia:DocumentFiscalYearDate", ns).text if root.find(".//foia:DocumentFiscalYearDate", ns) is not None else "N/A"
101
 
102
- st.subheader(f"πŸ“‹ {org_name} ({org_abbr}) FOIA Report {fiscal_year}")
103
- st.write(f"[Wikipedia - {org_name}]({create_search_url_wikipedia(org_name)})")
104
-
105
- # Processing Statistics
106
- proc_stats = root.find(".//foia:ProcessedRequestSection/foia:ProcessingStatistics", ns)
107
- if proc_stats is not None:
108
- st.write("πŸ“Š **Request Processing Statistics**")
109
- st.write(f"- Pending at Start: {proc_stats.find('foia:ProcessingStatisticsPendingAtStartQuantity', ns).text} ⏳")
110
- st.write(f"- Received: {proc_stats.find('foia:ProcessingStatisticsReceivedQuantity', ns).text} πŸ“₯")
111
- st.write(f"- Processed: {proc_stats.find('foia:ProcessingStatisticsProcessedQuantity', ns).text} βœ…")
112
- st.write(f"- Pending at End: {proc_stats.find('foia:ProcessingStatisticsPendingAtEndQuantity', ns).text} ⏳")
113
-
114
- # Request Dispositions
115
- disp = root.find(".//foia:RequestDispositionSection/foia:RequestDisposition", ns)
116
- if disp is not None:
117
- st.write("πŸ“‘ **Request Dispositions**")
118
- st.write(f"- Full Grants: {disp.find('foia:RequestDispositionFullGrantQuantity', ns).text} βœ…")
119
- st.write(f"- Partial Grants: {disp.find('foia:RequestDispositionPartialGrantQuantity', ns).text} βž•")
120
- st.write(f"- Full Denials: {disp.find('foia:RequestDispositionFullExemptionDenialQuantity', ns).text} ❌")
121
- denial = disp.find('foia:NonExemptionDenial', ns)
122
- if denial is not None:
123
- st.write(f"- Non-Exemption Denial ({denial.find('foia:NonExemptionDenialReasonCode', ns).text}): {denial.find('foia:NonExemptionDenialQuantity', ns).text} πŸ“­")
124
-
125
- # Applied Exemptions
126
- exemptions = root.findall(".//foia:RequestDispositionAppliedExemptionsSection/foia:ComponentAppliedExemptions/foia:AppliedExemption", ns)
127
- if exemptions:
128
- st.write("🚫 **Applied Exemptions**")
129
- for ex in exemptions:
130
- code = ex.find('foia:AppliedExemptionCode', ns).text
131
- qty = ex.find('foia:AppliedExemptionQuantity', ns).text
132
- st.write(f"- {code}: {qty} times")
133
-
134
- # Response Times
135
- resp_time = root.find(".//foia:ProcessedResponseTimeSection/foia:ProcessedResponseTime", ns)
136
- if resp_time is not None:
137
- st.write("⏰ **Response Times**")
138
- simple = resp_time.find('foia:SimpleResponseTime', ns)
139
- if simple is not None:
140
- st.write(f"- Simple Requests:")
141
- st.write(f" - Median: {simple.find('foia:ResponseTimeMedianDaysValue', ns).text} days ⏱️")
142
- st.write(f" - Average: {simple.find('foia:ResponseTimeAverageDaysValue', ns).text} days πŸ“ˆ")
143
-
144
  except ET.ParseError:
145
- st.error(f"❌ Error parsing XML file: {filename}")
146
 
147
- def list_xml_files():
148
- xml_files = [f[:-4] for f in os.listdir('.') if f.endswith('.xml')]
149
  if not xml_files:
150
  st.markdown("No XML files found in the directory. πŸ“‚β“")
151
- return []
152
- return xml_files
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  def search_foia_content(query: str, agency: str = None) -> Dict:
155
  results = {
@@ -163,94 +154,89 @@ def search_foia_content(query: str, agency: str = None) -> Dict:
163
  return results
164
 
165
  def main():
 
 
 
166
  st.title("Freedom of Information Act (FOIA) Explorer πŸŒπŸ“Š")
167
 
168
- tab1, tab2 = st.tabs(["Agency Browser", "FOIA Datasets & XML Scanner"])
169
-
170
- with tab1:
171
- st.header("Agency Browser")
172
- agency_names = sorted(list(AGENCIES.keys()))
173
- selected_agency = st.selectbox("Select Agency", [""] + agency_names)
174
-
175
- if selected_agency:
176
- agency = Agency(AGENCIES[selected_agency])
177
- st.subheader(f"{agency.name} Details")
178
- st.write(f"Description: {agency.summary.get('description', 'N/A')}")
179
- st.write(f"Abbreviation: {agency.summary.get('abbreviation', 'N/A')}")
180
- if agency.website:
181
- st.write(f"Website: [{agency.website}]({agency.website})")
182
-
183
- st.subheader("Contact Information")
184
- contact_info = [
185
- "\n".join(agency.address.get("address_lines", [])),
186
- agency.address.get("street", ""),
187
- f"{agency.address.get('city', '')}, {agency.address.get('state', '')} {agency.address.get('zip', '')}",
188
- agency.service_center.get("phone", [""])[0]
189
- ]
190
- st.write("\n".join([line for line in contact_info if line]))
191
- if agency.emails:
192
- st.write(f"Email: [{agency.emails[0]}](mailto:{agency.emails[0]})")
193
-
194
- if agency.hasRequestForm():
195
- form_url = "https://foiaonline.regulations.gov/foia/action/public/request/createRequest" if agency.isFOIAonline() else agency.request_form
196
- st.write(f"[Submit FOIA Request]({form_url})")
197
-
198
- st.subheader("Median Processing Times")
199
- if agency.request_time_stats:
200
- for year, stats in agency.request_time_stats.items():
201
- col1, col2 = st.columns(2)
202
- with col1:
203
- st.write(f"Year: {year}")
204
- with col2:
205
- for key, value in stats.items():
206
- if "median" in key:
207
- st.write(f"{key.replace('_median_days', '').title()}: {value} days")
208
-
209
- st.subheader("Search FOIA Documents")
210
- search_query = st.text_input("Enter search query")
211
- if st.button("Search") and search_query:
212
- with st.spinner("Searching..."):
213
- results = search_foia_content(search_query, selected_agency)
214
- st.write(f"Found {len(results['results'])} results for '{search_query}':")
215
- for result in results["results"]:
216
- st.write(f"- {result['title']} ({result['date']})")
217
-
218
- with tab2:
219
- st.header("FOIA Datasets & XML Scanner")
220
- st.write("""
221
- The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. πŸŽ‰βœ¨
222
- Below is a list of datasets available under FOIA, alongside guessed Wikipedia URLs for more information. πŸ“šπŸ”
223
- """)
224
 
225
- st.markdown("""
226
- - [FOIA.Gov](https://www.foia.gov/foia-dataset-download.html)
227
- - [Data.Gov](https://catalog.data.gov/dataset?tags=foia)
228
- """)
229
-
230
- datasets = [
231
- "Provider Taxonomy",
232
- "Consumer Complaint Database",
233
- "Medicare Provider Utilization and Payment Data",
234
- "Global Terrorism Database",
235
- "National Nutrient Database",
236
- "Patent Grant Full Text Data",
237
- "Toxic Release Inventory",
238
- "Residential Energy Consumption Survey",
239
  ]
240
-
241
- st.markdown("### FOIA Datasets and Wikipedia URLs")
242
- for dataset in datasets:
243
- st.markdown(f"- **{dataset}**: [Wikipedia]({create_search_url_wikipedia(dataset)})")
244
-
245
- st.markdown("### Available FOIA XML Reports")
246
- xml_files = list_xml_files()
247
- for xml_file in xml_files:
248
- col1, col2 = st.columns([3, 1])
249
- with col1:
250
- st.markdown(f"- [{xml_file}]({xml_file}.xml)")
251
- with col2:
252
- if st.button(f"πŸ“Š Process {xml_file}", key=f"process_{xml_file}"):
253
- process_foia_xml_file(f"{xml_file}.xml")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
  if __name__ == "__main__":
256
  main()
 
6
  import xml.etree.ElementTree as ET
7
  from typing import Dict, List
8
 
9
def load_agencies_from_xml() -> Dict:
    """Build the agency catalogue from FOIA annual-report XML files in the current directory.

    Every ``*.xml`` file in the working directory is parsed. A file contributes
    an entry only when it contains an ``nc:Organization`` element with a
    non-empty ``nc:OrganizationName``. Files that cannot be read or parsed are
    reported with ``st.warning`` and skipped.

    Returns:
        A dict keyed by organization name. Each value has the keys ``name``,
        ``summary``, ``website``, ``emails``, ``address``, ``service_center``,
        ``request_form`` and ``request_time_stats``. Website, e-mail and
        request-form values are guesses derived from the abbreviation and are
        left empty when the report has no abbreviation. Address and phone are
        placeholders. ``request_time_stats`` maps fiscal year to
        ``{"simple_median_days": <float or 0>}``; when several files describe
        the same organization their fiscal years are merged into one entry.
    """
    ns = {
        'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
        'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
        'nc': 'http://niem.gov/niem/niem-core/2.0',
        'j': 'http://niem.gov/niem/domains/jxdm/4.1'
    }
    median_path = (
        ".//foia:ProcessedResponseTimeSection/foia:ProcessedResponseTime"
        "/foia:SimpleResponseTime/foia:ResponseTimeMedianDaysValue"
    )

    def _text(node, path):
        """Return the stripped text found at *path* under *node*, or None."""
        if node is None:
            return None
        found = node.find(path, ns)
        if found is None or found.text is None:
            return None
        return found.text.strip() or None

    agencies = {}
    # Sorted so the result does not depend on the directory listing order.
    for xml_file in sorted(f for f in os.listdir('.') if f.endswith('.xml')):
        try:
            root = ET.parse(xml_file).getroot()
        except (ET.ParseError, OSError):
            st.warning(f"Could not parse {xml_file}")
            continue

        org = root.find(".//nc:Organization", ns)
        name = _text(org, "nc:OrganizationName")
        if name is None:
            # Without a name there is nothing to key the entry on.
            continue
        abbr = _text(org, "nc:OrganizationAbbreviationText")
        fiscal_year = _text(root, ".//foia:DocumentFiscalYearDate") or "N/A"

        # The value is not guaranteed to be numeric (e.g. "<1"); treat anything
        # unparseable the same as a missing value.
        try:
            simple_median = float(_text(root, median_path))
        except (TypeError, ValueError):
            simple_median = 0
        year_stats = {"simple_median_days": simple_median}

        if name in agencies:
            # Another report for the same organization: keep the existing
            # entry and add this fiscal year instead of overwriting it.
            agencies[name]["request_time_stats"][fiscal_year] = year_stats
            continue

        # Guessed URLs only make sense when an abbreviation is available.
        slug = abbr.lower() if abbr else None
        website = f"https://www.{slug}.gov" if slug else ""

        # Build minimal agency data (can be expanded with more XML data)
        agencies[name] = {
            "name": name,
            "summary": {
                "name": name,
                "description": f"FOIA data for {name} ({fiscal_year})",
                "abbreviation": abbr or "N/A",
                "website": website,  # Guessed URL, adjust as needed
            },
            "website": website,
            "emails": [f"foia@{slug}.gov"] if slug else [],  # Guessed email
            "address": {"address_lines": [], "street": "", "city": "", "state": "", "zip": ""},  # Placeholder
            "service_center": {"phone": ["N/A"]},  # Placeholder
            "request_form": f"https://www.{slug}.gov/foia" if slug else "",  # Guessed form URL
            "request_time_stats": {fiscal_year: year_stats},
        }
    return agencies
53
 
54
  class Agency:
55
  def __init__(self, data: Dict):
 
97
  base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
98
  return base_url + search_query.replace(' ', '+').replace('–', '%E2%80%93').replace('&', 'and')
99
 
100
def parse_foia_xml(filename: str) -> Dict:
    """Parse a FOIA annual-report XML file and return its headline figures.

    Args:
        filename: Path of the XML file to read.

    Returns:
        A dict with the keys ``name``, ``abbr``, ``fiscal_year``,
        ``pending_start``, ``received``, ``processed``, ``pending_end`` and
        ``full_grants``. All values are strings taken from the document.
        A missing or empty element yields ``"N/A"`` (``"Unknown"`` for
        ``name``). If the file cannot be read or is not well-formed XML,
        ``name`` is ``"Error"`` and every other value is ``"N/A"``.
    """
    ns = {
        'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
        'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
        'nc': 'http://niem.gov/niem/niem-core/2.0',
        'j': 'http://niem.gov/niem/domains/jxdm/4.1'
    }
    stats = ".//foia:ProcessedRequestSection/foia:ProcessingStatistics/"
    disposition = ".//foia:RequestDispositionSection/foia:RequestDisposition/"

    try:
        root = ET.parse(filename).getroot()
    except (ET.ParseError, OSError):
        return {"name": "Error", "abbr": "N/A", "fiscal_year": "N/A", "pending_start": "N/A", "received": "N/A", "processed": "N/A", "pending_end": "N/A", "full_grants": "N/A"}

    def _text(node, path, default="N/A"):
        """Return the stripped text at *path* under *node*, or *default*."""
        found = node.find(path, ns) if node is not None else None
        text = (found.text or "").strip() if found is not None else ""
        return text or default

    # Name and abbreviation come from the first Organization element only.
    org = root.find(".//nc:Organization", ns)
    return {
        "name": _text(org, "nc:OrganizationName", "Unknown"),
        "abbr": _text(org, "nc:OrganizationAbbreviationText"),
        "fiscal_year": _text(root, ".//foia:DocumentFiscalYearDate"),
        "pending_start": _text(root, stats + "foia:ProcessingStatisticsPendingAtStartQuantity"),
        "received": _text(root, stats + "foia:ProcessingStatisticsReceivedQuantity"),
        "processed": _text(root, stats + "foia:ProcessingStatisticsProcessedQuantity"),
        "pending_end": _text(root, stats + "foia:ProcessingStatisticsPendingAtEndQuantity"),
        "full_grants": _text(root, disposition + "foia:RequestDispositionFullGrantQuantity"),
    }
126
 
127
def list_and_display_xml_files():
    """Show one expander per FOIA XML report found in the current directory.

    Each ``*.xml`` file is summarised with ``parse_foia_xml`` and rendered as
    an expander holding the fiscal year, the request counts and a Wikipedia
    search link for the organization. When there are no XML files, a short
    notice is shown instead. Returns ``None``.
    """
    # NOTE(review): emoji below were restored from mis-encoded text; the
    # thumbs-up on "Full Grants" is a best guess - confirm against the app.
    # Sorted, files only: os.listdir order is arbitrary and a directory whose
    # name ends in ".xml" must not be handed to the parser.
    xml_files = sorted(
        entry for entry in os.listdir('.')
        if entry.endswith('.xml') and os.path.isfile(entry)
    )
    if not xml_files:
        st.markdown("No XML files found in the directory. 📂❓")
        return

    for xml_file in xml_files:
        file_name = os.path.splitext(xml_file)[0]  # Remove .xml extension
        data = parse_foia_xml(xml_file)
        with st.expander(f"📋 {file_name} - {data['name']} ({data['abbr']})"):
            st.write(f"📅 Fiscal Year: {data['fiscal_year']}")
            st.write(f"⏳ Pending at Start: {data['pending_start']}")
            st.write(f"📥 Received: {data['received']}")
            st.write(f"✅ Processed: {data['processed']}")
            st.write(f"⏳ Pending at End: {data['pending_end']}")
            st.write(f"👍 Full Grants: {data['full_grants']}")
            st.write(f"[Wikipedia - {data['name']}]({create_search_url_wikipedia(data['name'])})")
144
 
145
  def search_foia_content(query: str, agency: str = None) -> Dict:
146
  results = {
 
154
  return results
155
 
156
def main():
    """Render the FOIA Explorer page.

    From top to bottom the page shows: an introduction with dataset links,
    a list of FOIA datasets with Wikipedia search links, an agency browser
    fed by ``load_agencies_from_xml``, a document search box, and the
    per-file XML report summaries.
    """
    # NOTE(review): emoji in this function were restored from mis-encoded
    # text; the globe in the title, the globe on "Global Terrorism Database"
    # and the magnifying glass in the intro are best guesses - confirm
    # against the deployed app.

    # Load agencies from XML files
    agencies = load_agencies_from_xml()

    st.title("Freedom of Information Act (FOIA) Explorer 🌐📊")

    st.write("""
    The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. 🎉✨
    Below is a list of datasets available under FOIA, alongside guessed Wikipedia URLs for more information. 📚🔍
    """)

    st.markdown("""
    - [FOIA.Gov](https://www.foia.gov/foia-dataset-download.html)
    - [Data.Gov](https://catalog.data.gov/dataset?tags=foia)
    """)

    # FOIA Datasets with Emojis
    datasets = [
        ("Provider Taxonomy", "🩺"),
        ("Consumer Complaint Database", "📞"),
        ("Medicare Provider Utilization and Payment Data", "💊"),
        ("Global Terrorism Database", "🌍"),
        ("National Nutrient Database", "🍎"),
        ("Patent Grant Full Text Data", "📜"),
        ("Toxic Release Inventory", "☣️"),
        ("Residential Energy Consumption Survey", "🏠"),
    ]

    st.markdown("### FOIA Datasets and Wikipedia URLs")
    for dataset, emoji in datasets:
        st.markdown(f"- {emoji} **{dataset}**: [Wikipedia]({create_search_url_wikipedia(dataset)})")

    # Agency Browser
    st.header("Agency Browser")
    agency_names = sorted(agencies)
    selected_agency = st.selectbox("Select Agency", [""] + agency_names)

    if selected_agency:
        agency = Agency(agencies[selected_agency])
        st.subheader(f"{agency.name} Details")
        st.write(f"Description: {agency.summary.get('description', 'N/A')}")
        st.write(f"Abbreviation: {agency.summary.get('abbreviation', 'N/A')}")
        if agency.website:
            st.write(f"Website: [{agency.website}]({agency.website})")

        st.subheader("Contact Information")
        address = agency.address
        # Build "City, ST ZIP" from the parts that are present, so an empty
        # placeholder address does not render as a stray ", ".
        state_zip = " ".join(part for part in (address.get("state", ""), address.get("zip", "")) if part)
        locality = ", ".join(part for part in (address.get("city", ""), state_zip) if part)
        # Tolerate an empty phone list as well as a missing key.
        phones = agency.service_center.get("phone") or [""]
        contact_info = [
            *address.get("address_lines", []),
            address.get("street", ""),
            locality,
            phones[0],
        ]
        contact_lines = [line for line in contact_info if line]
        if contact_lines:
            # Two trailing spaces force a Markdown line break; a bare newline
            # would be folded into a single line.
            st.write("  \n".join(contact_lines))
        if agency.emails:
            st.write(f"Email: [{agency.emails[0]}](mailto:{agency.emails[0]})")

        if agency.hasRequestForm():
            form_url = "https://foiaonline.regulations.gov/foia/action/public/request/createRequest" if agency.isFOIAonline() else agency.request_form
            st.write(f"[Submit FOIA Request]({form_url})")

        st.subheader("Median Processing Times")
        if agency.request_time_stats:
            for year, stats in agency.request_time_stats.items():
                col1, col2 = st.columns(2)
                with col1:
                    st.write(f"Year: {year}")
                with col2:
                    for key, value in stats.items():
                        if "median" in key:
                            st.write(f"{key.replace('_median_days', '').title()}: {value} days")

    # FOIA Document Search
    st.subheader("Search FOIA Documents")
    search_query = st.text_input("Enter search query")
    if st.button("Search") and search_query:
        with st.spinner("Searching..."):
            results = search_foia_content(search_query, selected_agency)
            st.write(f"Found {len(results['results'])} results for '{search_query}':")
            for result in results["results"]:
                st.write(f"- {result['title']} ({result['date']})")

    # XML Files Display
    st.header("FOIA XML Reports")
    list_and_display_xml_files()
240
 
241
  if __name__ == "__main__":
242
  main()