Update app.py
Browse files
app.py
CHANGED
@@ -6,32 +6,50 @@ import os
|
|
6 |
import xml.etree.ElementTree as ET
|
7 |
from typing import Dict, List
|
8 |
|
9 |
-
#
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
},
|
19 |
-
"website": "https://www.justice.gov",
|
20 |
-
"emails": ["[email protected]"],
|
21 |
-
"address": {
|
22 |
-
"address_lines": ["950 Pennsylvania Avenue, NW"],
|
23 |
-
"street": "",
|
24 |
-
"city": "Washington",
|
25 |
-
"state": "DC",
|
26 |
-
"zip": "20530-0001"
|
27 |
-
},
|
28 |
-
"service_center": {"phone": ["202-514-2000"]},
|
29 |
-
"request_form": "https://www.justice.gov/oip/foia-request",
|
30 |
-
"request_time_stats": {
|
31 |
-
"2022": {"simple_median_days": 20, "complex_median_days": 45}
|
32 |
-
}
|
33 |
}
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
class Agency:
|
37 |
def __init__(self, data: Dict):
|
@@ -79,77 +97,50 @@ def create_search_url_wikipedia(search_query):
|
|
79 |
base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
|
80 |
return base_url + search_query.replace(' ', '+').replace('β', '%E2%80%93').replace('&', 'and')
|
81 |
|
82 |
-
def
|
83 |
-
"""
|
84 |
try:
|
85 |
tree = ET.parse(filename)
|
86 |
root = tree.getroot()
|
87 |
-
|
88 |
-
# Namespace dictionary
|
89 |
ns = {
|
90 |
'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
|
91 |
'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
|
92 |
'nc': 'http://niem.gov/niem/niem-core/2.0',
|
93 |
'j': 'http://niem.gov/niem/domains/jxdm/4.1'
|
94 |
}
|
95 |
-
|
96 |
-
# Extract basic info
|
97 |
-
org = root.find(".//nc:Organization", ns)
|
98 |
-
org_name = org.find("nc:OrganizationName", ns).text if org is not None else "Unknown"
|
99 |
-
org_abbr = org.find("nc:OrganizationAbbreviationText", ns).text if org is not None else "N/A"
|
100 |
-
fiscal_year = root.find(".//foia:DocumentFiscalYearDate", ns).text if root.find(".//foia:DocumentFiscalYearDate", ns) is not None else "N/A"
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
# Request Dispositions
|
115 |
-
disp = root.find(".//foia:RequestDispositionSection/foia:RequestDisposition", ns)
|
116 |
-
if disp is not None:
|
117 |
-
st.write("π **Request Dispositions**")
|
118 |
-
st.write(f"- Full Grants: {disp.find('foia:RequestDispositionFullGrantQuantity', ns).text} β
")
|
119 |
-
st.write(f"- Partial Grants: {disp.find('foia:RequestDispositionPartialGrantQuantity', ns).text} β")
|
120 |
-
st.write(f"- Full Denials: {disp.find('foia:RequestDispositionFullExemptionDenialQuantity', ns).text} β")
|
121 |
-
denial = disp.find('foia:NonExemptionDenial', ns)
|
122 |
-
if denial is not None:
|
123 |
-
st.write(f"- Non-Exemption Denial ({denial.find('foia:NonExemptionDenialReasonCode', ns).text}): {denial.find('foia:NonExemptionDenialQuantity', ns).text} π")
|
124 |
-
|
125 |
-
# Applied Exemptions
|
126 |
-
exemptions = root.findall(".//foia:RequestDispositionAppliedExemptionsSection/foia:ComponentAppliedExemptions/foia:AppliedExemption", ns)
|
127 |
-
if exemptions:
|
128 |
-
st.write("π« **Applied Exemptions**")
|
129 |
-
for ex in exemptions:
|
130 |
-
code = ex.find('foia:AppliedExemptionCode', ns).text
|
131 |
-
qty = ex.find('foia:AppliedExemptionQuantity', ns).text
|
132 |
-
st.write(f"- {code}: {qty} times")
|
133 |
-
|
134 |
-
# Response Times
|
135 |
-
resp_time = root.find(".//foia:ProcessedResponseTimeSection/foia:ProcessedResponseTime", ns)
|
136 |
-
if resp_time is not None:
|
137 |
-
st.write("β° **Response Times**")
|
138 |
-
simple = resp_time.find('foia:SimpleResponseTime', ns)
|
139 |
-
if simple is not None:
|
140 |
-
st.write(f"- Simple Requests:")
|
141 |
-
st.write(f" - Median: {simple.find('foia:ResponseTimeMedianDaysValue', ns).text} days β±οΈ")
|
142 |
-
st.write(f" - Average: {simple.find('foia:ResponseTimeAverageDaysValue', ns).text} days π")
|
143 |
-
|
144 |
except ET.ParseError:
|
145 |
-
|
146 |
|
147 |
-
def
|
148 |
-
xml_files = [f
|
149 |
if not xml_files:
|
150 |
st.markdown("No XML files found in the directory. πβ")
|
151 |
-
return
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
def search_foia_content(query: str, agency: str = None) -> Dict:
|
155 |
results = {
|
@@ -163,94 +154,89 @@ def search_foia_content(query: str, agency: str = None) -> Dict:
|
|
163 |
return results
|
164 |
|
165 |
def main():
|
|
|
|
|
|
|
166 |
st.title("Freedom of Information Act (FOIA) Explorer ππ")
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
if "median" in key:
|
207 |
-
st.write(f"{key.replace('_median_days', '').title()}: {value} days")
|
208 |
-
|
209 |
-
st.subheader("Search FOIA Documents")
|
210 |
-
search_query = st.text_input("Enter search query")
|
211 |
-
if st.button("Search") and search_query:
|
212 |
-
with st.spinner("Searching..."):
|
213 |
-
results = search_foia_content(search_query, selected_agency)
|
214 |
-
st.write(f"Found {len(results['results'])} results for '{search_query}':")
|
215 |
-
for result in results["results"]:
|
216 |
-
st.write(f"- {result['title']} ({result['date']})")
|
217 |
-
|
218 |
-
with tab2:
|
219 |
-
st.header("FOIA Datasets & XML Scanner")
|
220 |
-
st.write("""
|
221 |
-
The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. πβ¨
|
222 |
-
Below is a list of datasets available under FOIA, alongside guessed Wikipedia URLs for more information. ππ
|
223 |
-
""")
|
224 |
|
225 |
-
st.
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
"Provider Taxonomy",
|
232 |
-
"Consumer Complaint Database",
|
233 |
-
"Medicare Provider Utilization and Payment Data",
|
234 |
-
"Global Terrorism Database",
|
235 |
-
"National Nutrient Database",
|
236 |
-
"Patent Grant Full Text Data",
|
237 |
-
"Toxic Release Inventory",
|
238 |
-
"Residential Energy Consumption Survey",
|
239 |
]
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
254 |
|
255 |
if __name__ == "__main__":
|
256 |
main()
|
|
|
6 |
import xml.etree.ElementTree as ET
|
7 |
from typing import Dict, List
|
8 |
|
9 |
+
# Function to parse XML and build AGENCIES dictionary
|
10 |
+
def load_agencies_from_xml() -> Dict:
    """Build the agency catalogue from FOIA annual-report XML files.

    Scans the current working directory for ``*.xml`` files and creates one
    entry per organization name. Files are visited in sorted order so that,
    when two reports share an organization name, the same one consistently
    wins (the later file overwrites the earlier entry).

    Only the organization name, abbreviation, fiscal year and the median
    response time for simple requests are read from the XML. The website,
    e-mail address and request-form URL are guesses derived from the
    abbreviation; the postal address and phone number are placeholders.

    Files that fail to parse are reported with ``st.warning`` and skipped.
    Reports without an organization element, or without an organization
    name, are skipped silently.

    Returns:
        Mapping of organization name to that agency's data dictionary.
    """
    ns = {
        'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
        'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
        'nc': 'http://niem.gov/niem/niem-core/2.0',
        'j': 'http://niem.gov/niem/domains/jxdm/4.1'
    }
    median_path = (
        ".//foia:ProcessedResponseTimeSection/foia:ProcessedResponseTime"
        "/foia:SimpleResponseTime/foia:ResponseTimeMedianDaysValue"
    )

    def _leaf_text(node, path):
        """Return the text of the first match for *path*, or None if absent/blank."""
        found = node.find(path, ns)
        if found is None or found.text is None or not found.text.strip():
            return None
        return found.text

    agencies = {}
    for xml_file in sorted(f for f in os.listdir('.') if f.endswith('.xml')):
        # Only the parse itself can raise ParseError, so keep the try narrow.
        try:
            root = ET.parse(xml_file).getroot()
        except ET.ParseError:
            st.warning(f"Could not parse {xml_file}")
            continue

        org = root.find(".//nc:Organization", ns)
        if org is None:
            continue
        name = _leaf_text(org, "nc:OrganizationName")
        if name is None:
            # Without a name there is no key to file the agency under.
            continue

        abbr = _leaf_text(org, "nc:OrganizationAbbreviationText")
        fiscal_year = _leaf_text(root, ".//foia:DocumentFiscalYearDate") or "N/A"

        # A missing element (None -> TypeError) and non-numeric text
        # (ValueError) both fall back to 0 instead of aborting the whole load.
        try:
            simple_median = float(_leaf_text(root, median_path))
        except (TypeError, ValueError):
            simple_median = 0

        if abbr is None:
            # No abbreviation means nothing to derive the guessed URLs from.
            website, emails, request_form = "", [], ""
        else:
            slug = abbr.lower()
            website = f"https://www.{slug}.gov"  # Guessed URL, adjust as needed
            emails = [f"foia@{slug}.gov"]  # Guessed email
            request_form = f"https://www.{slug}.gov/foia"  # Guessed form URL

        # Build minimal agency data (can be expanded with more XML data)
        agencies[name] = {
            "name": name,
            "summary": {
                "name": name,
                "description": f"FOIA data for {name} ({fiscal_year})",
                "abbreviation": abbr if abbr is not None else "N/A",
                "website": website,
            },
            "website": website,
            "emails": emails,
            "address": {"address_lines": [], "street": "", "city": "", "state": "", "zip": ""},  # Placeholder
            "service_center": {"phone": ["N/A"]},  # Placeholder
            "request_form": request_form,
            "request_time_stats": {
                fiscal_year: {
                    "simple_median_days": simple_median
                }
            }
        }
    return agencies
|
53 |
|
54 |
class Agency:
|
55 |
def __init__(self, data: Dict):
|
|
|
97 |
base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
|
98 |
return base_url + search_query.replace(' ', '+').replace('β', '%E2%80%93').replace('&', 'and')
|
99 |
|
100 |
+
def parse_foia_xml(filename: str) -> Dict:
    """Parse a FOIA annual-report XML file and return its headline figures.

    Args:
        filename: Path of the XML file to read.

    Returns:
        A dictionary with the keys ``name``, ``abbr``, ``fiscal_year``,
        ``pending_start``, ``received``, ``processed``, ``pending_end`` and
        ``full_grants``. Values are the element text as found in the file.
        A value whose element is missing or has no text is ``"N/A"``
        (``"Unknown"`` for ``name``). If the file is not well-formed XML,
        ``name`` is ``"Error"`` and every other value is ``"N/A"``.
    """
    # Only the parse can raise ParseError; keep the guarded region that small.
    try:
        root = ET.parse(filename).getroot()
    except ET.ParseError:
        return {"name": "Error", "abbr": "N/A", "fiscal_year": "N/A", "pending_start": "N/A", "received": "N/A", "processed": "N/A", "pending_end": "N/A", "full_grants": "N/A"}

    ns = {
        'iepd': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/exchange/1.03',
        'foia': 'http://leisp.usdoj.gov/niem/FoiaAnnualReport/extension/1.03',
        'nc': 'http://niem.gov/niem/niem-core/2.0',
        'j': 'http://niem.gov/niem/domains/jxdm/4.1'
    }

    def _text(node, path, default="N/A"):
        """Return the text of the first match for *path* under *node*, else *default*."""
        found = node.find(path, ns) if node is not None else None
        # Guard the leaf itself: a parent section can exist without this child.
        if found is None or found.text is None:
            return default
        return found.text

    stats = ".//foia:ProcessedRequestSection/foia:ProcessingStatistics/foia:ProcessingStatistics"
    org = root.find(".//nc:Organization", ns)
    return {
        "name": _text(org, "nc:OrganizationName", "Unknown"),
        "abbr": _text(org, "nc:OrganizationAbbreviationText"),
        "fiscal_year": _text(root, ".//foia:DocumentFiscalYearDate"),
        "pending_start": _text(root, stats + "PendingAtStartQuantity"),
        "received": _text(root, stats + "ReceivedQuantity"),
        "processed": _text(root, stats + "ProcessedQuantity"),
        "pending_end": _text(root, stats + "PendingAtEndQuantity"),
        "full_grants": _text(
            root,
            ".//foia:RequestDispositionSection/foia:RequestDisposition"
            "/foia:RequestDispositionFullGrantQuantity",
        ),
    }
|
126 |
|
127 |
+
def list_and_display_xml_files():
    """Render one Streamlit expander per ``*.xml`` file in the working directory.

    Each file is passed to ``parse_foia_xml`` and its figures are written
    inside an expander titled with the file name (extension removed), the
    organization name and the abbreviation, followed by a Wikipedia search
    link for the organization name. If no XML files are present, a notice is
    shown instead.

    Files are listed in sorted order so the page layout is stable between
    reruns.
    """
    # NOTE(review): the emoji in these literals reached review mis-decoded.
    # Only the two literals that were split across lines ("Fiscal Year",
    # "Processed") were repaired, using the most likely glyphs; the other
    # strings are reproduced exactly as received. Confirm against the
    # original file.
    xml_files = sorted(f for f in os.listdir('.') if f.endswith('.xml'))
    if not xml_files:
        st.markdown("No XML files found in the directory. πβ")
        return

    for xml_file in xml_files:
        file_name = xml_file[:-4]  # Remove .xml extension
        data = parse_foia_xml(xml_file)
        with st.expander(f"π {file_name} - {data['name']} ({data['abbr']})"):
            st.write(f"📅 Fiscal Year: {data['fiscal_year']}")
            st.write(f"β³ Pending at Start: {data['pending_start']}")
            st.write(f"π₯ Received: {data['received']}")
            st.write(f"✅ Processed: {data['processed']}")
            st.write(f"β³ Pending at End: {data['pending_end']}")
            st.write(f"π Full Grants: {data['full_grants']}")
            st.write(f"[Wikipedia - {data['name']}]({create_search_url_wikipedia(data['name'])})")
|
144 |
|
145 |
def search_foia_content(query: str, agency: str = None) -> Dict:
|
146 |
results = {
|
|
|
154 |
return results
|
155 |
|
156 |
def main():
    """Render the FOIA Explorer page.

    Sections, in order: an introduction with dataset links, a list of FOIA
    datasets with Wikipedia search links, an agency browser backed by
    ``load_agencies_from_xml``, a document search backed by
    ``search_foia_content``, and the per-file XML report listing from
    ``list_and_display_xml_files``.
    """
    # NOTE(review): indentation was lost in the text under review. Nesting
    # below is reconstructed from data dependencies (everything that reads
    # ``agency`` sits under ``if selected_agency``); confirm against the
    # original file. Emoji in string literals are reproduced as received.

    # Load agencies from XML files
    agencies = load_agencies_from_xml()

    st.title("Freedom of Information Act (FOIA) Explorer ππ")

    st.write("""
    The Freedom of Information Act (FOIA) empowers individuals by granting access to previously unreleased information and documents controlled by the United States government. Championing transparency and accountability, FOIA serves as a foundation for democratic engagement and open government initiatives. πβ¨
    Below is a list of datasets available under FOIA, alongside guessed Wikipedia URLs for more information. ππ
    """)

    st.markdown("""
    - [FOIA.Gov](https://www.foia.gov/foia-dataset-download.html)
    - [Data.Gov](https://catalog.data.gov/dataset?tags=foia)
    """)

    # FOIA Datasets with Emojis
    datasets = [
        ("Provider Taxonomy", "π©Ί"),
        ("Consumer Complaint Database", "π"),
        ("Medicare Provider Utilization and Payment Data", "π"),
        ("Global Terrorism Database", "π"),
        ("National Nutrient Database", "π"),
        ("Patent Grant Full Text Data", "π"),
        ("Toxic Release Inventory", "β£οΈ"),
        ("Residential Energy Consumption Survey", "π ")
    ]

    st.markdown("### FOIA Datasets and Wikipedia URLs")
    for dataset, emoji in datasets:
        st.markdown(f"- {emoji} **{dataset}**: [Wikipedia]({create_search_url_wikipedia(dataset)})")

    # Agency Browser
    st.header("Agency Browser")
    agency_names = sorted(agencies)
    # The leading empty option means "no agency selected".
    selected_agency = st.selectbox("Select Agency", [""] + agency_names)

    if selected_agency:
        agency = Agency(agencies[selected_agency])
        st.subheader(f"{agency.name} Details")
        st.write(f"Description: {agency.summary.get('description', 'N/A')}")
        st.write(f"Abbreviation: {agency.summary.get('abbreviation', 'N/A')}")
        if agency.website:
            st.write(f"Website: [{agency.website}]({agency.website})")

        st.subheader("Contact Information")
        address = agency.address
        # Assemble "City, ST ZIP" from whichever parts exist, so an all-empty
        # placeholder address does not render as a stray ",  ".
        locality = ", ".join(
            part for part in (address.get("city", ""), address.get("state", "")) if part
        )
        city_line = " ".join(part for part in (locality, address.get("zip", "")) if part)
        # ``or [""]`` also covers an empty phone list, which would otherwise
        # raise IndexError on ``[0]``.
        phones = agency.service_center.get("phone") or [""]
        contact_info = [
            *address.get("address_lines", []),
            address.get("street", ""),
            city_line,
            phones[0],
        ]
        contact_lines = [line for line in contact_info if line]
        if contact_lines:
            # Markdown folds a lone newline into a space; two trailing spaces
            # force a visible line break between the contact lines.
            st.write("  \n".join(contact_lines))
        if agency.emails:
            st.write(f"Email: [{agency.emails[0]}](mailto:{agency.emails[0]})")

        if agency.hasRequestForm():
            form_url = "https://foiaonline.regulations.gov/foia/action/public/request/createRequest" if agency.isFOIAonline() else agency.request_form
            st.write(f"[Submit FOIA Request]({form_url})")

        st.subheader("Median Processing Times")
        if agency.request_time_stats:
            for year, stats in agency.request_time_stats.items():
                col1, col2 = st.columns(2)
                with col1:
                    st.write(f"Year: {year}")
                with col2:
                    for key, value in stats.items():
                        if "median" in key:
                            st.write(f"{key.replace('_median_days', '').title()}: {value} days")

    # FOIA Document Search
    st.subheader("Search FOIA Documents")
    search_query = st.text_input("Enter search query")
    if st.button("Search") and search_query:
        with st.spinner("Searching..."):
            results = search_foia_content(search_query, selected_agency)
            st.write(f"Found {len(results['results'])} results for '{search_query}':")
            for result in results["results"]:
                st.write(f"- {result['title']} ({result['date']})")

    # XML Files Display
    st.header("FOIA XML Reports")
    list_and_display_xml_files()
|
240 |
|
241 |
if __name__ == "__main__":
|
242 |
main()
|