Spaces:
Sleeping
Sleeping
Irfan Savji
committed on
Commit
·
4b5e136
1
Parent(s):
bb9b2a5
Add Canadian Parliamentary Expenditures Explorer app
Browse files
This Gradio app provides an interactive dashboard to explore and analyze
Canadian House of Commons expenditure data from 2021-2025, with filters
for year, party, and category, plus visualizations and search capabilities.
- README.md +24 -5
- app.py +270 -0
- requirements.txt +4 -0
README.md
CHANGED
@@ -1,12 +1,31 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.39.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
10 |
---
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Canadian Parliamentary Expenditures Explorer
|
3 |
+
emoji: π
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: yellow
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.39.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
license: mit
|
11 |
---
|
12 |
|
13 |
+
# Canadian Parliamentary Expenditures Explorer
|
14 |
+
|
15 |
+
An interactive dashboard for exploring Canadian House of Commons expenditure data.
|
16 |
+
|
17 |
+
## Features
|
18 |
+
|
19 |
+
- 📊 Interactive visualizations of parliamentary spending patterns
|
20 |
+
- 🔍 Filter by year, party, and expense category
|
21 |
+
- 👥 Analyze spending by individual members of parliament
|
22 |
+
- 📈 View spending trends over time
|
23 |
+
- 🎯 Search and explore specific member expenses
|
24 |
+
|
25 |
+
## Dataset
|
26 |
+
|
27 |
+
This application uses the [Canadian Parliamentary Expenditures dataset](https://huggingface.co/datasets/irf23/canadian-parliamentary-expenditures), which contains:
|
28 |
+
- 1.2+ million expenditure records
|
29 |
+
- Data from 2021 Q2 to 2025 Q4
|
30 |
+
- 450 parliament members
|
31 |
+
- Categories: Travel, Hospitality, Contract, and Other
|
app.py
ADDED
@@ -0,0 +1,270 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import plotly.express as px
|
4 |
+
import plotly.graph_objects as go
|
5 |
+
from datasets import load_dataset
|
6 |
+
|
7 |
+
# Fetch the dataset once at import time; every callback below reads the
# resulting module-level DataFrames.
print("Loading dataset...")
dataset = load_dataset("irf23/canadian-parliamentary-expenditures")

# Materialise both entries of the loaded dataset as pandas DataFrames.
members_df = dataset['members'].to_pandas()
expenditures_df = dataset['expenditures'].to_pandas()

# Coerce the date and amount columns so they can be compared, summed and
# averaged by the callbacks.
expenditures_df = expenditures_df.assign(
    date_incurred=pd.to_datetime(expenditures_df['date_incurred']),
    amount=pd.to_numeric(expenditures_df['amount']),
)

print(f"Loaded {len(expenditures_df)} expenditure records")
|
20 |
+
|
21 |
+
def create_overview_plots(year_filter, party_filter, category_filter):
    """Build the Overview tab content: a metrics summary and three figures.

    Args:
        year_filter: Values of ``period_year`` to keep. An empty (falsy)
            selection leaves the data unfiltered on that column.
        party_filter: Values of ``party`` to keep (same empty-selection rule).
        category_filter: Values of ``category`` to keep (same rule).

    Returns:
        A 4-tuple ``(metrics_text, fig_category, fig_party, fig_trend)``:
        Markdown text with the key metrics, a pie chart of spending by
        category, a bar chart of spending by party (largest first) and a
        line chart of spending per year/quarter.
    """
    # Boolean indexing already produces new frames and nothing below mutates
    # the data, so the module-level frame is used directly instead of paying
    # for a full copy on every call.
    filtered_df = expenditures_df
    if year_filter:
        filtered_df = filtered_df[filtered_df['period_year'].isin(year_filter)]
    if party_filter:
        filtered_df = filtered_df[filtered_df['party'].isin(party_filter)]
    if category_filter:
        filtered_df = filtered_df[filtered_df['category'].isin(category_filter)]

    # Calculate metrics
    total_spending = filtered_df['amount'].sum()
    num_records = len(filtered_df)
    avg_expense = filtered_df['amount'].mean()
    if pd.isna(avg_expense):
        # The mean of an empty selection is NaN; show 0.00 rather than "$nan".
        avg_expense = 0.0
    num_members = filtered_df['member_id'].nunique()

    metrics_text = (
        "\n"
        "### Key Metrics\n"
        f"- **Total Spending**: ${total_spending:,.2f}\n"
        f"- **Number of Records**: {num_records:,}\n"
        f"- **Average Expense**: ${avg_expense:,.2f}\n"
        f"- **Active Members**: {num_members}\n"
    )

    # Spending by category (pie chart)
    category_spending = filtered_df.groupby('category')['amount'].sum().reset_index()
    fig_category = px.pie(
        category_spending,
        values='amount',
        names='category',
        title='Spending by Category',
    )

    # Spending by party (bar chart, largest total first)
    party_spending = (
        filtered_df.groupby('party')['amount']
        .sum()
        .sort_values(ascending=False)
        .reset_index()
    )
    fig_party = px.bar(
        party_spending,
        x='party',
        y='amount',
        title='Total Spending by Party',
        labels={'amount': 'Total Amount ($)', 'party': 'Party'},
    )

    # Quarterly trend (line chart); the x label is "<year>-Q<quarter>".
    quarterly = (
        filtered_df.groupby(['period_year', 'period_quarter'])['amount']
        .sum()
        .reset_index()
    )
    quarterly['period'] = (
        quarterly['period_year'].astype(str)
        + '-Q'
        + quarterly['period_quarter'].astype(str)
    )
    fig_trend = px.line(
        quarterly,
        x='period',
        y='amount',
        title='Quarterly Spending Trend',
        labels={'amount': 'Total Amount ($)', 'period': 'Period'},
        markers=True,
    )

    return metrics_text, fig_category, fig_party, fig_trend
|
77 |
+
|
78 |
+
def get_top_spenders(n_top, year_filter, party_filter, category_filter):
    """Return a horizontal bar chart of the members with the highest totals.

    Args:
        n_top: Number of (member, party) rows to show. Accepts an int or an
            integral float, since UI sliders may deliver either.
        year_filter: Values of ``period_year`` to keep. An empty (falsy)
            selection leaves the data unfiltered on that column.
        party_filter: Values of ``party`` to keep (same empty-selection rule).
        category_filter: Values of ``category`` to keep (same rule).

    Returns:
        A Plotly figure with one bar per (member_name, party) pair, coloured
        by party and ordered by total amount.
    """
    # ``head`` only accepts integers, and the title should read "Top 20",
    # not "Top 20.0", so normalise the slider value once up front.
    n_top = int(n_top)

    # Filtering never mutates the data, so no defensive copy is needed.
    filtered_df = expenditures_df
    if year_filter:
        filtered_df = filtered_df[filtered_df['period_year'].isin(year_filter)]
    if party_filter:
        filtered_df = filtered_df[filtered_df['party'].isin(party_filter)]
    if category_filter:
        filtered_df = filtered_df[filtered_df['category'].isin(category_filter)]

    # Total per (member, party), largest first, truncated to the top N.
    totals = filtered_df.groupby(['member_name', 'party'])['amount'].sum()
    top_spenders = totals.sort_values(ascending=False).head(n_top).reset_index()

    fig = px.bar(
        top_spenders,
        x='amount',
        y='member_name',
        color='party',
        orientation='h',
        title=f'Top {n_top} Spenders',
        labels={'amount': 'Total Amount ($)', 'member_name': 'Member'},
        # Grow the figure with the number of bars so labels stay readable.
        height=max(400, n_top * 25),
    )
    fig.update_layout(yaxis={'categoryorder': 'total ascending'})

    return fig
|
104 |
+
|
105 |
+
def analyze_member(member_name):
    """Summarise one member's expenses.

    Args:
        member_name: Exact ``member_name`` value to look up; falsy means
            nothing is selected.

    Returns:
        ``(info, fig)`` where ``info`` is Markdown text and ``fig`` is a pie
        chart of the member's spending by category. ``fig`` is ``None`` when
        no member is selected or no rows match.
    """
    if not member_name:
        return "Please select a member", None

    is_selected = expenditures_df['member_name'] == member_name
    member_df = expenditures_df[is_selected]
    if member_df.empty:
        return "No data found for this member", None

    # Headline numbers for the selected member.
    amounts = member_df['amount']
    total = amounts.sum()
    count = len(member_df)
    avg = amounts.mean()
    # The party shown is the one on the member's first matching row.
    party = member_df['party'].iloc[0]

    info = f"""
    ### {member_name} ({party})
    - **Total Expenses**: ${total:,.2f}
    - **Number of Expenses**: {count:,}
    - **Average Expense**: ${avg:,.2f}
    """

    # Per-category totals drive the pie chart.
    by_category = member_df.groupby('category')['amount'].sum().reset_index()
    fig = px.pie(
        by_category,
        values='amount',
        names='category',
        title=f'Expense Categories for {member_name}',
    )

    return info, fig
|
137 |
+
|
138 |
+
def search_expenses(member_search, min_amount, max_amount, category_filter):
    """Return the 100 largest expenses matching the search criteria.

    Args:
        member_search: Text to look for inside ``member_name``. Matching is
            case-insensitive and literal (the text is not a regular
            expression). Falsy means no name restriction.
        min_amount: Inclusive lower bound on ``amount``; ``None`` (for
            example a cleared number box) means no lower bound.
        max_amount: Inclusive upper bound on ``amount``; ``None`` means no
            upper bound.
        category_filter: Exact ``category`` to keep; falsy or ``"All"`` means
            every category.

    Returns:
        A DataFrame with the columns member_name, party, category, amount,
        description, supplier and date_incurred, ordered by amount
        (largest first) and limited to 100 rows.
    """
    # Combine all criteria into one boolean mask rather than copying the
    # whole frame and re-slicing it for every criterion.
    mask = pd.Series(True, index=expenditures_df.index)

    if member_search:
        # regex=False: the text comes straight from the user, so characters
        # such as "(" or "*" must be matched literally, not parsed as a
        # pattern (which would raise on malformed input).
        mask &= expenditures_df['member_name'].str.contains(
            member_search, case=False, na=False, regex=False
        )

    amounts = expenditures_df['amount']
    if min_amount is not None:
        mask &= amounts >= min_amount
    if max_amount is not None:
        mask &= amounts <= max_amount

    if category_filter and category_filter != "All":
        mask &= expenditures_df['category'] == category_filter

    columns = [
        'member_name', 'party', 'category', 'amount',
        'description', 'supplier', 'date_incurred',
    ]
    # Get top 100 results
    return expenditures_df.loc[mask].nlargest(100, 'amount')[columns]
|
153 |
+
|
154 |
+
# Choice lists for the UI controls: the distinct values of each filterable
# column, in sorted order.
def _sorted_unique(column):
    """Return the distinct values of an expenditures column, sorted."""
    return sorted(expenditures_df[column].unique().tolist())


years = _sorted_unique('period_year')
parties = _sorted_unique('party')
categories = _sorted_unique('category')
member_names = _sorted_unique('member_name')
|
159 |
+
|
160 |
+
# Assemble the Gradio UI: shared filters on top, then one tab per view.
with gr.Blocks(title="Canadian Parliamentary Expenditures", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π Canadian Parliamentary Expenditures Explorer")
    gr.Markdown("Explore spending data from the Canadian House of Commons (2021-2025)")

    # Filters read by the Overview and Top Spenders callbacks.
    with gr.Row():
        year_filter = gr.CheckboxGroup(
            choices=years,
            value=years[-2:],  # default to the two most recent years
            label="Select Years",
        )
        party_filter = gr.CheckboxGroup(choices=parties, value=parties, label="Select Parties")
        category_filter = gr.CheckboxGroup(choices=categories, value=categories, label="Select Categories")
    shared_filters = [year_filter, party_filter, category_filter]

    # Overview tab: key metrics plus category / party / trend charts.
    with gr.Tab("Overview"):
        overview_btn = gr.Button("Update Overview", variant="primary")
        metrics_display = gr.Markdown()

        with gr.Row():
            category_plot = gr.Plot()
            party_plot = gr.Plot()

        trend_plot = gr.Plot()
        overview_outputs = [metrics_display, category_plot, party_plot, trend_plot]

        overview_btn.click(
            create_overview_plots,
            inputs=shared_filters,
            outputs=overview_outputs,
        )

    # Top Spenders tab: ranked bar chart with a configurable length.
    with gr.Tab("Top Spenders"):
        n_slider = gr.Slider(minimum=10, maximum=50, value=20, step=5, label="Number of top spenders")
        spenders_btn = gr.Button("Show Top Spenders", variant="primary")
        spenders_plot = gr.Plot()

        spenders_btn.click(
            get_top_spenders,
            inputs=[n_slider, *shared_filters],
            outputs=spenders_plot,
        )

    # Member Analysis tab: refreshes whenever the dropdown value changes.
    with gr.Tab("Member Analysis"):
        member_dropdown = gr.Dropdown(
            choices=member_names,
            label="Select a Member",
            searchable=True,
        )
        member_info = gr.Markdown()
        member_plot = gr.Plot()

        member_dropdown.change(
            analyze_member,
            inputs=member_dropdown,
            outputs=[member_info, member_plot],
        )

    # Search tab: free-text member search with amount and category limits.
    with gr.Tab("Search Expenses"):
        with gr.Row():
            search_member = gr.Textbox(label="Member Name (partial match)", placeholder="e.g., Trudeau")
            search_category = gr.Dropdown(
                choices=["All"] + categories,
                value="All",
                label="Category",
            )

        with gr.Row():
            min_amount_input = gr.Number(value=0, label="Minimum Amount ($)")
            max_amount_input = gr.Number(value=1000000, label="Maximum Amount ($)")

        search_btn = gr.Button("Search", variant="primary")
        results_table = gr.Dataframe(
            headers=["Member", "Party", "Category", "Amount", "Description", "Supplier", "Date"],
            datatype=["str", "str", "str", "number", "str", "str", "str"],
        )

        search_btn.click(
            search_expenses,
            inputs=[search_member, min_amount_input, max_amount_input, search_category],
            outputs=results_table,
        )

    # Footer
    gr.Markdown("""
    ---
    **Data Source**: Canadian House of Commons
    **Dataset**: [irf23/canadian-parliamentary-expenditures](https://huggingface.co/datasets/irf23/canadian-parliamentary-expenditures)
    **License**: CC0-1.0 (Public Domain)
    """)

    # Populate the Overview tab as soon as the page loads, using the default
    # filter selections.
    demo.load(
        create_overview_plots,
        inputs=shared_filters,
        outputs=overview_outputs,
    )

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas==2.2.0
|
2 |
+
plotly==5.19.0
|
3 |
+
datasets==2.17.0
|
4 |
+
pyarrow>=10.0.0
|