Irfan Savji
Fix Gradio Dropdown parameter error
33f5253
import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datasets import load_dataset
import pyarrow.parquet as pq
# Load the dataset using direct parquet file loading
print("Loading dataset...")
# Load directly from Hugging Face using pandas
train_dfs = []
test_dfs = []
# Base URL for the dataset files
base_url = "https://huggingface.co/datasets/irf23/canadian-parliamentary-expenditures/resolve/main/data"
# List of expected files based on the dataset description
print("Loading training data...")
for year in range(2021, 2025):
for quarter in range(1, 5):
if year == 2021 and quarter == 1:
continue # Data starts from 2021 Q2
try:
url = f"{base_url}/train/expenditures-{year}-q{quarter}.parquet"
df = pd.read_parquet(url)
train_dfs.append(df)
print(f"Loaded {year} Q{quarter} train data ({len(df)} records)")
except Exception as e:
print(f"Could not load {year} Q{quarter}: {e}")
# Load 2025 test data
print("\nLoading test data...")
for quarter in range(1, 5):
try:
url = f"{base_url}/test/expenditures-2025-q{quarter}.parquet"
df = pd.read_parquet(url)
test_dfs.append(df)
print(f"Loaded 2025 Q{quarter} test data ({len(df)} records)")
except Exception as e:
print(f"Could not load 2025 Q{quarter}: {e}")
# Combine all dataframes
if train_dfs and test_dfs:
expenditures_df = pd.concat(train_dfs + test_dfs, ignore_index=True)
elif train_dfs:
expenditures_df = pd.concat(train_dfs, ignore_index=True)
else:
# Create dummy data for testing
print("Creating dummy data for demonstration")
expenditures_df = pd.DataFrame({
'Id': ['1', '2', '3'],
'MemberId': ['m1', 'm2', 'm3'],
'MemberName': ['John Doe', 'Jane Smith', 'Bob Johnson'],
'Constituency': ['Riding A', 'Riding B', 'Riding C'],
'Party': ['Liberal', 'Conservative', 'NDP'],
'Category': ['Travel', 'Hospitality', 'Contract'],
'Amount': [1000.0, 2000.0, 1500.0],
'Description': ['Flight to Ottawa', 'Meeting expenses', 'Consulting'],
'Location': ['Toronto', 'Vancouver', 'Montreal'],
'Supplier': ['Air Canada', 'Hotel XYZ', 'Consultant ABC'],
'PeriodYear': [2024, 2024, 2024],
'PeriodQuarter': [1, 1, 2],
'DateIncurred': ['2024-01-15', '2024-02-20', '2024-04-10'],
'ClaimId': ['c1', 'c2', 'c3'],
'CreatedAt': ['2024-01-20', '2024-02-25', '2024-04-15'],
'UpdatedAt': ['2024-01-20', '2024-02-25', '2024-04-15']
})
# Convert column names to lowercase
expenditures_df.columns = expenditures_df.columns.str.lower()
# Convert data types
expenditures_df['dateincurred'] = pd.to_datetime(expenditures_df['dateincurred'], errors='coerce')
expenditures_df['amount'] = pd.to_numeric(expenditures_df['amount'], errors='coerce')
expenditures_df['periodyear'] = pd.to_numeric(expenditures_df['periodyear'], errors='coerce')
expenditures_df['periodquarter'] = pd.to_numeric(expenditures_df['periodquarter'], errors='coerce')
print(f"\nLoaded {len(expenditures_df)} total expenditure records")
print(f"Columns: {list(expenditures_df.columns)}")
def create_overview_plots(year_filter, party_filter, category_filter):
# Apply filters
filtered_df = expenditures_df.copy()
if year_filter:
filtered_df = filtered_df[filtered_df['periodyear'].isin(year_filter)]
if party_filter:
filtered_df = filtered_df[filtered_df['party'].isin(party_filter)]
if category_filter:
filtered_df = filtered_df[filtered_df['category'].isin(category_filter)]
# Calculate metrics
total_spending = filtered_df['amount'].sum()
num_records = len(filtered_df)
avg_expense = filtered_df['amount'].mean() if num_records > 0 else 0
num_members = filtered_df['memberid'].nunique()
metrics_text = f"""
### Key Metrics
- **Total Spending**: ${total_spending:,.2f}
- **Number of Records**: {num_records:,}
- **Average Expense**: ${avg_expense:,.2f}
- **Active Members**: {num_members}
"""
# Create spending by category pie chart
if len(filtered_df) > 0:
category_spending = filtered_df.groupby('category')['amount'].sum().reset_index()
fig_category = px.pie(
category_spending,
values='amount',
names='category',
title='Spending by Category'
)
else:
fig_category = px.pie(title='No data for selected filters')
# Create spending by party bar chart
if len(filtered_df) > 0:
party_spending = filtered_df.groupby('party')['amount'].sum().sort_values(ascending=False).reset_index()
fig_party = px.bar(
party_spending,
x='party',
y='amount',
title='Total Spending by Party',
labels={'amount': 'Total Amount ($)', 'party': 'Party'}
)
else:
fig_party = px.bar(title='No data for selected filters')
# Create quarterly trend line chart
if len(filtered_df) > 0:
quarterly = filtered_df.groupby(['periodyear', 'periodquarter'])['amount'].sum().reset_index()
quarterly['period'] = quarterly['periodyear'].astype(str) + '-Q' + quarterly['periodquarter'].astype(str)
fig_trend = px.line(
quarterly,
x='period',
y='amount',
title='Quarterly Spending Trend',
labels={'amount': 'Total Amount ($)', 'period': 'Period'},
markers=True
)
else:
fig_trend = px.line(title='No data for selected filters')
return metrics_text, fig_category, fig_party, fig_trend
def get_top_spenders(n_top, year_filter, party_filter, category_filter):
# Apply filters
filtered_df = expenditures_df.copy()
if year_filter:
filtered_df = filtered_df[filtered_df['periodyear'].isin(year_filter)]
if party_filter:
filtered_df = filtered_df[filtered_df['party'].isin(party_filter)]
if category_filter:
filtered_df = filtered_df[filtered_df['category'].isin(category_filter)]
if len(filtered_df) > 0:
# Get top spenders
top_spenders = filtered_df.groupby(['membername', 'party'])['amount'].sum().sort_values(ascending=False).head(n_top).reset_index()
fig = px.bar(
top_spenders,
x='amount',
y='membername',
color='party',
orientation='h',
title=f'Top {n_top} Spenders',
labels={'amount': 'Total Amount ($)', 'membername': 'Member'},
height=max(400, n_top * 25)
)
fig.update_layout(yaxis={'categoryorder': 'total ascending'})
else:
fig = px.bar(title='No data for selected filters')
return fig
def analyze_member(member_name):
if not member_name:
return "Please select a member", None
member_df = expenditures_df[expenditures_df['membername'] == member_name]
if member_df.empty:
return "No data found for this member", None
# Calculate metrics
total = member_df['amount'].sum()
count = len(member_df)
avg = member_df['amount'].mean()
party = member_df['party'].iloc[0]
info = f"""
### {member_name} ({party})
- **Total Expenses**: ${total:,.2f}
- **Number of Expenses**: {count:,}
- **Average Expense**: ${avg:,.2f}
"""
# Create category breakdown
category_breakdown = member_df.groupby('category')['amount'].sum().reset_index()
fig = px.pie(
category_breakdown,
values='amount',
names='category',
title=f'Expense Categories for {member_name}'
)
return info, fig
def search_expenses(member_search, min_amount, max_amount, category_filter):
filtered_df = expenditures_df.copy()
if member_search:
filtered_df = filtered_df[filtered_df['membername'].str.contains(member_search, case=False, na=False)]
filtered_df = filtered_df[(filtered_df['amount'] >= min_amount) & (filtered_df['amount'] <= max_amount)]
if category_filter and category_filter != "All":
filtered_df = filtered_df[filtered_df['category'] == category_filter]
# Get top 100 results
result = filtered_df.nlargest(100, 'amount')[['membername', 'party', 'category', 'amount', 'description', 'supplier', 'dateincurred']]
return result
# Get unique values for filters
years = sorted(expenditures_df['periodyear'].dropna().unique().tolist())
parties = sorted(expenditures_df['party'].dropna().unique().tolist())
categories = sorted(expenditures_df['category'].dropna().unique().tolist())
member_names = sorted(expenditures_df['membername'].dropna().unique().tolist())
# Create Gradio interface
with gr.Blocks(title="Canadian Parliamentary Expenditures", theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🍁 Canadian Parliamentary Expenditures Explorer")
gr.Markdown("Explore spending data from the Canadian House of Commons (2021-2025)")
# Filters
with gr.Row():
year_filter = gr.CheckboxGroup(
choices=years,
value=years[-2:] if len(years) >= 2 else years,
label="Select Years"
)
party_filter = gr.CheckboxGroup(
choices=parties,
value=parties,
label="Select Parties"
)
category_filter = gr.CheckboxGroup(
choices=categories,
value=categories,
label="Select Categories"
)
# Overview Tab
with gr.Tab("Overview"):
overview_btn = gr.Button("Update Overview", variant="primary")
metrics_display = gr.Markdown()
with gr.Row():
category_plot = gr.Plot()
party_plot = gr.Plot()
trend_plot = gr.Plot()
overview_btn.click(
create_overview_plots,
inputs=[year_filter, party_filter, category_filter],
outputs=[metrics_display, category_plot, party_plot, trend_plot]
)
# Top Spenders Tab
with gr.Tab("Top Spenders"):
n_slider = gr.Slider(10, 50, value=20, step=5, label="Number of top spenders")
spenders_btn = gr.Button("Show Top Spenders", variant="primary")
spenders_plot = gr.Plot()
spenders_btn.click(
get_top_spenders,
inputs=[n_slider, year_filter, party_filter, category_filter],
outputs=spenders_plot
)
# Member Analysis Tab
with gr.Tab("Member Analysis"):
member_dropdown = gr.Dropdown(
choices=member_names,
label="Select a Member",
interactive=True
)
member_info = gr.Markdown()
member_plot = gr.Plot()
member_dropdown.change(
analyze_member,
inputs=member_dropdown,
outputs=[member_info, member_plot]
)
# Search Tab
with gr.Tab("Search Expenses"):
with gr.Row():
search_member = gr.Textbox(label="Member Name (partial match)", placeholder="e.g., Trudeau")
search_category = gr.Dropdown(
choices=["All"] + categories,
value="All",
label="Category"
)
with gr.Row():
min_amount_input = gr.Number(value=0, label="Minimum Amount ($)")
max_amount_input = gr.Number(value=1000000, label="Maximum Amount ($)")
search_btn = gr.Button("Search", variant="primary")
results_table = gr.Dataframe(
headers=["Member", "Party", "Category", "Amount", "Description", "Supplier", "Date"],
datatype=["str", "str", "str", "number", "str", "str", "str"]
)
search_btn.click(
search_expenses,
inputs=[search_member, min_amount_input, max_amount_input, search_category],
outputs=results_table
)
# Footer
gr.Markdown("""
---
**Data Source**: Canadian House of Commons
**Dataset**: [irf23/canadian-parliamentary-expenditures](https://huggingface.co/datasets/irf23/canadian-parliamentary-expenditures)
**License**: CC0-1.0 (Public Domain)
""")
# Load initial overview
demo.load(
create_overview_plots,
inputs=[year_filter, party_filter, category_filter],
outputs=[metrics_display, category_plot, party_plot, trend_plot]
)
if __name__ == "__main__":
demo.launch()