Spaces:

Chloecky
/

IS445_FP2

Sleeping

File size: 10,104 Bytes

2b51b88

import numpy as np
import pandas as pd
import altair as alt
import streamlit as st

@st.cache_data
def load_data():
    """Fetch the Chicago traffic-crash CSV and keep only geolocated rows.

    Returns:
        pandas.DataFrame: the crash records read from the hosted CSV, minus
        every row whose ``LATITUDE`` or ``LONGITUDE`` is missing.
    """
    crashes_csv_url = "https://huggingface.co/datasets/Chloecky/traffic_crashes_chicago/resolve/main/Traffic_Crashes_-_Crashes_20250420.csv"
    # Rows without coordinates cannot be placed on the location heatmap.
    return pd.read_csv(crashes_csv_url).dropna(subset=['LATITUDE', 'LONGITUDE'])

# Page-level configuration: render the app with the wide layout
st.set_page_config(layout="wide")

# Page header: app title and group members
st.title('Streamlit App for IS445 FP2')
st.text('Group 8: Keyu (Chloe) Cai, Yutong Zheng')

# Crash records; load_data() has already dropped rows without coordinates
traffic = load_data()

# Parse the crash timestamp once so calendar parts can be derived from it
crash_timestamp = pd.to_datetime(traffic['CRASH_DATE'])
traffic['CRASH_DATE'] = crash_timestamp

# Raw dataset already has 'Hour' and 'Month'; only year and day-of-month are added
traffic['YEAR'] = crash_timestamp.dt.year
traffic['DY'] = crash_timestamp.dt.day

# Columns left out of the analysis frame
excluded_columns = [
    'CRASH_RECORD_ID', 'CRASH_DATE_EST_I',
    'REPORT_TYPE', 'INTERSECTION_RELATED_I', 'NOT_RIGHT_OF_WAY_I',
    'HIT_AND_RUN_I', 'PHOTOS_TAKEN_I', 'STATEMENTS_TAKEN_I',
    'DOORING_I', 'WORK_ZONE_I', 'WORK_ZONE_TYPE', 'WORKERS_PRESENT_I',
    'LOCATION',
]
traffic_analysis = traffic.drop(columns=excluded_columns, errors='ignore')

# Keep years with complete records (2018-2024), crashes with at least one
# injury, and rows whose coordinates are not the 0/0 placeholder
in_complete_years = traffic_analysis['YEAR'].between(2018, 2024)
has_injuries = traffic_analysis['INJURIES_TOTAL'] > 0
has_nonzero_coordinates = (
    (traffic_analysis['LONGITUDE'] != 0) & (traffic_analysis['LATITUDE'] != 0)
)
traffic_analysis = traffic_analysis[
    in_complete_years & has_injuries & has_nonzero_coordinates
].copy()

# Add Weekday/Weekend label for each record
# NOTE(review): codes 1 and 7 are treated as the weekend (presumably Sunday and
# Saturday) - confirm against the dataset's documentation
on_weekend = traffic_analysis['CRASH_DAY_OF_WEEK'].isin([1, 7])
traffic_analysis['DAY_TYPE'] = np.where(on_weekend, 'Weekend', 'Weekday')

# Driver plot: Heatmap of Injuries Total by Location in City of Chicago
# Lift Altair's row limit so the full crash table can be passed to the charts
alt.data_transformers.disable_max_rows()

# Click-and-drag interval over both axes; the driven plots filter on it
selection = alt.selection_interval(encodings=['x', 'y'])

# One binning definition shared by the axes AND the tooltip. With a bare
# `bin=True` the tooltip fields would be binned with default parameters, which
# can disagree with the 20-bin grid that is actually drawn and adds a second,
# differently-binned grouping to the aggregation.
coordinate_bins = alt.Bin(maxbins=20)

chart = alt.Chart(traffic_analysis).mark_rect().encode(
    x=alt.X('LONGITUDE:Q', bin=coordinate_bins, title='Longitude (°)'),
    y=alt.Y('LATITUDE:Q', bin=coordinate_bins, title='Latitude (°)'),
    # Cell colour = total number of injuries recorded inside the cell
    color=alt.Color('sum(INJURIES_TOTAL):Q',
                    scale=alt.Scale(scheme='blues'),
                    title='Injuries Total',
                    legend=alt.Legend(orient='left', offset=20, titlePadding=15)),
    tooltip=[
        alt.Tooltip('count()', title='Crash Count'),
        alt.Tooltip('sum(INJURIES_TOTAL):Q', title='Injuries Total'),
        alt.Tooltip('LATITUDE:Q', bin=coordinate_bins, title='Latitude bin'),
        alt.Tooltip('LONGITUDE:Q', bin=coordinate_bins, title='Longitude bin')
    ]
).add_params(
    selection
).properties(
    width=300,
    height=300,
    # To centre the title instead, pass
    # alt.TitleParams(text=..., anchor='middle') here.
    title='Heatmap of Injuries Total by Location in City of Chicago'
)

# Driven plot 1: Hourly Distribution of Injury-Related Crashes: Weekday vs Weekend
# Data pipeline, in order: keep the brushed region, count crashes per
# (hour, day type), then scale the counts so the two day types are comparable
# (weekday counts are divided by 5, weekend counts by 2).
hourly_crash_counts = (
    alt.Chart(traffic_analysis)
    .transform_filter(selection)
    .transform_aggregate(
        crash_count='count()',
        groupby=['CRASH_HOUR', 'DAY_TYPE'],
    )
    .transform_calculate(
        adjusted_count="datum.DAY_TYPE == 'Weekday' ? datum.crash_count / 5 : datum.crash_count / 2"
    )
)

hourly_tooltip = [
    alt.Tooltip('CRASH_HOUR:O', title='Hour of Day'),
    alt.Tooltip('DAY_TYPE:N', title='Day Type'),
    alt.Tooltip('adjusted_count:Q', title='Average Count', format=',d'),
]

# One line (with point markers) per day type across the hours of the day
line = hourly_crash_counts.mark_line(point=True).encode(
    x=alt.X('CRASH_HOUR:O', title='Hour of Day'),
    y=alt.Y('adjusted_count:Q', title='Average Number of Injury-Related Crashes'),
    color=alt.Color('DAY_TYPE:N', legend=alt.Legend(title='Day Type', titlePadding=15)),
    tooltip=hourly_tooltip,
).properties(
    width=300,
    height=300,
    title='Hourly Distribution of Injury-Related Crashes: Weekday vs Weekend',
)

# line = alt.Chart(traffic_analysis).mark_line(point=True).encode(
#     x=alt.X('CRASH_HOUR:O', title='Hour of Day'),
#     y=alt.Y('count()', title='Number of Injury-Related Crashes'),
#     color=alt.Color('DAY_TYPE:N', legend=alt.Legend(title='Day Type'))
# ).transform_filter(
#     selection
# ).properties(
#     width=300,
#     height=300,
#     title='Hourly Distribution of Injury-Related Crashes: Weekday vs Weekend'
# )


# Driven plot 2: Fatal Injury Rate of Different Lighting Conditions
# Bar height is mean(INJURIES_FATAL) over the crashes inside the brushed
# region, one bar per lighting condition, sorted from highest to lowest.
bar1 = alt.Chart(traffic_analysis).mark_bar().encode(
    x=alt.X('LIGHTING_CONDITION:N', sort='-y', title='Lighting Condition'),
    y=alt.Y('mean(INJURIES_FATAL):Q', scale=alt.Scale(domainMin=0), axis=alt.Axis(format='%'), title='Fatal Injury Rate'),
    # Fixed palette: darker blues for the darker lighting conditions
    color=alt.Color('LIGHTING_CONDITION:N',
                    scale=alt.Scale(
                        domain=['DARKNESS', 'DARKNESS, LIGHTED ROAD', 'DAWN', 'DUSK', 'DAYLIGHT', 'UNKNOWN'],
                        range=['#084C88', '#2A6FB6', '#4FA3D9', '#7EC8E3', '#BFEFFF', '#E0F7FA']
                    ),
                    legend=alt.Legend(orient='left', title='Lighting Condition', titlePadding=15)),
    tooltip=[
        alt.Tooltip('LIGHTING_CONDITION:N', title='Lighting Condition'),
        # Percent format to match the y-axis; a plain '.2f' would show a small
        # rate as 0.00 or 0.01 and disagree with the axis labels
        alt.Tooltip('mean(INJURIES_FATAL):Q', title='Fatal Injury Rate', format='.2%')
    ]
).transform_filter(
    selection
).properties(
    width=300,
    height=300,
    title='Fatal Injury Rate of Different Lighting Conditions'
)


# bar2 = alt.Chart(traffic_analysis).mark_bar().encode(
#     x=alt.X('WEATHER_CONDITION:N'),
#     y=alt.Y('mean(INJURIES_FATAL):Q'),
#     color=alt.Color('WEATHER_CONDITION:N', 
#                     legend=alt.Legend(orient='right'))
# ).transform_filter(
#     selection
# ).properties(
#     width=400,
#     height=400
# )

# Driven plot 3: Trends in Crash Damage Costs by Year (2018–2024)
# Damage categories ordered from lowest to highest cost, light to dark blue
damage_scale = alt.Scale(
    domain=['$500 OR LESS', '$501 - $1,500', 'OVER $1,500'],
    range=['#AEDFF7', '#4FA3D9', '#084C88'],
)

damage_tooltip = [
    alt.Tooltip('YEAR:O', title='Year'),
    alt.Tooltip('DAMAGE:N', title='Damage Level'),
    alt.Tooltip('count()', title='Count'),
]

# Crash counts per year inside the brushed region, with one side-by-side bar
# per damage level (xOffset places the bars next to each other within a year)
grouped_bar = (
    alt.Chart(traffic_analysis)
    .transform_filter(selection)
    .mark_bar()
    .encode(
        x=alt.X('YEAR:O', title='Year'),
        y=alt.Y('count()', title='Count'),
        color=alt.Color(
            'DAMAGE:N',
            scale=damage_scale,
            title='Damage Level',
            legend=alt.Legend(title='Damage Level', titlePadding=15),
        ),
        xOffset='DAMAGE:N',
        tooltip=damage_tooltip,
    )
    .properties(
        width=300,
        height=300,
        title='Annual Distribution of Crash Damage Levels (2018–2024)',
    )
)

# Lay the four charts out as a 2x2 grid: heatmap and hourly line chart on top,
# the two bar charts underneath.
top_row = alt.hconcat(chart, line)
# The bar charts colour by different fields, so each gets its own colour scale
bottom_row = alt.hconcat(bar1, grouped_bar).resolve_scale(color='independent')
final_chart = alt.vconcat(top_row, bottom_row)

# Render with the charts' own fixed sizes instead of the container width
st.altair_chart(final_chart, use_container_width=False)

# Explanatory text rendered below the dashboard: what each of the four charts
# shows and how the heatmap selection drives the other three.
st.markdown('''
            ### Dashboard Overview and Guidance
            This dashboard presents an analysis based on the City of Chicago's injury-related crash data from 2018 to 2024. The original dataset (https://data.cityofchicago.org/Transportation/Traffic-Crashes-Crashes/85ca-t3if/data_preview) includes detailed information about crashes that resulted in injuries within the city during this time period.  
            
            The top-left figure displays a colored map (heatmap) showing the geographic distribution of total injuries from crashes, with darker areas indicating places where more people were injured. This heatmap serves as the driver plot of the dashboard. Users can click and drag to select a specific region of interest, and the three driven plots will automatically update to reflect data from the selected area.  
            
            The first driven plot (top right) shows the distribution of injury-related crashes across 24 hours, comparing patterns between weekdays and weekends. This chart reflects the number of crashes that caused injuries (not the total number of injuries like in the heatmap).   
            
            The second driven plot (bottom left) illustrates how different lighting conditions (such as daylight, dusk, or darkness) are associated with variations in fatal injury rates. The lighting conditions are sorted from highest to lowest fatal injury rate.  
            
            The third driven plot (bottom right) depicts the trend of crash counts across different damage cost levels from 2018 to 2024. The bar heights represent the number of crashes falling into each damage category for each year.
            
            Overall, this dashboard highlights key insights from the crash dataset in terms of time, environment, and damage severity. It is designed to help anyone interested in traffic crash data better understand the dataset, and it may also offer valuable guidance for city planners or traffic management officials seeking to improve road safety.
            ''')

# Section describing the supplementary community-area boundaries dataset
# (heading typo "Contexual" corrected to "Contextual")
st.markdown('''
    #### Contextual Dataset
    We have found a contextual dataset, which can be accessed at https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas/igwz-8jzy. This dataset shows the boundaries of the 77 community areas in Chicago. Adding this dataset to our project will help us group traffic accidents by these areas instead of just using latitude and longitude. This makes it easier for people to understand where accidents happen more often and helps tell a clearer story about which neighborhoods have more traffic safety issues.
''')

# Section stating where the original and contextual datasets are hosted
st.markdown('''
            #### Hosting Datasets
            We would continue our plan for hosting original dataset on HuggingFace like Part 1 (https://huggingface.co/datasets/Chloecky/traffic_crashes_chicago/resolve/main/Traffic_Crashes_-_Crashes_20250420.csv). To ensure consistency across all datasets, we also decided to host the contextual dataset on Hugging Face as well (https://huggingface.co/datasets/Chloecky/traffic_crashes_chicago/resolve/main/Boundaries_-_Community_Areas_20250424.csv).
            ''')