# VizLib-Altair / app.py
# awacke1's picture
# Update app.py
# 80f3dd4
import streamlit as st
import pandas as pd
import altair as alt
# Largest hospitals with their location (latitude/longitude) and bed count.
# The records are kept as compact rows and expanded into one dict per
# hospital so every record is guaranteed to carry the same keys.
_HOSPITAL_COLUMNS = ('name', 'city', 'state', 'zip_code', 'bed_count', 'lat', 'lng')
_HOSPITAL_ROWS = (
    ('Florida Hospital Orlando', 'Orlando', 'FL', '32803', 2411, 28.562229, -81.362976),
    ('Cleveland Clinic', 'Cleveland', 'OH', '44195', 1730, 41.501669, -81.621275),
    ('Mayo Clinic', 'Rochester', 'MN', '55905', 1372, 44.020634, -92.463476),
    ('NewYork-Presbyterian Hospital-Columbia and Cornell', 'New York', 'NY', '10032', 2332, 40.840886, -73.942184),
    ('UCHealth University of Colorado Hospital', 'Aurora', 'CO', '80045', 672, 39.742401, -104.834694),
    ('Houston Methodist Hospital', 'Houston', 'TX', '77030', 1063, 29.710292, -95.399262),
    ('Johns Hopkins Hospital', 'Baltimore', 'MD', '21287', 1293, 39.297082, -76.590726),
    ('Massachusetts General Hospital', 'Boston', 'MA', '02114', 1032, 42.363371, -71.068635),
    ('University of Michigan Hospitals-Michigan Medicine', 'Ann Arbor', 'MI', '48109', 1145, 42.282531, -83.728376),
    ('Mount Sinai Hospital', 'New York', 'NY', '10029', 1168, 40.789866, -73.952348),
)

# List of per-hospital dicts (ZIP codes stay strings so leading zeros survive).
largest_hospitals = [dict(zip(_HOSPITAL_COLUMNS, row)) for row in _HOSPITAL_ROWS]

# Tabular view of the same records, shared by every chart function below.
largest_hospitals_df = pd.DataFrame(largest_hospitals)
# Define chart functions
def stacked_bar_chart():
    """Draw horizontal bars of hospital bed counts per state in Streamlit.

    States are placed on the y axis (sorted by descending x value) and the
    bed counts are stacked along x with ``stack='normalize'``.

    NOTE(review): with a normalized stack and one bar per state, every bar
    presumably spans the full axis; confirm this is the intended look.
    """
    encodings = {
        'y': alt.Y('state:N', sort='-x'),
        'x': alt.X('bed_count:Q', stack='normalize'),
        'color': alt.Color('state:N'),
        'tooltip': ['state', 'bed_count'],
    }
    bars = alt.Chart(largest_hospitals_df).mark_bar().encode(**encodings)
    sized = bars.properties(
        width=700,
        height=500,
        title='Largest Hospitals by State (Stacked Bar Chart)',
    )
    st.altair_chart(sized)
def bump_chart():
    """Draw a line chart of hospitals ordered by bed count in Streamlit.

    The data is first annotated with the maximum bed count (no ``groupby``
    is given, so the aggregate is taken over the whole table), ranked on
    that value, and limited to rows whose rank is at most 10.
    """
    ranked = (
        alt.Chart(largest_hospitals_df)
        .transform_joinaggregate(max_bed_count='max(bed_count)')
        .transform_window(
            rank='rank(max_bed_count)',
            sort=[alt.SortField('max_bed_count', order='descending')],
        )
        .transform_filter(alt.datum.rank <= 10)
    )

    # Hospital names on y, ordered by their bed count (largest first).
    name_order = alt.EncodingSortField('bed_count', order='descending')
    lines = ranked.mark_line().encode(
        y=alt.Y('name:N', sort=name_order),
        x=alt.X('bed_count:Q'),
        color=alt.Color('state:N'),
        tooltip=['name', 'bed_count', 'state'],
    )

    st.altair_chart(
        lines.properties(
            width=700,
            height=500,
            title='Largest Hospitals by Bed Count (Bump Chart)',
        )
    )
def radial_chart():
    """Draw circles sized by summed bed count per state in Streamlit.

    Despite the title, the marks are plain circles positioned on cartesian
    x/y axes: x and size both encode ``sum(bed_count)`` and y/color encode
    the state.
    """
    total_beds = 'sum(bed_count)'
    circles = alt.Chart(largest_hospitals_df).mark_circle()
    circles = circles.encode(
        x=total_beds,
        y='state',
        size=total_beds,
        color='state',
        tooltip=['state', 'bed_count'],
    )
    circles = circles.properties(
        width=700,
        height=500,
        title='Largest Hospitals by State (Radial Chart)',
    )
    st.altair_chart(circles)
def trellis_area_sort_chart():
    """Draw one area-chart row per hospital, coloured by state, in Streamlit.

    A synthetic ``year`` field is calculated from the first two characters
    of each ZIP code followed by ``"00"`` and used as the ordinal x axis;
    bed count is plotted on y, and each hospital gets its own facet row.

    Fixes relative to the previous version:

    * The calculate expression referenced the bare name ``zip_code``. Vega
      expressions access row fields through ``datum``, so it now reads
      ``datum.zip_code``.
    * The row facet used ``sort='-x'``, which is a positional-channel sort
      shorthand and not an accepted sort for a facet field. Rows are now
      ordered by descending bed count through an explicit sort field.
      NOTE(review): descending bed count is an assumed intent; confirm the
      desired row order.
    """
    # Facet rows ordered from the largest hospital to the smallest.
    row_order = alt.EncodingSortField(
        field='bed_count', op='max', order='descending'
    )

    chart = alt.Chart(largest_hospitals_df).transform_calculate(
        year='substring(datum.zip_code, 0, 2) + "00"'
    ).mark_area().encode(
        x=alt.X('year:O', title='Year'),
        y=alt.Y('bed_count:Q', title='Bed Count'),
        color=alt.Color('state:N', legend=alt.Legend(title='State')),
        row=alt.Row('name:N', sort=row_order, title='Hospital'),
    ).properties(
        width=700,
        height=500,
        title='Largest Hospitals by Year (Trellis Area Sort Chart)',
    )
    st.altair_chart(chart)
def wind_vector_map():
    """Layer airport points and wind vectors and show them in Streamlit.

    Two CSV files are downloaded on every call: an airports table (plotted
    as circles by ``lon``/``lat``) and a wind table (``u``, ``v``, ``speed``).

    NOTE(review): several calls here do not look like valid Altair usage
    and probably raise before anything is drawn; confirm against the Altair
    version in use:

    * ``alt.themes.dark()`` and ``alt.repeat(row=..., column=..., layer=...,
      data=...)`` are passed to ``alt.layer`` as if they were charts.
    * ``.mark_arrow()`` is called on a chart.
    * The calculate expressions use bare field names (``v``, ``u``,
      ``azimuth``, ``speed``) rather than ``datum.<field>``.
    * The wind layer filters on ``zip_code`` and encodes ``lng``/``lat``,
      which are columns of the hospitals table; whether the wind CSV has
      them is not visible here.
    """
    airports_df = pd.read_csv('https://raw.githubusercontent.com/hvo/datasets/master/nyc_airports.csv')
    airports = (
        alt.Chart(airports_df)
        .mark_circle(size=100)
        .encode(
            longitude='lon:Q',
            latitude='lat:Q',
            tooltip=['name', 'city', 'state'],
        )
    )

    wind_df = pd.read_csv('https://raw.githubusercontent.com/vega/vega/master/docs/data/wind.csv')
    wind = (
        alt.Chart(wind_df)
        .mark_line()
        .encode(
            x='u:Q',
            y='v:Q',
            color=alt.Color('speed:Q', scale=alt.Scale(scheme='inferno')),
            size='speed:Q',
            tooltip=['u', 'v', 'speed'],
        )
    )

    # The layer members are built in the same order the original call
    # evaluated them, so the first failing call is unchanged.
    theme_layer = alt.themes.dark()
    repeat_layer = alt.repeat(row=range(4), column=range(4), layer=0, data=largest_hospitals_df)
    first_zip = str(largest_hospitals_df.iloc[0]['zip_code'])
    wind_arrows = (
        wind.transform_filter(alt.datum.zip_code == first_zip)
        .transform_calculate(
            azimuth='atan2(v, u)',
            speed='sqrt(u * u + v * v)',
            dx='cos(azimuth * PI/180) * speed',
            dy='sin(azimuth * PI/180) * speed',
        )
        .mark_arrow()
        .encode(
            longitude='lng:Q',
            latitude='lat:Q',
            angle='azimuth:Q',
            size=alt.Size('speed:Q', scale=alt.Scale(range=[0, 50])),
            tooltip=['u', 'v', 'speed', 'azimuth'],
        )
    )

    chart = alt.layer(theme_layer, repeat_layer, airports, wind_arrows).properties(
        width=700,
        height=500,
        title='Wind Vectors and Airports in New York City',
    )
    st.altair_chart(chart)
def table_bubble_plot():
    """Draw a bubble plot of hospitals (bed count vs. state) in Streamlit.

    Each hospital is a circle placed at its bed count on x and its state
    on y; the circle area also encodes bed count and the colour encodes
    the state.
    """
    bubbles = alt.Chart(largest_hospitals_df).mark_circle()
    bubbles = bubbles.encode(
        x=alt.X('bed_count:Q', title='Bed Count'),
        y=alt.Y('state:N', sort='-x', title='State'),
        size=alt.Size('bed_count:Q', title='Bed Count'),
        color=alt.Color('state:N'),
        tooltip=['name', 'bed_count', 'city', 'state'],
    )
    st.altair_chart(
        bubbles.properties(
            width=700,
            height=500,
            title='Largest Hospitals in the US (Table Bubble Plot)',
        )
    )
def locations_of_us_airports():
    """Plot airport locations from a remote CSV as circles in Streamlit.

    The airports table is downloaded on every call and each row is drawn
    at its ``lon``/``lat`` position with a name/city/state tooltip.
    """
    airports_df = pd.read_csv('https://raw.githubusercontent.com/hvo/datasets/master/nyc_airports.csv')

    position_and_tooltip = dict(
        longitude='lon:Q',
        latitude='lat:Q',
        tooltip=['name', 'city', 'state'],
    )
    dots = alt.Chart(airports_df).mark_circle(size=100).encode(**position_and_tooltip)
    dots = dots.properties(
        width=700,
        height=500,
        title='Locations of US Airports',
    )
    st.altair_chart(dots)
def connections_among_us_airports_interactive():
    """Show flight routes and airports with an origin selection in Streamlit.

    Two CSV files are downloaded on every call: an airports table and a
    routes table. Routes are drawn as geoshapes coloured red when their
    ``origin`` is selected (light gray otherwise); airports are drawn as
    circles and filtered by the same selection.

    Fix relative to the previous version: the ``origin`` selection was used
    in a colour condition and a filter but was never attached to any chart,
    so the rendered spec referred to a selection that did not exist. It is
    now registered on the routes layer with ``add_selection``, matching the
    ``selection_multi`` API already used here.

    NOTE(review): the routes layer uses ``mark_geoshape`` on tabular CSV
    rows and looks up airports by ``id``; whether those columns/geometries
    exist in the downloaded files is not visible here.
    """
    airports_df = pd.read_csv('https://raw.githubusercontent.com/hvo/datasets/master/nyc_airports.csv')
    routes_df = pd.read_csv('https://raw.githubusercontent.com/vega/vega/master/docs/data/flights-2k.csv')

    # Multi-select keyed on the route's origin airport.
    source = alt.selection_multi(fields=['origin'])

    chart = alt.Chart(routes_df).mark_geoshape(stroke='white', strokeWidth=0.5).encode(
        color=alt.condition(source, alt.value('red'), alt.value('lightgray')),
        tooltip=['origin', 'destination', 'count'],
    ).transform_lookup(
        lookup='id',
        from_=alt.LookupData(airports_df, 'id', ['name', 'city', 'state']),
    ).project(
        type='albersUsa'
    ).add_selection(
        # Attach the selection so the condition and filter can resolve it.
        source
    ).properties(
        width=700,
        height=500,
        title='Connections Among US Airports (Interactive)',
    )

    points = alt.Chart(airports_df).mark_circle(size=100).encode(
        longitude='lon:Q',
        latitude='lat:Q',
        tooltip=['name', 'city', 'state', 'id'],
    ).transform_filter(
        source
    )

    st.altair_chart(chart + points)
def one_dot_per_zipcode():
    """Plot each hospital as a dot at its longitude/latitude in Streamlit.

    Dots are coloured by state using the ``category10`` scheme and carry a
    tooltip with the hospital's name, city, state and bed count.
    """
    state_colour = alt.Color('state:N', scale=alt.Scale(scheme='category10'))
    dots = (
        alt.Chart(largest_hospitals_df)
        .mark_circle(size=50)
        .encode(
            longitude='lng:Q',
            latitude='lat:Q',
            color=state_colour,
            tooltip=['name', 'city', 'state', 'bed_count'],
        )
        .properties(
            width=700,
            height=500,
            title='Largest Hospitals in the US (One Dot per Zip Code)',
        )
    )
    st.altair_chart(dots)
def isotype_visualization_with_emoji():
    """Plot each hospital as an emoji image (bed count vs. state) in Streamlit.

    Every hospital is drawn as the same 50x50 image, positioned at its bed
    count on x and its state on y, with both axes hidden.

    Fix relative to the previous version: the image URL was passed to the
    ``url`` channel as a bare string, which Altair treats as the name of a
    data field. No such column exists, so no image was resolved. The URL
    is now wrapped in ``alt.value`` so it is used as a constant.
    """
    emoji_url = 'https://raw.githubusercontent.com/twitter/twemoji/v13.0.1/assets/svg/1f628.svg'

    chart = alt.Chart(largest_hospitals_df).mark_image(width=50, height=50).encode(
        x=alt.X('bed_count:Q', axis=None),
        y=alt.Y('state:N', sort='-x', axis=None),
        # Constant value, not a field lookup.
        url=alt.value(emoji_url),
        tooltip=['name', 'bed_count'],
    ).properties(
        width=700,
        height=500,
        title='Largest Hospitals in the US (Isotype Visualization with Emoji)',
    )
    st.altair_chart(chart)
def binned_heatmap():
    """Draw a heatmap of record counts by binned bed count and state.

    Bed counts are binned along x, states are listed on y, and each
    rectangle's colour encodes ``count()``. The chart is rendered with
    Streamlit.
    """
    cells = alt.Chart(largest_hospitals_df).mark_rect()
    cells = cells.encode(
        x=alt.X('bed_count:Q', bin=True),
        y=alt.Y('state:N', sort='-x'),
        color=alt.Color('count()'),
        tooltip=['state', 'bed_count'],
    )
    heatmap = cells.properties(
        width=700,
        height=500,
        title='Largest Hospitals by State (Binned Heatmap)',
    )
    st.altair_chart(heatmap)
def facetted_scatterplot_with_marginal_histograms():
    """Show a per-state facetted scatterplot plus two histograms in Streamlit.

    Three charts are built from the hospitals table:

    * a scatterplot of bed count vs. state, facetted into one column per
      state;
    * a histogram of binned bed counts, concatenated to the right of the
      facetted scatterplot;
    * a bar chart of record counts per state, rendered as a second
      Streamlit chart.

    Fix relative to the previous version: the column facet used
    ``sort='-y'``, a positional-channel shorthand that is not an accepted
    sort for a facet field. Because the y channel is the state itself, the
    equivalent facet order is simply ``'descending'``.
    """
    chart = alt.Chart(largest_hospitals_df).mark_circle(size=50).encode(
        x=alt.X('bed_count:Q', title='Bed Count'),
        y=alt.Y('state:N', sort='-x', title='State'),
        color=alt.Color('state:N'),
        tooltip=['name', 'bed_count', 'city', 'state'],
    ).properties(
        width=700,
        height=500,
        title='Largest Hospitals in the US (Facetted Scatterplot with Marginal Histograms)',
    ).facet(
        column=alt.Column('state:N', sort='descending', title='State'),
        spacing={'column': 30},
    )

    # Long-form frames (index, value, variable) feeding the two histograms.
    histogram_x = largest_hospitals_df[['bed_count']].reset_index().rename(columns={'bed_count': 'value'})
    histogram_x['variable'] = 'bed_count'
    histogram_y = largest_hospitals_df[['state']].reset_index().rename(columns={'state': 'value'})
    histogram_y['variable'] = 'state'

    # Both histograms share the same two-colour palette.
    palette = ['#675193', '#ca8861']

    chart_x = alt.Chart(histogram_x).mark_bar().encode(
        x=alt.X('value:Q', bin=True),
        y=alt.Y('count()'),
        color=alt.Color('variable:N', scale=alt.Scale(range=palette)),
        tooltip=['value'],
    ).properties(
        width=700,
        height=100,
    )

    chart_y = alt.Chart(histogram_y).mark_bar().encode(
        y=alt.Y('value:N', sort='-x'),
        x=alt.X('count()'),
        color=alt.Color('variable:N', scale=alt.Scale(range=palette)),
        tooltip=['value'],
    ).properties(
        width=100,
        height=500,
    )

    st.altair_chart(chart | chart_x, use_container_width=True)
    st.altair_chart(chart_y, use_container_width=True)
def ridgeline_plot():
    """Draw per-state area marks over bed count, facetted by rank.

    Pipeline: count the records per state, rank rows by that count, keep
    ranks up to 5, then estimate a density of ``bed_count`` per state over
    the extent 0-3000. The result is drawn as areas and rendered with
    Streamlit.

    NOTE(review): the computed ``density`` field is never encoded (y uses
    ``state``), and the row facet uses ``rank``, which is not listed in the
    density transform's ``groupby``/``as_`` outputs. Confirm whether y
    should be ``density`` and whether ``rank`` survives the transform.
    """
    per_state_rank = (
        alt.Chart(largest_hospitals_df)
        .transform_joinaggregate(count='count()', groupby=['state'])
        .transform_window(
            rank='rank(count)',
            sort=[alt.SortField('count', order='descending')],
        )
        .transform_filter(alt.datum.rank <= 5)
    )

    densities = per_state_rank.transform_density(
        density='bed_count',
        as_=['bed_count', 'density'],
        extent=[0, 3000],
        groupby=['state'],
    )

    ridges = densities.mark_area().encode(
        y=alt.Y('state:N', sort='-x'),
        x=alt.X('bed_count:Q', title='Bed Count'),
        color=alt.Color('state:N'),
        row=alt.Row('rank:O', sort='descending', title=None),
    )

    st.altair_chart(
        ridges.properties(
            width=700,
            height=500,
            title='Largest Hospitals by State (Ridgeline Plot)',
        )
    )
def create_sidebar():
    """Build the sidebar with one button per chart and draw clicked charts.

    Each button is labelled with the chart name followed by the symbol
    returned by ``emoji`` for that name. Buttons are created in the order
    of the mapping below; whenever a button reports a truthy value, the
    associated chart function is called immediately.
    """
    chart_functions = {
        'Stacked Bar Chart with Text Overlay': stacked_bar_chart,
        'Bump Chart': bump_chart,
        'Radial Chart': radial_chart,
        'Trellis Area Sort Chart': trellis_area_sort_chart,
        'Wind Vector Map': wind_vector_map,
        'Table Bubble Plot': table_bubble_plot,
        'Locations of US Airports': locations_of_us_airports,
        'Connections Among U.S. Airports Interactive': connections_among_us_airports_interactive,
        'One Dot Per Zipcode': one_dot_per_zipcode,
        'Isotype Visualization with Emoji': isotype_visualization_with_emoji,
        'Binned Heatmap': binned_heatmap,
        'Facetted Scatterplot with Marginal Histograms': facetted_scatterplot_with_marginal_histograms,
        'Ridgeline Plot': ridgeline_plot,
    }

    st.sidebar.title('Charts')
    for label_text, render_chart in chart_functions.items():
        button_label = f'{label_text} {emoji(label_text)}'
        if st.sidebar.button(button_label):
            render_chart()
# Chart name -> emoji shown next to it in the sidebar. The characters are
# written as escape sequences so the file stays correct even if it is
# re-saved or transferred with the wrong text encoding (the previous
# literals had been corrupted into mojibake that way).
_CHART_EMOJIS = {
    'Stacked Bar Chart with Text Overlay': '\U0001F4CA',          # bar chart
    'Bump Chart': '\U0001F4C8',                                   # chart increasing
    'Radial Chart': '\U0001F3A1',                                 # ferris wheel
    'Trellis Area Sort Chart': '\U0001F4C9',                      # chart decreasing
    'Wind Vector Map': '\U0001F32C\uFE0F',                        # wind face
    'Table Bubble Plot': '\U0001F4AC',                            # speech balloon
    'Locations of US Airports': '\u2708\uFE0F',                   # airplane
    'Connections Among U.S. Airports Interactive': '\U0001F6EB',  # airplane departure
    'One Dot Per Zipcode': '\U0001F4CD',                          # round pushpin
    'Isotype Visualization with Emoji': '\U0001F600',             # grinning face
    'Binned Heatmap': '\U0001F5FA\uFE0F',                         # world map
    'Facetted Scatterplot with Marginal Histograms': '\U0001F533',  # white square button
    'Ridgeline Plot': '\U0001F3D4\uFE0F',                         # snow-capped mountain
}


def emoji(chart_name):
    """Return the emoji associated with *chart_name*.

    Args:
        chart_name: Display name of a chart as used for the sidebar buttons.

    Returns:
        The emoji string for that chart, or an empty string when the name
        is not known.
    """
    return _CHART_EMOJIS.get(chart_name, '')
# Script entry point: build the sidebar (and draw any clicked chart) when this file is executed.
create_sidebar()