# Spaces: Runtime error
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
# Baseline per-state records, keyed by state name. Each record starts with a
# hard-coded population figure and a list of three health concerns; the
# top-level augmentation loop further down adds more keys to these same dicts.
states = {
    'Minnesota': {
        'population': 5700000,
        'health_concerns': ['obesity', 'diabetes', 'heart disease'],
    },
    'Wisconsin': {
        'population': 5850000,
        'health_concerns': ['cancer', 'alcoholism', 'depression'],
    },
    'Texas': {
        'population': 29000000,
        'health_concerns': ['obesity', 'diabetes', 'heart disease'],
    },
    'Florida': {
        'population': 21500000,
        'health_concerns': ['cancer', 'alcoholism', 'depression'],
    },
    'California': {
        'population': 39500000,
        'health_concerns': ['obesity', 'diabetes', 'heart disease'],
    },
    'New York': {
        'population': 19500000,
        'health_concerns': ['cancer', 'alcoholism', 'depression'],
    },
}
# Augment every state record in place: shift the population by Gaussian noise
# (mean 0, standard deviation 500000, truncated to an int) and attach three
# randomly chosen categorical attributes. No seed is set in the visible code,
# so the values are expected to differ from run to run.
for record in states.values():
    record['population'] += int(np.random.normal(0, 500000))
    record['climate'] = np.random.choice(['cold', 'moderate', 'hot'])
    record['geography'] = np.random.choice(['coastal', 'inland', 'mountainous'])
    record['economy'] = np.random.choice(['agriculture', 'manufacturing', 'services'])
# Build a DataFrame with one row per state (orient='index' turns the outer
# dict keys into the row index), then select the columns in a fixed display
# order.
df = pd.DataFrame.from_dict(states, orient='index')
df = df[['population', 'climate', 'geography', 'economy', 'health_concerns']]
# Top three health concerns per state, keyed by state name. Each entry must
# also be a key of the `statistics` mapping, because app() looks every concern
# up there when rendering.
top_health_concerns = {
    'Minnesota': ['obesity', 'diabetes', 'heart disease'],
    'Wisconsin': ['cancer', 'alcoholism', 'depression'],
    'Texas': ['obesity', 'diabetes', 'heart disease'],
    'Florida': ['cancer', 'alcoholism', 'depression'],
    'California': ['obesity', 'diabetes', 'heart disease'],
    'New York': ['cancer', 'alcoholism', 'depression'],
}
# Per-concern display data: a prevalence string and a reference URL. The
# figures are hard-coded here; this code does not fetch them from, or check
# them against, the cited pages.
statistics = {
    'obesity': {
        'prevalence': '32.4%',
        'source': 'https://www.cdc.gov/obesity/data/prevalence-maps.html',
    },
    'diabetes': {
        'prevalence': '10.7%',
        'source': 'https://www.cdc.gov/diabetes/data/statistics-report/index.html',
    },
    'heart disease': {
        'prevalence': '12.1%',
        'source': 'https://www.cdc.gov/heartdisease/facts.htm',
    },
    'cancer': {
        'prevalence': '38.4%',
        'source': 'https://www.cdc.gov/cancer/dcpc/data/types.htm',
    },
    'alcoholism': {
        'prevalence': '14.5%',
        'source': 'https://www.niaaa.nih.gov/publications/brochures-and-fact-sheets/alcohol-facts-and-statistics',
    },
    'depression': {
        'prevalence': '7.6%',
        'source': 'https://www.nimh.nih.gov/health/statistics/major-depression.shtml',
    },
}
# Define the streamlit app
def app():
    """Render the page: intro text, the augmented table, then per-state concerns.

    Reads the module-level ``df``, ``top_health_concerns`` and ``statistics``
    objects and emits everything through ``st``. Returns nothing.

    Raises:
        KeyError: if a concern listed in ``top_health_concerns`` has no entry
            in ``statistics``.
    """
    st.title('Data Augmentation Example')
    st.write('This app demonstrates data augmentation by adding random noise and additional columns to a short python dictionary list of the states.')

    # Display the augmented data
    st.header('Augmented Data')
    st.write(df)

    # Display the top 3 health concerns by state and their statistics
    st.header('Top 3 Health Concerns by State')
    for state, concerns in top_health_concerns.items():
        st.subheader(state)
        for health_concern in concerns:
            st.write(health_concern)
            details = statistics[health_concern]
            st.write('Prevalence:', details['prevalence'])
            st.write('Source:', details['source'])
        # NOTE(review): the original indentation was lost, so it is unclear
        # whether this separator was emitted once per state (as here) or once
        # per concern -- confirm against the intended layout.
        st.write('---')
# Render the page whenever this script is executed.
app()