# Capture metadata from the hosting page (not part of the program):
#   Spaces status: Runtime error
#   File size: 3,535 Bytes
#   Identifiers shown on the page: cd18b19 54899c2
#   The viewer's line-number gutter (1-102) was dropped from this header.
import streamlit as st
import pandas as pd
import numpy as np
# Baseline record per state: a population figure and a list of health concerns.
states = {
    'Minnesota': {
        'population': 5700000,
        'health_concerns': ['obesity', 'diabetes', 'heart disease'],
    },
    'Wisconsin': {
        'population': 5850000,
        'health_concerns': ['cancer', 'alcoholism', 'depression'],
    },
    'Texas': {
        'population': 29000000,
        'health_concerns': ['obesity', 'diabetes', 'heart disease'],
    },
    'Florida': {
        'population': 21500000,
        'health_concerns': ['cancer', 'alcoholism', 'depression'],
    },
    'California': {
        'population': 39500000,
        'health_concerns': ['obesity', 'diabetes', 'heart disease'],
    },
    'New York': {
        'population': 19500000,
        'health_concerns': ['cancer', 'alcoholism', 'depression'],
    },
}

# Extra categorical columns added during augmentation, each drawn uniformly
# from its option list. Insertion order fixes the order of the random draws.
_CATEGORICAL_CHOICES = {
    'climate': ['cold', 'moderate', 'hot'],
    'geography': ['coastal', 'inland', 'mountainous'],
    'economy': ['agriculture', 'manufacturing', 'services'],
}

# Augment the data in place: perturb each population and attach the random
# categorical columns. No seed is set in this file, so the values differ from
# one execution of the script to the next.
for state_info in states.values():
    # Gaussian noise (mean 0, standard deviation 500000); int() truncates the
    # draw toward zero so the population stays an integer.
    state_info['population'] += int(np.random.normal(0, 500000))
    for column, options in _CATEGORICAL_CHOICES.items():
        state_info[column] = np.random.choice(options)

# Create a pandas dataframe from the augmented data: one row per state, with
# the columns put into a fixed display order.
df = pd.DataFrame.from_dict(states, orient='index')
df = df[['population', 'climate', 'geography', 'economy', 'health_concerns']]
# Top 3 health concerns by state. Only two distinct profiles occur, so each is
# spelled out once and every state receives its own list copy of one of them.
_CARDIOMETABOLIC_CONCERNS = ('obesity', 'diabetes', 'heart disease')
_CANCER_ALCOHOL_DEPRESSION_CONCERNS = ('cancer', 'alcoholism', 'depression')

top_health_concerns = {
    state_name: list(concern_profile)
    for state_name, concern_profile in (
        ('Minnesota', _CARDIOMETABOLIC_CONCERNS),
        ('Wisconsin', _CANCER_ALCOHOL_DEPRESSION_CONCERNS),
        ('Texas', _CARDIOMETABOLIC_CONCERNS),
        ('Florida', _CANCER_ALCOHOL_DEPRESSION_CONCERNS),
        ('California', _CARDIOMETABOLIC_CONCERNS),
        ('New York', _CANCER_ALCOHOL_DEPRESSION_CONCERNS),
    )
}
# Per-concern statistics with their cited references. Each row is
# (concern, prevalence as a display string, source URL).
# NOTE(review): the figures are hard-coded here; nothing in this file checks
# them against the cited pages.
statistics = {
    concern_name: {'prevalence': prevalence_text, 'source': source_url}
    for concern_name, prevalence_text, source_url in (
        ('obesity', '32.4%',
         'https://www.cdc.gov/obesity/data/prevalence-maps.html'),
        ('diabetes', '10.7%',
         'https://www.cdc.gov/diabetes/data/statistics-report/index.html'),
        ('heart disease', '12.1%',
         'https://www.cdc.gov/heartdisease/facts.htm'),
        ('cancer', '38.4%',
         'https://www.cdc.gov/cancer/dcpc/data/types.htm'),
        ('alcoholism', '14.5%',
         'https://www.niaaa.nih.gov/publications/brochures-and-fact-sheets/alcohol-facts-and-statistics'),
        ('depression', '7.6%',
         'https://www.nimh.nih.gov/health/statistics/major-depression.shtml'),
    )
}
# Define the Streamlit app
def app():
    """Render the demo page: the augmented table, then per-state concerns.

    Reads the module-level ``df``, ``top_health_concerns`` and ``statistics``
    objects and emits everything through ``st`` calls; returns nothing.

    Raises:
        KeyError: if a concern listed in ``top_health_concerns`` has no entry
            in ``statistics`` (or the entry lacks ``'prevalence'``/``'source'``).
    """
    st.title('Data Augmentation Example')
    st.write('This app demonstrates data augmentation by adding random noise and additional columns to a short python dictionary list of the states.')

    # Display the augmented data
    st.header('Augmented Data')
    st.write(df)

    # Display the top 3 health concerns by state and their statistics
    st.header('Top 3 Health Concerns by State')
    for state, concerns in top_health_concerns.items():
        st.subheader(state)
        for health_concern in concerns:
            st.write(health_concern)
            # Look the entry up once and reuse it for both fields.
            concern_stats = statistics[health_concern]
            st.write('Prevalence:', concern_stats['prevalence'])
            st.write('Source:', concern_stats['source'])
        # NOTE(review): the separator is emitted once per state here; the
        # original nesting of this call could not be recovered - confirm.
        st.write('---')


# Runs on every execution of the script, as in the original.
app()
# End of captured file.