"""Streamlit demo: augment a small per-state dictionary and display it.

The script builds a dictionary of states, perturbs the population figures
with random noise, attaches randomly chosen categorical attributes, and
renders the result together with hard-coded prevalence figures and their
cited source URLs.
"""

import streamlit as st
import pandas as pd
import numpy as np

# Define the states and their populations and health concerns
states = {
    'Minnesota': {
        'population': 5700000,
        'health_concerns': ['obesity', 'diabetes', 'heart disease'],
    },
    'Wisconsin': {
        'population': 5850000,
        'health_concerns': ['cancer', 'alcoholism', 'depression'],
    },
    'Texas': {
        'population': 29000000,
        'health_concerns': ['obesity', 'diabetes', 'heart disease'],
    },
    'Florida': {
        'population': 21500000,
        'health_concerns': ['cancer', 'alcoholism', 'depression'],
    },
    'California': {
        'population': 39500000,
        'health_concerns': ['obesity', 'diabetes', 'heart disease'],
    },
    'New York': {
        'population': 19500000,
        'health_concerns': ['cancer', 'alcoholism', 'depression'],
    },
}

# Augment the data by adding random noise and additional columns.
# This runs at module level and mutates `states` in place, so the values are
# re-drawn every time the script is executed.
for state in states:
    # Gaussian noise (mean 0, standard deviation 500000), truncated to an int.
    states[state]['population'] += int(np.random.normal(0, 500000))
    states[state]['climate'] = np.random.choice(['cold', 'moderate', 'hot'])
    states[state]['geography'] = np.random.choice(['coastal', 'inland', 'mountainous'])
    states[state]['economy'] = np.random.choice(['agriculture', 'manufacturing', 'services'])

# Create a pandas dataframe from the augmented data (one row per state),
# with the columns in a fixed display order.
df = pd.DataFrame.from_dict(states, orient='index')
df = df[['population', 'climate', 'geography', 'economy', 'health_concerns']]

# Define the top 3 health concerns by state.
# Derived from `states` rather than repeated by hand so the two cannot drift
# apart; each list is copied so later edits to one do not affect the other.
top_health_concerns = {
    name: list(record['health_concerns']) for name, record in states.items()
}

# Define the statistics for each health concern and cite references
statistics = {
    'obesity': {
        'prevalence': '32.4%',
        'source': 'https://www.cdc.gov/obesity/data/prevalence-maps.html',
    },
    'diabetes': {
        'prevalence': '10.7%',
        'source': 'https://www.cdc.gov/diabetes/data/statistics-report/index.html',
    },
    'heart disease': {
        'prevalence': '12.1%',
        'source': 'https://www.cdc.gov/heartdisease/facts.htm',
    },
    'cancer': {
        'prevalence': '38.4%',
        'source': 'https://www.cdc.gov/cancer/dcpc/data/types.htm',
    },
    'alcoholism': {
        'prevalence': '14.5%',
        'source': 'https://www.niaaa.nih.gov/publications/brochures-and-fact-sheets/alcohol-facts-and-statistics',
    },
    'depression': {
        'prevalence': '7.6%',
        'source': 'https://www.nimh.nih.gov/health/statistics/major-depression.shtml',
    },
}


# Define the streamlit app
def app():
    """Render the page: title, augmented table, and per-state health concerns.

    Reads the module-level ``df``, ``top_health_concerns`` and ``statistics``
    and writes everything through Streamlit calls; returns nothing.
    """
    st.title('Data Augmentation Example')
    st.write('This app demonstrates data augmentation by adding random noise and additional columns to a short python dictionary list of the states.')

    # Display the augmented data
    st.header('Augmented Data')
    st.write(df)

    # Display the top 3 health concerns by state and their statistics
    st.header('Top 3 Health Concerns by State')
    for state, concerns in top_health_concerns.items():
        st.subheader(state)
        for health_concern in concerns:
            st.write(health_concern)
            details = statistics.get(health_concern)
            if details is None:
                # A concern without a statistics entry should not take the
                # whole page down with a KeyError.
                st.write('Prevalence:', 'not available')
                st.write('Source:', 'not available')
            else:
                st.write('Prevalence:', details['prevalence'])
                st.write('Source:', details['source'])
            # NOTE(review): the original indentation was lost, so it is
            # unclear whether this separator belongs after each concern or
            # after each state -- confirm the intended placement.
            st.write('---')


app()