Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import numpy as np | |
import google.generativeai as genai | |
import os | |
from io import StringIO | |
import json | |
# Page-level Streamlit configuration (wide layout, browser tab title).
# This runs at module level, ahead of main().
st.set_page_config(layout="wide", page_title="Dynamic Data Dashboard")
def main():
    """Render the dashboard page: header, API-key input, CSV upload and charts.

    Only the header and the inputs are drawn until a CSV file is uploaded.
    Once one is, the data preview and overview metrics are shown, followed by
    the AI-driven section (only when a Gemini API key is available from the
    sidebar or the GEMINI_API_KEY environment variable) and the standard
    visualizations. Any exception raised while handling the file is reported
    on the page through st.error rather than propagated.
    """
    st.title("Dynamic Data Dashboard Generator")
    st.markdown("""
    Upload your CSV file to generate an interactive dashboard tailored to your data.
    The application uses AI to analyze your data and create relevant visualizations.
    """)

    # A key typed into the sidebar wins; otherwise fall back to the environment.
    sidebar_key = st.sidebar.text_input("Enter your Gemini API key for more power", type="password")
    api_key = sidebar_key or os.getenv("GEMINI_API_KEY")

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        # Nothing to analyze yet.
        return

    try:
        # Load the upload and show its first rows.
        df = pd.read_csv(uploaded_file)
        with st.expander("Preview Data", expanded=True):
            st.dataframe(df.head(10))

        # Headline numbers, laid out in two columns.
        st.subheader("Data Overview")
        left, right = st.columns(2)
        left.metric("Rows", df.shape[0])
        left.metric("Columns", df.shape[1])
        right.metric("Numerical Columns", len(df.select_dtypes(include=np.number).columns))
        right.metric("Categorical Columns", len(df.select_dtypes(exclude=np.number).columns))

        # The AI section is optional and must never block the standard charts,
        # so its failures are reported and swallowed here.
        if api_key:
            st.subheader("AI-Powered Dashboard")
            with st.spinner("Analyzing your data and generating visualizations..."):
                try:
                    generate_ai_dashboard(df, api_key)
                except Exception as exc:
                    st.error(f"Error generating AI dashboard: {exc}")

        st.subheader("Standard Visualizations")
        generate_standard_dashboard(df)
    except Exception as exc:
        st.error(f"Error processing your file: {exc}")
def generate_standard_dashboard(df):
    """Draw the fixed set of exploratory charts for ``df`` with Streamlit.

    Sections, in order: missing-value percentage per column; histogram and
    box plot of a user-selected numeric column; top-10 value counts of a
    user-selected non-numeric column; and, when at least two numeric columns
    exist, a correlation heatmap plus a scatter plot with selectable axes and
    optional colouring.

    Args:
        df: The pandas DataFrame to visualize.
    """
    # Split the columns by dtype: numeric versus everything else.
    numeric_names = df.select_dtypes(include=np.number).columns.tolist()
    other_names = df.select_dtypes(exclude=np.number).columns.tolist()

    # --- Data completeness -------------------------------------------------
    st.subheader("Data Completeness")
    null_counts = df.isnull().sum()
    completeness = pd.DataFrame({
        'column': df.columns,
        'missing_values': null_counts,
        'percentage': (null_counts / len(df) * 100).round(2),
    })
    completeness_fig = px.bar(
        completeness,
        x='column',
        y='percentage',
        title='Missing Values Percentage',
        labels={'percentage': 'Missing Values (%)', 'column': 'Column'},
    )
    st.plotly_chart(completeness_fig, use_container_width=True)

    # --- Numeric distributions ----------------------------------------------
    if numeric_names:
        st.subheader("Numerical Distributions")
        chosen_numeric = st.selectbox("Select a numerical column", numeric_names)
        hist_slot, box_slot = st.columns(2)
        with hist_slot:
            hist_fig = px.histogram(df, x=chosen_numeric, title=f'Distribution of {chosen_numeric}')
            st.plotly_chart(hist_fig, use_container_width=True)
        with box_slot:
            box_fig = px.box(df, y=chosen_numeric, title=f'Box Plot of {chosen_numeric}')
            st.plotly_chart(box_fig, use_container_width=True)

    # --- Categorical distributions ------------------------------------------
    if other_names:
        st.subheader("Categorical Distributions")
        chosen_category = st.selectbox("Select a categorical column", other_names)
        # Only the ten most frequent values are plotted, to keep the chart readable.
        top_counts = df[chosen_category].value_counts().nlargest(10)
        counts_fig = px.bar(
            x=top_counts.index,
            y=top_counts.values,
            title=f'Top 10 Categories in {chosen_category}',
            labels={'x': chosen_category, 'y': 'Count'},
        )
        st.plotly_chart(counts_fig, use_container_width=True)

    # --- Pairwise views: both need at least two numeric columns -------------
    if len(numeric_names) >= 2:
        st.subheader("Correlation Between Numerical Variables")
        correlations = df[numeric_names].corr()
        heatmap_fig = px.imshow(correlations, text_auto=True, aspect="auto",
                                title="Correlation Heatmap")
        st.plotly_chart(heatmap_fig, use_container_width=True)

        st.subheader("Explore Relationships")
        x_slot, y_slot = st.columns(2)
        with x_slot:
            x_col = st.selectbox("X-axis", numeric_names, index=0)
        with y_slot:
            # Default the Y axis to the second numeric column.
            y_col = st.selectbox("Y-axis", numeric_names, index=min(1, len(numeric_names) - 1))

        # 'None' is the sentinel entry that means "no colouring".
        color_col = None
        if other_names:
            picked = st.selectbox("Color by (optional)", ['None'] + other_names)
            color_col = None if picked == 'None' else picked

        scatter_fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                                 title=f'{y_col} vs {x_col}',
                                 opacity=0.7)
        st.plotly_chart(scatter_fig, use_container_width=True)
def generate_ai_dashboard(df, api_key):
    """Ask Gemini for insights and chart suggestions for ``df`` and render them.

    A per-column summary (dtype, unique count, missing count and up to five
    sampled non-null values) is sent to the model together with the expected
    JSON response schema. The returned insights are shown with st.info and
    each suggested visualization is drawn inside its own expander.

    Args:
        df: The pandas DataFrame to analyze.
        api_key: Gemini API key passed to genai.configure.

    Raises:
        Exception: Whatever genai raises while configuring or calling the
            model. Unparseable responses and failures of individual charts
            are reported on the page with st.error instead of being raised.
    """
    # Configure Gemini
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-2.0-flash')

    column_info = _summarize_columns(df)

    # Prepare prompt
    # NOTE: facet_column is requested in the schema below but is not used
    # when the charts are rendered.
    full_prompt = f"""
    Analyze the following dataset and suggest visualizations that would be insightful:
    Dataset Summary:
    - Rows: {df.shape[0]}
    - Columns: {df.shape[1]}
    Column Information:
    {json.dumps(column_info, indent=2)}
    Please provide visualization recommendations in the following JSON format:
    {{
    "insights": [
    "Key insight about the data",
    "Another insight about the data"
    ],
    "visualizations": [
    {{
    "title": "Visualization Title",
    "description": "What this visualization shows",
    "type": "bar|line|scatter|pie|histogram|box|heatmap",
    "x_column": "column_name_for_x_axis",
    "y_column": "column_name_for_y_axis",
    "color_column": "optional_column_for_color",
    "facet_column": "optional_column_for_faceting"
    }}
    ]
    }}
    Return ONLY the JSON, no other text.
    """

    # Call Gemini API
    response = model.generate_content(
        full_prompt,
        generation_config={"temperature": 0.3}
    )

    # Read the text exactly once: the accessor itself may raise (for example
    # when the response carries no text), and the error handler must not
    # trigger that failure a second time while trying to display it.
    raw_text = ""
    try:
        raw_text = response.text
        recommendations = json.loads(_extract_json_payload(raw_text))
        if not isinstance(recommendations, dict):
            raise ValueError("expected a JSON object at the top level")
    except Exception as e:
        st.error(f"Could not parse AI recommendations: {e}")
        if raw_text:
            st.code(raw_text, language="json")
        return

    # Display AI insights
    st.subheader("AI Insights")
    for insight in recommendations.get("insights") or []:
        st.info(insight)

    # Create visualizations; one bad entry must not stop the others.
    st.subheader("AI Recommended Visualizations")
    for viz in recommendations.get("visualizations") or []:
        if not isinstance(viz, dict):
            continue
        title = viz.get("title") or "Visualization"
        with st.expander(title, expanded=True):
            description = viz.get("description")
            if description:
                st.write(description)
            try:
                fig = _build_figure(df, viz, title)
                st.plotly_chart(fig, use_container_width=True)
            except Exception as e:
                st.error(f"Could not create this visualization: {e}")


def _summarize_columns(df):
    """Return a JSON-serializable summary dict for every column of ``df``."""
    summary = {}
    for col in df.columns:
        non_null = df[col].dropna()
        # Bound the sample by the non-null count, not the frame length:
        # sampling more values than exist raises ValueError.
        sample_size = min(5, len(non_null))
        sample = non_null.sample(sample_size).tolist() if sample_size else []
        summary[col] = {
            'dtype': str(df[col].dtype),
            'unique_values': int(df[col].nunique()),
            'missing_values': int(df[col].isna().sum()),
            'sample': [str(x) for x in sample],
        }
    return summary


def _extract_json_payload(text):
    """Strip a surrounding markdown code fence (```json or ```) if present."""
    for fence in ("```json", "```"):
        if fence in text:
            return text.split(fence)[1].split("```")[0].strip()
    return text


def _optional_column(df, name):
    """Return ``name`` if it is a column of ``df``, otherwise None."""
    if isinstance(name, str) and name in df.columns:
        return name
    return None


def _build_figure(df, viz, title):
    """Build the Plotly figure described by one AI recommendation dict."""
    x_col = viz.get("x_column")
    y_col = viz.get("y_column")
    # The colour column is optional, so a placeholder or unknown name from the
    # model is dropped instead of making Plotly reject the whole chart.
    color_col = _optional_column(df, viz.get("color_column"))
    viz_type = str(viz.get("type") or "bar").lower()

    if viz_type == "pie":
        return px.pie(df, names=x_col, values=y_col, title=title)
    if viz_type == "histogram":
        return px.histogram(df, x=x_col, color=color_col, title=title)
    if viz_type == "heatmap":
        pivot_table = pd.pivot_table(df, values=y_col, index=x_col, columns=color_col, aggfunc='mean')
        return px.imshow(pivot_table, title=title)

    xy_plotters = {
        "bar": px.bar,
        "line": px.line,
        "scatter": px.scatter,
        "box": px.box,
    }
    plotter = xy_plotters.get(viz_type)
    if plotter is not None:
        return plotter(df, x=x_col, y=y_col, color=color_col, title=title)

    # Unknown chart type: fall back to a plain bar chart.
    return px.bar(df, x=x_col, y=y_col, title=title)
# Build the dashboard only when this file is run as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()