Sebbe33 commited on
Commit
78c6e23
·
verified ·
1 Parent(s): 75fbe0b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +238 -0
app.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ import numpy as np
6
+ import google.generativeai as genai
7
+ import os
8
+ from io import StringIO
9
+ import json
10
+
11
# Configure the Streamlit page: wide layout and a custom page title
st.set_page_config(page_title="Dynamic Data Dashboard", layout="wide")
12
+
13
def main():
    """Render the app: upload a CSV, summarize it, then draw the dashboards.

    The Gemini key is taken from the sidebar input, falling back to the
    ``GEMINI_API_KEY`` environment variable. Once a file is uploaded, the
    AI dashboard is attempted only when a key is available, and the standard
    dashboard is drawn afterwards. Errors are reported with ``st.error``.
    """
    st.title("Dynamic Data Dashboard Generator")
    st.markdown("""
    Upload your CSV file to generate an interactive dashboard tailored to your data.
    The application uses AI to analyze your data and create relevant visualizations.
    """)

    # A key typed into the sidebar takes precedence over the environment
    sidebar_key = st.sidebar.text_input("Enter your Gemini API key for more power", type="password")
    api_key = sidebar_key if sidebar_key else os.getenv("GEMINI_API_KEY")

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

    # Nothing to render until the user provides a file
    if uploaded_file is None:
        return

    try:
        # Load the CSV and show its first rows
        df = pd.read_csv(uploaded_file)
        with st.expander("Preview Data", expanded=True):
            st.dataframe(df.head(10))

        # Headline figures about the dataset
        st.subheader("Data Overview")
        shape_slot, dtype_slot = st.columns(2)
        with shape_slot:
            st.metric("Rows", df.shape[0])
            st.metric("Columns", df.shape[1])
        with dtype_slot:
            numeric_part = df.select_dtypes(include=np.number)
            st.metric("Numerical Columns", len(numeric_part.columns))
            non_numeric_part = df.select_dtypes(exclude=np.number)
            st.metric("Categorical Columns", len(non_numeric_part.columns))

        # The AI section is optional: it needs an API key
        if api_key:
            st.subheader("AI-Powered Dashboard")
            with st.spinner("Analyzing your data and generating visualizations..."):
                try:
                    generate_ai_dashboard(df, api_key)
                except Exception as e:
                    # An AI failure must not prevent the standard charts below
                    st.error(f"Error generating AI dashboard: {e}")

        # The generic charts come after the optional AI section
        st.subheader("Standard Visualizations")
        generate_standard_dashboard(df)

    except Exception as e:
        st.error(f"Error processing your file: {e}")
58
+
59
def generate_standard_dashboard(df):
    """Draw the generic, AI-free charts for ``df`` with Streamlit and Plotly.

    Sections, in render order: missing-value percentages per column, a
    histogram and box plot for a user-selected numerical column, the ten
    most frequent values of a user-selected non-numerical column, a
    correlation heatmap, and a scatter plot with user-selected axes.
    A section is skipped when ``df`` lacks the columns it needs.
    """
    # Split the columns by dtype: NumPy-numeric versus everything else
    numeric_index = df.select_dtypes(include=np.number).columns
    other_index = df.select_dtypes(exclude=np.number).columns
    num_cols = numeric_index.tolist()
    cat_cols = other_index.tolist()

    # --- Missing values per column ---
    st.subheader("Data Completeness")
    null_counts = df.isnull().sum()
    null_share = (null_counts / len(df) * 100).round(2)
    missing_data = pd.DataFrame({
        'column': df.columns,
        'missing_values': null_counts,
        'percentage': null_share,
    })
    st.plotly_chart(
        px.bar(
            missing_data,
            x='column',
            y='percentage',
            title='Missing Values Percentage',
            labels={'percentage': 'Missing Values (%)', 'column': 'Column'},
        ),
        use_container_width=True,
    )

    # --- Histogram and box plot of one numerical column ---
    if num_cols:
        st.subheader("Numerical Distributions")
        chosen_num = st.selectbox("Select a numerical column", num_cols)

        hist_slot, box_slot = st.columns(2)
        with hist_slot:
            histogram = px.histogram(df, x=chosen_num, title=f'Distribution of {chosen_num}')
            st.plotly_chart(histogram, use_container_width=True)
        with box_slot:
            box_plot = px.box(df, y=chosen_num, title=f'Box Plot of {chosen_num}')
            st.plotly_chart(box_plot, use_container_width=True)

    # --- Most frequent values of one non-numerical column ---
    if cat_cols:
        st.subheader("Categorical Distributions")
        chosen_cat = st.selectbox("Select a categorical column", cat_cols)

        # Only the 10 most frequent values are shown, for readability
        top_values = df[chosen_cat].value_counts().nlargest(10)
        st.plotly_chart(
            px.bar(
                x=top_values.index,
                y=top_values.values,
                title=f'Top 10 Categories in {chosen_cat}',
                labels={'x': chosen_cat, 'y': 'Count'},
            ),
            use_container_width=True,
        )

    # The two remaining sections both need at least two numerical columns
    has_numeric_pair = len(num_cols) >= 2

    # --- Pairwise correlation heatmap ---
    if has_numeric_pair:
        st.subheader("Correlation Between Numerical Variables")
        corr_matrix = df[num_cols].corr()
        heatmap = px.imshow(corr_matrix, text_auto=True, aspect="auto",
                            title="Correlation Heatmap")
        st.plotly_chart(heatmap, use_container_width=True)

    # --- Scatter plot with user-selected axes ---
    if has_numeric_pair:
        st.subheader("Explore Relationships")
        x_slot, y_slot = st.columns(2)
        with x_slot:
            x_col = st.selectbox("X-axis", num_cols, index=0)
        with y_slot:
            # With two or more numerical columns the second one is the default
            y_col = st.selectbox("Y-axis", num_cols, index=1)

        # Colouring is offered only when a non-numerical column exists;
        # the 'None' entry stands for "no colouring"
        color_col = None
        if cat_cols:
            picked = st.selectbox("Color by (optional)", ['None'] + cat_cols)
            color_col = picked if picked != 'None' else None

        scatter = px.scatter(df, x=x_col, y=y_col, color=color_col,
                             title=f'{y_col} vs {x_col}',
                             opacity=0.7)
        st.plotly_chart(scatter, use_container_width=True)
128
+
129
def _summarize_columns(df):
    """Build a JSON-serializable per-column summary of ``df`` for the prompt."""
    summary = {}
    for col in df.columns:
        non_null = df[col].dropna()
        # Size the sample from the non-null values actually available;
        # sizing it from len(df) raises when a column has few non-null values.
        if non_null.empty:
            sample = []
        else:
            sample = non_null.sample(min(5, len(non_null))).tolist()
        summary[col] = {
            'dtype': str(df[col].dtype),
            # int(...) converts NumPy integers, which json.dumps rejects
            'unique_values': int(df[col].nunique()),
            'missing_values': int(df[col].isna().sum()),
            'sample': sample,
        }
    return summary


def _extract_json_text(text):
    """Return ``text`` without a surrounding Markdown code fence, if present."""
    if "```json" in text:
        return text.split("```json")[1].split("```")[0].strip()
    if "```" in text:
        return text.split("```")[1].split("```")[0].strip()
    return text


def _column_or_none(df, name):
    """Return ``name`` if it is a column of ``df``, otherwise ``None``."""
    if isinstance(name, str) and name in df.columns:
        return name
    return None


def _build_figure(df, viz, title):
    """Create the Plotly figure described by one recommendation dict."""
    # Empty strings are treated like a missing column name
    x_col = viz.get("x_column") or None
    y_col = viz.get("y_column") or None
    # The colour column is optional, so an unknown or placeholder name is
    # dropped instead of letting Plotly reject the whole chart.
    color_col = _column_or_none(df, viz.get("color_column"))
    viz_type = str(viz.get("type") or "bar").lower()

    if viz_type == "bar":
        return px.bar(df, x=x_col, y=y_col, color=color_col, title=title)
    if viz_type == "line":
        return px.line(df, x=x_col, y=y_col, color=color_col, title=title)
    if viz_type == "scatter":
        return px.scatter(df, x=x_col, y=y_col, color=color_col, title=title)
    if viz_type == "pie":
        return px.pie(df, names=x_col, values=y_col, title=title)
    if viz_type == "histogram":
        return px.histogram(df, x=x_col, color=color_col, title=title)
    if viz_type == "box":
        return px.box(df, y=y_col, x=x_col, color=color_col, title=title)
    if viz_type == "heatmap":
        pivot_table = pd.pivot_table(df, values=y_col, index=x_col,
                                     columns=color_col, aggfunc='mean')
        return px.imshow(pivot_table, title=title)
    # Unknown chart types fall back to a plain bar chart
    return px.bar(df, x=x_col, y=y_col, title=title)


def generate_ai_dashboard(df, api_key):
    """Use Gemini AI to analyze data and generate dashboard recommendations.

    A per-column summary of ``df`` is sent to the ``gemini-2.0-flash`` model,
    which is asked to answer with JSON holding ``insights`` and
    ``visualizations``. Insights are shown with ``st.info``; each
    visualization is drawn inside its own expander, and a chart that cannot
    be built is reported with ``st.error`` without affecting the others.

    Args:
        df: DataFrame to analyze.
        api_key: Gemini API key passed to ``genai.configure``.

    Returns:
        None. All output is rendered through Streamlit.

    Errors from the Gemini call, or from reading the response text, are not
    caught here and propagate to the caller. A response that is not a valid
    JSON object is reported on the page together with the raw text.
    """
    # Configure Gemini
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-2.0-flash')

    # Generate data summary
    column_info = _summarize_columns(df)

    # Prepare prompt. default=str is deliberate: the summary only feeds a
    # text prompt, so a sample value JSON cannot encode is sent as a string.
    # NOTE(review): the prompt asks for "facet_column", but the rendering
    # below does not use it.
    full_prompt = f"""
    Analyze the following dataset and suggest visualizations that would be insightful:

    Dataset Summary:
    - Rows: {df.shape[0]}
    - Columns: {df.shape[1]}

    Column Information:
    {json.dumps(column_info, indent=2, default=str)}

    Please provide visualization recommendations in the following JSON format:
    {{
        "insights": [
            "Key insight about the data",
            "Another insight about the data"
        ],
        "visualizations": [
            {{
                "title": "Visualization Title",
                "description": "What this visualization shows",
                "type": "bar|line|scatter|pie|histogram|box|heatmap",
                "x_column": "column_name_for_x_axis",
                "y_column": "column_name_for_y_axis",
                "color_column": "optional_column_for_color",
                "facet_column": "optional_column_for_faceting"
            }}
        ]
    }}

    Return ONLY the JSON, no other text.
    """

    # Call Gemini API
    response = model.generate_content(
        full_prompt,
        generation_config={"temperature": 0.3}
    )

    # Read the text once, outside the parsing try-block, so the error
    # handler below never has to touch the response object again.
    raw_text = response.text

    try:
        recommendations = json.loads(_extract_json_text(raw_text))
        if not isinstance(recommendations, dict):
            raise ValueError("expected a JSON object at the top level")
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        st.error(f"Could not parse AI recommendations: {e}")
        st.code(raw_text, language="json")
        return

    # Display AI insights
    st.subheader("AI Insights")
    for insight in recommendations.get("insights") or []:
        st.info(insight)

    # Create visualizations
    st.subheader("AI Recommended Visualizations")
    for viz in recommendations.get("visualizations") or []:
        if not isinstance(viz, dict):
            continue
        # A missing title or description must not abort the remaining charts
        title = viz.get("title") or "Untitled visualization"
        with st.expander(title, expanded=True):
            description = viz.get("description")
            if description:
                st.write(description)

            try:
                fig = _build_figure(df, viz, title)
                st.plotly_chart(fig, use_container_width=True)
            except Exception as e:
                # Plotly and pandas raise a variety of exception types for
                # bad column choices; keep one bad chart from stopping the rest.
                st.error(f"Could not create this visualization: {e}")
236
+
237
# Start the app only when this file is run as a script
if __name__ == "__main__":
    main()