mgbam commited on
Commit
8a0173b
Β·
verified Β·
1 Parent(s): 0a40e29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -129
app.py CHANGED
@@ -1,189 +1,201 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
- import tempfile
5
- from io import BytesIO
6
- from sqlalchemy import create_engine
7
  import plotly.express as px
8
  import matplotlib.pyplot as plt
 
 
9
  from statsmodels.tsa.arima.model import ARIMA
 
 
 
 
 
 
 
 
10
 
11
- # ── Helpers to read CSV/Excel robustly ───────────────────────────────────────────
 
 
 
12
  @st.cache_data
13
- def load_file(uploaded):
14
- """Read a CSV or Excel file into a DataFrame."""
15
  try:
16
  if uploaded.name.lower().endswith((".xls", ".xlsx")):
17
  return pd.read_excel(uploaded, engine="openpyxl")
18
  else:
19
  return pd.read_csv(uploaded)
20
  except Exception as e:
21
- raise st.Error(f"Error parsing file: {e}")
 
22
 
23
- # ── Helpers for SQL database ────────────────────────────────────────────────────
24
- SUPPORTED_ENGINES = ["postgresql", "mysql", "mssql+pyodbc", "oracle+cx_oracle"]
25
  @st.cache_data
26
- def list_tables(connection_string):
27
- engine = create_engine(connection_string)
28
  return engine.table_names()
29
 
30
  @st.cache_data
31
- def fetch_table(connection_string, table_name):
32
- engine = create_engine(connection_string)
33
- return pd.read_sql_table(table_name, engine)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- # ── Streamlit page setup ────────────────────────────────────────────────────────
36
- st.set_page_config(
37
- page_title="BizIntel AI Ultra",
38
- layout="wide",
39
- initial_sidebar_state="expanded",
40
- )
41
  st.title("πŸ“Š BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")
42
 
43
- # ── Data source selection ───────────────────────────────────────────────────────
44
- data_source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
45
 
46
- df = None
47
- if data_source == "Upload CSV / Excel":
48
  uploaded = st.file_uploader(
49
- "Drag & drop file here (≀ 500 MB)",
50
- type=["csv", "xls", "xlsx"],
51
- accept_multiple_files=False,
52
  )
53
  if uploaded:
54
  with st.spinner("Loading file…"):
55
- df = load_file(uploaded)
56
- st.success("βœ… File loaded into memory")
57
- elif data_source == "Connect to SQL Database":
58
- engine = st.selectbox("Select DB engine", SUPPORTED_ENGINES)
59
- conn_str = st.text_input("Connection string (SQLAlchemy format)", placeholder="e.g. postgresql://user:pass@host:port/dbname")
60
  if conn_str:
61
- tables = list_tables(conn_str)
62
  table = st.selectbox("Choose table", tables)
63
  if table:
64
  with st.spinner(f"Fetching `{table}`…"):
65
- df = fetch_table(conn_str, table)
66
- st.success(f"βœ… `{table}` loaded from database")
67
 
68
- # ── If DataFrame is ready, show overview and proceed ───────────────────────────
69
- if df is not None:
70
- st.markdown("### πŸ—‚οΈ Preview")
71
- st.dataframe(df.head(5), use_container_width=True)
72
-
73
- # Dataset overview metrics
74
- n_rows, n_cols = df.shape
75
- missing_pct = (df.isna().sum().sum() / (n_rows * n_cols)) * 100
76
  st.markdown("---")
77
- c1, c2, c3 = st.columns(3)
78
- c1.metric("Rows", f"{n_rows:,}")
79
- c2.metric("Columns", f"{n_cols:,}")
80
- c3.metric("Missing %", f"{missing_pct:.1f}%")
81
 
82
- # Detailed stats
83
- st.markdown("#### πŸ“‹ Detailed descriptive statistics")
84
- st.dataframe(df.describe(include="all").transpose(), use_container_width=True)
85
 
86
- # Optional exploratory visuals
87
- st.markdown("---")
88
- st.markdown("#### πŸ”Ž Optional Exploratory Visuals")
89
  col1, col2, col3 = st.columns(3)
90
- with col1:
91
- if st.checkbox("Histogram"):
92
- num_cols = df.select_dtypes(include="number").columns.tolist()
93
- col = st.selectbox("Choose numeric column for histogram", num_cols, key="hist")
94
- fig = px.histogram(df, x=col, nbins=30, title=f"Histogram of {col}")
95
- st.plotly_chart(fig, use_container_width=True)
96
- with col2:
97
- if st.checkbox("Scatter matrix"):
98
- num_cols = df.select_dtypes(include="number").columns.tolist()[:6] # limit to first 6
99
- fig = px.scatter_matrix(df[num_cols], dimensions=num_cols, title="Scatter Matrix")
100
- st.plotly_chart(fig, use_container_width=True)
101
- with col3:
102
- if st.checkbox("Correlation heatmap"):
103
- corr = df.select_dtypes(include="number").corr()
104
- fig, ax = plt.subplots(figsize=(6, 5))
105
- im = ax.imshow(corr, vmin=-1, vmax=1, cmap="RdBu")
106
- plt.xticks(range(len(corr)), corr.columns, rotation=45, ha="right")
107
- plt.yticks(range(len(corr)), corr.columns)
108
- plt.colorbar(im, ax=ax)
109
- st.pyplot(fig)
110
-
111
- # ── Trend & Forecast ──────────────────────────────────────────────────────
112
  st.markdown("---")
113
- st.markdown("### πŸ“ˆ Trend & Forecast")
114
- # pick date/time column
115
- dt_cols = df.columns[df.dtypes.isin([np.dtype("datetime64[ns]"), np.dtype("object")])].tolist()
116
- date_col = st.selectbox("Select date/time column", dt_cols)
117
- df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
118
 
119
- # pick numeric metric
120
- num_cols = df.select_dtypes(include="number").columns.tolist()
121
- metric_col = st.selectbox("Select numeric metric", num_cols)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
- # prepare time series
124
- ts = df[[date_col, metric_col]].dropna()
125
- ts = ts.set_index(date_col).sort_index()
126
- ts = ts[~ts.index.duplicated(keep="first")]
 
 
 
 
 
 
 
 
 
127
 
128
- # Trend plot
129
- fig_trend = px.line(ts, y=metric_col, title=f"{metric_col} over Time")
130
  st.plotly_chart(fig_trend, use_container_width=True)
131
 
132
- # Forecast next 90 days with ARIMA
133
- with st.spinner("Running 90-day forecast…"):
134
  try:
135
- model = ARIMA(ts, order=(1, 1, 1)).fit()
136
- fcast = model.get_forecast(90)
137
- idx = pd.date_range(ts.index.max(), periods=91, freq="D")[1:]
138
- df_f = pd.DataFrame({"forecast": fcast.predicted_mean}, index=idx)
139
 
 
140
  fig_fc = px.line(
141
- pd.concat([ts, df_f], axis=1),
142
- labels={metric_col: metric_col, "forecast": "Forecast"},
143
- title=f"{metric_col} & 90-Day Forecast",
144
  )
145
  st.plotly_chart(fig_fc, use_container_width=True)
 
146
  except Exception as e:
147
  st.error(f"Forecast failed: {e}")
148
 
149
- # ── Strategy Recommendations ─────────────────────────────────────────────
150
  st.markdown("---")
151
- st.markdown("### πŸš€ Strategy Recommendations")
152
- st.markdown(
153
- """
154
- 1. **Data Quality First**
155
- Address any missing or malformed dates before further time-series analysis.
156
 
157
- 2. **Trend & Seasonality**
158
- Investigate any upward/downward trends and repeating seasonal patterns.
159
 
160
- 3. **Outlier Management**
161
- Identify extreme highs/lows in your metricβ€”could be bulk orders or data errors.
162
 
163
- 4. **Segment-Level Analysis**
164
- Drill into key dimensions (e.g. region, product) to tailor growth strategies.
165
 
166
- 5. **Predict & Act**
167
- Use your 90-day forecasts to guide inventory, staffing, and marketing decisions.
168
- """
169
- )
170
 
171
- # downloadable strategy as markdown
172
- strategy_md = st.session_state.get("strategy_md", "")
173
- if not strategy_md:
174
- strategy_md = st.session_state["strategy_md"] = st.container().markdown("…") # dummy to store
175
 
176
  st.download_button(
177
  "πŸ“₯ Download Strategy (.md)",
178
- data="""
179
- # BizIntel AI Ultra – Strategy Recommendations
180
-
181
- 1. Data Quality First: …
182
- 2. Trend & Seasonality: …
183
- 3. Outlier Management: …
184
- 4. Segment-Level Analysis: …
185
- 5. Predict & Act: …
186
- """,
187
- file_name="strategy.md",
188
- mime="text/markdown",
189
  )
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
 
 
4
  import plotly.express as px
5
  import matplotlib.pyplot as plt
6
+ from io import BytesIO
7
+ from sqlalchemy import create_engine
8
  from statsmodels.tsa.arima.model import ARIMA
9
+ import openai
10
+
11
# ── CONFIG ───────────────────────────────────────────────────────────────────────
# Page chrome must be set before any other Streamlit call renders output.
st.set_page_config(
    page_title="BizIntel AI Ultra",
    layout="wide",
    initial_sidebar_state="expanded"
)

# You must set OPENAI_API_KEY in your Streamlit Secrets
# (st.secrets raises a KeyError at startup if the key is absent — fail fast by design).
openai.api_key = st.secrets["OPENAI_API_KEY"]
20
+
21
+ # ── CACHEABLE HELPERS ────────────────────────────────────────────────────────────
22
@st.cache_data
def load_uploaded_file(uploaded):
    """Parse an uploaded CSV or Excel file into a DataFrame.

    On any parsing failure the error is surfaced in the UI via
    ``st.error`` and an empty DataFrame is returned, so the caller's
    ``df.empty`` check short-circuits the rest of the app.
    """
    try:
        # Dispatch on the file extension: Excel needs the openpyxl engine,
        # everything else is treated as CSV.
        if uploaded.name.lower().endswith((".xls", ".xlsx")):
            frame = pd.read_excel(uploaded, engine="openpyxl")
        else:
            frame = pd.read_csv(uploaded)
        return frame
    except Exception as exc:
        st.error(f"⚠️ File parsing failed: {exc}")
        return pd.DataFrame()
33
 
 
 
34
@st.cache_data
def list_db_tables(conn_str):
    """Return the table names available in the database at ``conn_str``.

    Uses the SQLAlchemy inspection API: ``Engine.table_names()`` was
    deprecated in SQLAlchemy 1.4 and removed entirely in 2.0, so the
    original call raises ``AttributeError`` on current installs.
    """
    from sqlalchemy import inspect  # local import; module already depends on sqlalchemy

    engine = create_engine(conn_str)
    return inspect(engine).get_table_names()
38
 
39
@st.cache_data
def fetch_db_table(conn_str, table):
    """Load the full contents of ``table`` from the database into a DataFrame."""
    db_engine = create_engine(conn_str)
    frame = pd.read_sql_table(table, db_engine)
    return frame
43
+
44
+ # ── DATA NARRATIVE VIA OPENAI ───────────────────────────────────────────────────
45
def generate_data_narrative(df: pd.DataFrame) -> str:
    """Send a summary of ``df`` to OpenAI and return a polished narrative.

    The descriptive statistics are serialized with ``to_json()`` so the
    prompt actually contains valid JSON: the original interpolated a raw
    Python dict repr (single quotes, ``NaN`` literals), which contradicts
    the prompt's "JSON summary" claim and is not parseable as JSON.
    """
    summary_json = df.describe(include="all").transpose().round(2).to_json()
    prompt = (
        "You are a world-class data analyst. "
        "Below is a JSON summary of a dataset. "
        "Write a concise, professional narrative highlighting the top 5 business-critical insights, "
        "in bullet format:\n\n"
        f"{summary_json}\n\n"
    )
    # NOTE(review): openai.ChatCompletion was removed in openai>=1.0 —
    # confirm the pinned SDK version, or migrate to
    # openai.OpenAI().chat.completions.create(...).
    resp = openai.ChatCompletion.create(
        model="gpt-4o-mini",  # or "gpt-4o", "gpt-4o-mini-high"
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,  # low temperature keeps the narrative consistent run-to-run
    )
    return resp.choices[0].message.content.strip()
61
 
62
# ── APP ─────────────────────────────────────────────────────────────────────────
st.title("📊 BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")

# 1) Choose data source
source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])

df = pd.DataFrame()
if source == "Upload CSV / Excel":
    uploaded = st.file_uploader(
        "Drag & drop file here (≤500 MB) • .csv, .xls, .xlsx",
        type=["csv", "xls", "xlsx"],
    )
    if uploaded:
        with st.spinner("Loading file…"):
            df = load_uploaded_file(uploaded)

else:
    engine = st.selectbox("DB engine", ["postgresql", "mysql", "mssql+pyodbc", "oracle+cx_oracle"])
    conn_str = st.text_input("Connection string", placeholder="dialect+driver://user:pass@host/db")
    if conn_str:
        tables = list_db_tables(conn_str)
        table = st.selectbox("Choose table", tables)
        if table:
            with st.spinner(f"Fetching `{table}`…"):
                df = fetch_db_table(conn_str, table)

# 2) If we have data…
if not df.empty:
    st.success("✅ Data loaded!")
    st.markdown("---")

    # 2a) Preview & summary metrics
    st.subheader("🗂 Data Preview & Overview")
    st.dataframe(df.head(5), use_container_width=True)

    r, c = df.shape
    # df is non-empty here, so r * c > 0 and the division is safe.
    missing_pct = (df.isna().sum().sum() / (r * c) * 100).round(1)
    col1, col2, col3 = st.columns(3)
    col1.metric("Rows", f"{r:,}")
    col2.metric("Cols", f"{c:,}")
    col3.metric("Missing %", f"{missing_pct}%")
    st.markdown("---")

    # 2b) Automated data narrative
    st.subheader("📝 Data Narrative")
    with st.spinner("Generating insights…"):
        narrative = generate_data_narrative(df)
    st.markdown(narrative)

    # 2c) Optional EDA visuals
    st.subheader("🔎 Exploratory Visuals")
    num_cols = df.select_dtypes("number").columns.tolist()
    if st.checkbox("Show histogram"):
        col = st.selectbox("Histogram column", num_cols, key="hist")
        fig = px.histogram(df, x=col, nbins=30, title=f"Histogram of {col}")
        st.plotly_chart(fig, use_container_width=True)

    if st.checkbox("Show scatter matrix"):
        dims = num_cols[:6]  # cap at 6 dimensions to keep the matrix readable
        fig = px.scatter_matrix(df[dims], dimensions=dims, title="Scatter Matrix")
        st.plotly_chart(fig, use_container_width=True)

    if st.checkbox("Show correlation heatmap"):
        corr = df[num_cols].corr()
        fig, ax = plt.subplots(figsize=(6, 5))
        im = ax.imshow(corr, cmap="RdBu", vmin=-1, vmax=1)
        plt.xticks(range(len(corr)), corr.columns, rotation=45, ha="right")
        plt.yticks(range(len(corr)), corr.columns)
        plt.colorbar(im, ax=ax)
        st.pyplot(fig)

    # 3) Trend & forecast
    st.markdown("---")
    st.subheader("📈 Time-Series Trend & 90-Day Forecast")

    # pick columns: datetime-typed columns plus object columns that may parse as dates
    dt_opts = [
        col for col in df.columns
        if pd.api.types.is_datetime64_any_dtype(df[col]) or df[col].dtype == "object"
    ]
    date_col = st.selectbox("Date column", dt_opts)
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
    metric_col = st.selectbox("Metric column", num_cols)

    ts = (
        df[[date_col, metric_col]]
        .dropna()
        .set_index(date_col)
        .sort_index()
    )
    # BUG FIX: the duplicate mask must come from the series' OWN index.
    # The original used `df.index.duplicated(...)`, whose length is the
    # pre-dropna row count — pandas raises an unalignable-indexer error
    # whenever dropna() removed any rows.
    ts = ts[~ts.index.duplicated(keep="first")]

    # plot trend
    fig_trend = px.line(ts, y=metric_col, title=f"{metric_col} over Time", labels={"index": "Date"})
    st.plotly_chart(fig_trend, use_container_width=True)

    # forecast
    with st.spinner("Running ARIMA…"):
        try:
            model = ARIMA(ts, order=(1, 1, 1)).fit()
            # 91 periods starting at the last observation; drop the first so the
            # forecast index begins the day AFTER the observed series ends.
            future_idx = pd.date_range(start=ts.index.max(), periods=91, freq="D")[1:]
            pred = model.get_forecast(90).predicted_mean
            df_pred = pd.Series(pred.values, index=future_idx, name="Forecast")

            combo = pd.concat([ts[metric_col], df_pred], axis=1)
            fig_fc = px.line(
                combo,
                labels={metric_col: metric_col, "Forecast": "Forecast"},
                title=f"{metric_col} & 90-Day Forecast",
            )
            st.plotly_chart(fig_fc, use_container_width=True)

        except Exception as e:
            # ARIMA can fail on short/irregular series; show the reason instead of crashing.
            st.error(f"Forecast failed: {e}")

    # 4) Strategy download
    st.markdown("---")
    st.subheader("🚀 Actionable Strategy Brief")
    strategy_md = """
# BizIntel AI Ultra – Strategy Brief

**1. Data Quality First**
Ensure all dates are parsed correctly—critical for any time-series modeling.

**2. Trend & Seasonality**
Investigate the underlying patterns and adjust your operations calendar.

**3. Outlier Management**
Flag and validate extreme observations to avoid skewed forecasts.

**4. Segment-Level Insights**
Drill into regions or product lines for targeted interventions.

**5. Predict & Act**
Leverage your 90-day projections for inventory, staffing, and marketing plans.
""".strip()

    st.download_button(
        "📥 Download Strategy (.md)",
        data=strategy_md,
        file_name="bizintel_strategy.md",
        mime="text/markdown",
    )