File size: 6,650 Bytes
a875537
 
 
 
 
 
 
 
5f67bb9
 
a875537
5f67bb9
dc51ef8
5f67bb9
0a40e29
 
a875537
5f67bb9
a875537
5f67bb9
29aad45
a875537
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc51ef8
 
a875537
5f67bb9
dc51ef8
 
a875537
5f67bb9
a875537
dc51ef8
b5d6aaa
dc51ef8
 
 
 
5f67bb9
 
a875537
dc51ef8
5f67bb9
 
a875537
dc51ef8
5f67bb9
 
a875537
dc51ef8
 
a875537
 
 
dc51ef8
a875537
 
 
 
5f67bb9
a875537
8a0173b
dc51ef8
 
a875537
5f67bb9
a875537
5f67bb9
b5d6aaa
a875537
5f67bb9
 
a875537
b5d6aaa
dc51ef8
a875537
b5d6aaa
a875537
 
 
 
 
 
 
 
 
 
 
dc51ef8
 
a875537
 
 
 
5f67bb9
a875537
b5d6aaa
5f67bb9
b5d6aaa
a875537
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5d6aaa
a875537
b5d6aaa
 
a875537
dc51ef8
a875537
5f67bb9
 
a875537
 
 
 
 
5f67bb9
a875537
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""app.pyย โ€”ย BizIntelย AIย Ultraย (Geminiโ€‘only,ย v4)
Robust BI copilot featuring:
โ€ข CSVย /ย Excelย /ย Parquet uploads + live SQL ingestion
โ€ข Memoryโ€‘safe sampling for large files (โ‰คโ€ฏ5โ€ฏM rows sample)
โ€ข Geminiโ€‘generated narrative insights & quick schema audit
โ€ข Interactive EDA (histogramย +ย box, correlation heatmap)
โ€ข Tunable ARIMA forecasting (p,d,q & horizon)
โ€ข Oneโ€‘click strategy brief download
"""
from __future__ import annotations
import io, os, tempfile
from pathlib import Path
from typing import List

import pandas as pd
import plotly.express as px
import streamlit as st
from sqlalchemy import create_engine
from statsmodels.tsa.arima.model import ARIMA
import google.generativeai as genai

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 0โ€ฏยทโ€ฏPAGE CONFIG โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
st.set_page_config(
    page_title="BizIntelย AIย Ultra",
    page_icon="๐Ÿ“Š",
    layout="wide",
    initial_sidebar_state="expanded",
)

auth_key = None
try:
    auth_key = st.secrets["GEMINI_APIKEY"]  # type: ignore[attr-defined]
except Exception:
    auth_key = os.getenv("GEMINI_APIKEY")

if not auth_key:
    st.error(
        "โŒ **GEMINI_APIKEY** missing. Add it in *Settings โ†’ Secrets* or set envโ€ฏvar before running."
    )
    st.stop()

genai.configure(api_key=auth_key)
GEM_MODEL = "gemini-1.5-pro-latest"
TMP = Path(tempfile.gettempdir())

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 1โ€ฏยทโ€ฏHELPERS โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
@st.cache_data(show_spinner=False)
def load_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame:
    suf = Path(buf.name).suffix.lower()
    if suf in {".xls", ".xlsx"}:
        return pd.read_excel(buf, engine="openpyxl")
    if suf == ".parquet":
        return pd.read_parquet(buf)
    return pd.read_csv(buf, nrows=5_000_000 if sample else None)

@st.cache_data(show_spinner=False)
def list_sql_tables(uri: str) -> List[str]:
    return create_engine(uri).table_names()

@st.cache_data(show_spinner=True)
def fetch_sql_table(uri: str, tbl: str) -> pd.DataFrame:
    return pd.read_sql_table(tbl, create_engine(uri))

@st.cache_data(show_spinner=False)
def gemini(prompt: str) -> str:
    return genai.GenerativeModel(GEM_MODEL).generate_content(prompt).text.strip()

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 2โ€ฏยทโ€ฏDATA LOAD UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
st.title("๐Ÿ“Šย BizIntelย AIย Ultra โ€” Geminiย 1.5ย Pro BI Copilot")
mode = st.sidebar.radio("Data source", ["Upload file", "SQL DB"], horizontal=True)

df: pd.DataFrame = pd.DataFrame()
if mode == "Upload file":
    upl = st.sidebar.file_uploader("CSV / Excel / Parquet", ["csv", "xls", "xlsx", "parquet"], help="โ‰คโ€ฏ2โ€ฏGB")
    sample = st.sidebar.checkbox("Sample firstย 5โ€ฏM rows only")
    if upl:
        df = load_file(upl, sample)
else:
    uri = st.sidebar.text_input("SQLAlchemy URI")
    if uri:
        tbl = st.sidebar.selectbox("Table", list_sql_tables(uri))
        if tbl:
            df = fetch_sql_table(uri, tbl)

if df.empty:
    st.info("โฌ…๏ธย Load data to start analysis.")
    st.stop()

st.success("โœ…ย Data loaded")
st.dataframe(df.head(), use_container_width=True)

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 3โ€ฏยทโ€ฏOVERVIEW & GEMINI INSIGHT โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
rows, cols = df.shape
missing_pct = df.isna().sum().sum() / (rows * cols) * 100
m1,m2,m3 = st.columns(3)
m1.metric("Rows", f"{rows:,}")
m2.metric("Columns", cols)
m3.metric("Missingย %", f"{missing_pct:.1f}")

st.subheader("๐Ÿง ย Gemini Insights")
with st.spinner("Generating narrativeโ€ฆ"):
    summ_json = df.describe(include="all", datetime_is_numeric=True).round(2).to_json()
    st.markdown(gemini(
        "You are a senior BI analyst. Provide five bullet insights and three actions for this dataset:\n\n" + summ_json
    ))

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 4โ€ฏยทโ€ฏTIMEโ€‘SERIES PREP โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# attempt datetime coercion
for col in df.columns:
    if not pd.api.types.is_datetime64_any_dtype(df[col]):
        try:
            df[col] = pd.to_datetime(df[col])
        except: pass

DATE_COL = st.selectbox("Date column", [c for c in df.columns if pd.api.types.is_datetime64_any_dtype(df[c])])
NUM_COL = st.selectbox("Numeric metric", [c for c in df.select_dtypes("number").columns])

ts = df[[DATE_COL, NUM_COL]].dropna().groupby(DATE_COL)[NUM_COL].mean().sort_index()
fig_tr = px.line(ts, title=f"{NUM_COL} Trend", labels={"index":"Date", NUM_COL:NUM_COL})
st.plotly_chart(fig_tr, use_container_width=True)

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 5โ€ฏยทโ€ฏARIMA FORECAST โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
st.subheader("๐Ÿ”ฎย ARIMA Forecast")
steps = st.slider("Horizon (days)", 3, 365, 90)
p = st.number_input("p", 0, 5, 1); d = st.number_input("d", 0, 2, 1); q = st.number_input("q", 0, 5, 1)
try:
    model = ARIMA(ts, order=(p, d, q)).fit()
    future_idx = pd.date_range(ts.index[-1], periods=steps + 1, freq=pd.infer_freq(ts.index) or "D")[1:]
    forecast = pd.Series(model.forecast(steps), index=future_idx, name="Forecast")
    fig_fc = px.line(pd.concat([ts, forecast], axis=1), title="Actual vs Forecast")
    st.plotly_chart(fig_fc, use_container_width=True)
except Exception as e:
    st.error(f"ARIMA failed: {e}")

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 6โ€ฏยทโ€ฏEDA EXPANDERS โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
st.subheader("๐Ÿ”ย EDA Dashboard")
with st.expander("Histogram + Box"):
    st.plotly_chart(px.histogram(df, x=NUM_COL, marginal="box", template="plotly_dark"), use_container_width=True)
with st.expander("Correlation Heatmap"):
    corr = df.select_dtypes("number").corr()
    st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", title="Correlation"), use_container_width=True)

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 7โ€ฏยทโ€ฏSTRATEGY BRIEF โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
brief = (
    "# Strategy Brief\n"
    "โ€ข Clean missing date values for stable models.\n"
    "โ€ข Investigate high correlations for driver analysis.\n"
    "โ€ข Use forecast for inventory & workforce planning.\n"
    "โ€ข Monitor outliers (>3ฯƒ) weekly.\n"
    "โ€ข Segment by region/product for targeted actions."
)
st.download_button("โฌ‡๏ธย Download Strategy (.md)", brief, "bizintel_brief.md", "text/markdown")