Spaces:
Sleeping
Sleeping
"""app.pyย โย BizIntelย AIย Ultraย (Geminiโonly,ย v4) | |
Robust BI copilot featuring: | |
โข CSVย /ย Excelย /ย Parquet uploads + live SQL ingestion | |
โข Memoryโsafe sampling for large files (โคโฏ5โฏM rows sample) | |
โข Geminiโgenerated narrative insights & quick schema audit | |
โข Interactive EDA (histogramย +ย box, correlation heatmap) | |
โข Tunable ARIMA forecasting (p,d,q & horizon) | |
โข Oneโclick strategy brief download | |
""" | |
from __future__ import annotations | |
import io, os, tempfile | |
from pathlib import Path | |
from typing import List | |
import pandas as pd | |
import plotly.express as px | |
import streamlit as st | |
from sqlalchemy import create_engine | |
from statsmodels.tsa.arima.model import ARIMA | |
import google.generativeai as genai | |
# โโโโโโโโโโโโโโโโโโโโโ 0โฏยทโฏPAGE CONFIG โโโโโโโโโโโโโโโโโโโโโโ | |
st.set_page_config( | |
page_title="BizIntelย AIย Ultra", | |
page_icon="๐", | |
layout="wide", | |
initial_sidebar_state="expanded", | |
) | |
auth_key = None | |
try: | |
auth_key = st.secrets["GEMINI_APIKEY"] # type: ignore[attr-defined] | |
except Exception: | |
auth_key = os.getenv("GEMINI_APIKEY") | |
if not auth_key: | |
st.error( | |
"โ **GEMINI_APIKEY** missing. Add it in *Settings โ Secrets* or set envโฏvar before running." | |
) | |
st.stop() | |
genai.configure(api_key=auth_key) | |
GEM_MODEL = "gemini-1.5-pro-latest" | |
TMP = Path(tempfile.gettempdir()) | |
# โโโโโโโโโโโโโโโโโโโโโ 1โฏยทโฏHELPERS โโโโโโโโโโโโโโโโโโโโโโโโโโ | |
def load_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame: | |
suf = Path(buf.name).suffix.lower() | |
if suf in {".xls", ".xlsx"}: | |
return pd.read_excel(buf, engine="openpyxl") | |
if suf == ".parquet": | |
return pd.read_parquet(buf) | |
return pd.read_csv(buf, nrows=5_000_000 if sample else None) | |
def list_sql_tables(uri: str) -> List[str]: | |
return create_engine(uri).table_names() | |
def fetch_sql_table(uri: str, tbl: str) -> pd.DataFrame: | |
return pd.read_sql_table(tbl, create_engine(uri)) | |
def gemini(prompt: str) -> str: | |
return genai.GenerativeModel(GEM_MODEL).generate_content(prompt).text.strip() | |
# โโโโโโโโโโโโโโโโโโโโโ 2โฏยทโฏDATA LOAD UI โโโโโโโโโโโโโโโโโโโโโ | |
st.title("๐ย BizIntelย AIย Ultra โ Geminiย 1.5ย Pro BI Copilot") | |
mode = st.sidebar.radio("Data source", ["Upload file", "SQL DB"], horizontal=True) | |
df: pd.DataFrame = pd.DataFrame() | |
if mode == "Upload file": | |
upl = st.sidebar.file_uploader("CSV / Excel / Parquet", ["csv", "xls", "xlsx", "parquet"], help="โคโฏ2โฏGB") | |
sample = st.sidebar.checkbox("Sample firstย 5โฏM rows only") | |
if upl: | |
df = load_file(upl, sample) | |
else: | |
uri = st.sidebar.text_input("SQLAlchemy URI") | |
if uri: | |
tbl = st.sidebar.selectbox("Table", list_sql_tables(uri)) | |
if tbl: | |
df = fetch_sql_table(uri, tbl) | |
if df.empty: | |
st.info("โฌ ๏ธย Load data to start analysis.") | |
st.stop() | |
st.success("โ ย Data loaded") | |
st.dataframe(df.head(), use_container_width=True) | |
# โโโโโโโโโโโโโโโโโโโโโ 3โฏยทโฏOVERVIEW & GEMINI INSIGHT โโโโโโโโโ | |
rows, cols = df.shape | |
missing_pct = df.isna().sum().sum() / (rows * cols) * 100 | |
m1,m2,m3 = st.columns(3) | |
m1.metric("Rows", f"{rows:,}") | |
m2.metric("Columns", cols) | |
m3.metric("Missingย %", f"{missing_pct:.1f}") | |
st.subheader("๐ง ย Gemini Insights") | |
with st.spinner("Generating narrativeโฆ"): | |
summ_json = df.describe(include="all", datetime_is_numeric=True).round(2).to_json() | |
st.markdown(gemini( | |
"You are a senior BI analyst. Provide five bullet insights and three actions for this dataset:\n\n" + summ_json | |
)) | |
# โโโโโโโโโโโโโโโโโโโโโ 4โฏยทโฏTIMEโSERIES PREP โโโโโโโโโโโโโโโโโโ | |
# attempt datetime coercion | |
for col in df.columns: | |
if not pd.api.types.is_datetime64_any_dtype(df[col]): | |
try: | |
df[col] = pd.to_datetime(df[col]) | |
except: pass | |
DATE_COL = st.selectbox("Date column", [c for c in df.columns if pd.api.types.is_datetime64_any_dtype(df[c])]) | |
NUM_COL = st.selectbox("Numeric metric", [c for c in df.select_dtypes("number").columns]) | |
ts = df[[DATE_COL, NUM_COL]].dropna().groupby(DATE_COL)[NUM_COL].mean().sort_index() | |
fig_tr = px.line(ts, title=f"{NUM_COL} Trend", labels={"index":"Date", NUM_COL:NUM_COL}) | |
st.plotly_chart(fig_tr, use_container_width=True) | |
# โโโโโโโโโโโโโโโโโโโโโ 5โฏยทโฏARIMA FORECAST โโโโโโโโโโโโโโโโโโโ | |
st.subheader("๐ฎย ARIMA Forecast") | |
steps = st.slider("Horizon (days)", 3, 365, 90) | |
p = st.number_input("p", 0, 5, 1); d = st.number_input("d", 0, 2, 1); q = st.number_input("q", 0, 5, 1) | |
try: | |
model = ARIMA(ts, order=(p, d, q)).fit() | |
future_idx = pd.date_range(ts.index[-1], periods=steps + 1, freq=pd.infer_freq(ts.index) or "D")[1:] | |
forecast = pd.Series(model.forecast(steps), index=future_idx, name="Forecast") | |
fig_fc = px.line(pd.concat([ts, forecast], axis=1), title="Actual vs Forecast") | |
st.plotly_chart(fig_fc, use_container_width=True) | |
except Exception as e: | |
st.error(f"ARIMA failed: {e}") | |
# โโโโโโโโโโโโโโโโโโโโโ 6โฏยทโฏEDA EXPANDERS โโโโโโโโโโโโโโโโโโโโ | |
st.subheader("๐ย EDA Dashboard") | |
with st.expander("Histogram + Box"): | |
st.plotly_chart(px.histogram(df, x=NUM_COL, marginal="box", template="plotly_dark"), use_container_width=True) | |
with st.expander("Correlation Heatmap"): | |
corr = df.select_dtypes("number").corr() | |
st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", title="Correlation"), use_container_width=True) | |
# โโโโโโโโโโโโโโโโโโโโโ 7โฏยทโฏSTRATEGY BRIEF โโโโโโโโโโโโโโโโโโโ | |
brief = ( | |
"# Strategy Brief\n" | |
"โข Clean missing date values for stable models.\n" | |
"โข Investigate high correlations for driver analysis.\n" | |
"โข Use forecast for inventory & workforce planning.\n" | |
"โข Monitor outliers (>3ฯ) weekly.\n" | |
"โข Segment by region/product for targeted actions." | |
) | |
st.download_button("โฌ๏ธย Download Strategy (.md)", brief, "bizintel_brief.md", "text/markdown") | |