Spaces:
Sleeping
Sleeping
File size: 6,650 Bytes
a875537 5f67bb9 a875537 5f67bb9 dc51ef8 5f67bb9 0a40e29 a875537 5f67bb9 a875537 5f67bb9 29aad45 a875537 dc51ef8 a875537 5f67bb9 dc51ef8 a875537 5f67bb9 a875537 dc51ef8 b5d6aaa dc51ef8 5f67bb9 a875537 dc51ef8 5f67bb9 a875537 dc51ef8 5f67bb9 a875537 dc51ef8 a875537 dc51ef8 a875537 5f67bb9 a875537 8a0173b dc51ef8 a875537 5f67bb9 a875537 5f67bb9 b5d6aaa a875537 5f67bb9 a875537 b5d6aaa dc51ef8 a875537 b5d6aaa a875537 dc51ef8 a875537 5f67bb9 a875537 b5d6aaa 5f67bb9 b5d6aaa a875537 b5d6aaa a875537 b5d6aaa a875537 dc51ef8 a875537 5f67bb9 a875537 5f67bb9 a875537 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
"""app.pyย โย BizIntelย AIย Ultraย (Geminiโonly,ย v4)
Robust BI copilot featuring:
โข CSVย /ย Excelย /ย Parquet uploads + live SQL ingestion
โข Memoryโsafe sampling for large files (โคโฏ5โฏM rows sample)
โข Geminiโgenerated narrative insights & quick schema audit
โข Interactive EDA (histogramย +ย box, correlation heatmap)
โข Tunable ARIMA forecasting (p,d,q & horizon)
โข Oneโclick strategy brief download
"""
from __future__ import annotations
import io, os, tempfile
from pathlib import Path
from typing import List
import pandas as pd
import plotly.express as px
import streamlit as st
from sqlalchemy import create_engine
from statsmodels.tsa.arima.model import ARIMA
import google.generativeai as genai
# โโโโโโโโโโโโโโโโโโโโโ 0โฏยทโฏPAGE CONFIG โโโโโโโโโโโโโโโโโโโโโโ
st.set_page_config(
page_title="BizIntelย AIย Ultra",
page_icon="๐",
layout="wide",
initial_sidebar_state="expanded",
)
auth_key = None
try:
auth_key = st.secrets["GEMINI_APIKEY"] # type: ignore[attr-defined]
except Exception:
auth_key = os.getenv("GEMINI_APIKEY")
if not auth_key:
st.error(
"โ **GEMINI_APIKEY** missing. Add it in *Settings โ Secrets* or set envโฏvar before running."
)
st.stop()
genai.configure(api_key=auth_key)
GEM_MODEL = "gemini-1.5-pro-latest"
TMP = Path(tempfile.gettempdir())
# โโโโโโโโโโโโโโโโโโโโโ 1โฏยทโฏHELPERS โโโโโโโโโโโโโโโโโโโโโโโโโโ
@st.cache_data(show_spinner=False)
def load_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame:
suf = Path(buf.name).suffix.lower()
if suf in {".xls", ".xlsx"}:
return pd.read_excel(buf, engine="openpyxl")
if suf == ".parquet":
return pd.read_parquet(buf)
return pd.read_csv(buf, nrows=5_000_000 if sample else None)
@st.cache_data(show_spinner=False)
def list_sql_tables(uri: str) -> List[str]:
return create_engine(uri).table_names()
@st.cache_data(show_spinner=True)
def fetch_sql_table(uri: str, tbl: str) -> pd.DataFrame:
return pd.read_sql_table(tbl, create_engine(uri))
@st.cache_data(show_spinner=False)
def gemini(prompt: str) -> str:
return genai.GenerativeModel(GEM_MODEL).generate_content(prompt).text.strip()
# โโโโโโโโโโโโโโโโโโโโโ 2โฏยทโฏDATA LOAD UI โโโโโโโโโโโโโโโโโโโโโ
st.title("๐ย BizIntelย AIย Ultra โ Geminiย 1.5ย Pro BI Copilot")
mode = st.sidebar.radio("Data source", ["Upload file", "SQL DB"], horizontal=True)
df: pd.DataFrame = pd.DataFrame()
if mode == "Upload file":
upl = st.sidebar.file_uploader("CSV / Excel / Parquet", ["csv", "xls", "xlsx", "parquet"], help="โคโฏ2โฏGB")
sample = st.sidebar.checkbox("Sample firstย 5โฏM rows only")
if upl:
df = load_file(upl, sample)
else:
uri = st.sidebar.text_input("SQLAlchemy URI")
if uri:
tbl = st.sidebar.selectbox("Table", list_sql_tables(uri))
if tbl:
df = fetch_sql_table(uri, tbl)
if df.empty:
st.info("โฌ
๏ธย Load data to start analysis.")
st.stop()
st.success("โ
ย Data loaded")
st.dataframe(df.head(), use_container_width=True)
# โโโโโโโโโโโโโโโโโโโโโ 3โฏยทโฏOVERVIEW & GEMINI INSIGHT โโโโโโโโโ
rows, cols = df.shape
missing_pct = df.isna().sum().sum() / (rows * cols) * 100
m1,m2,m3 = st.columns(3)
m1.metric("Rows", f"{rows:,}")
m2.metric("Columns", cols)
m3.metric("Missingย %", f"{missing_pct:.1f}")
st.subheader("๐ง ย Gemini Insights")
with st.spinner("Generating narrativeโฆ"):
summ_json = df.describe(include="all", datetime_is_numeric=True).round(2).to_json()
st.markdown(gemini(
"You are a senior BI analyst. Provide five bullet insights and three actions for this dataset:\n\n" + summ_json
))
# โโโโโโโโโโโโโโโโโโโโโ 4โฏยทโฏTIMEโSERIES PREP โโโโโโโโโโโโโโโโโโ
# attempt datetime coercion
for col in df.columns:
if not pd.api.types.is_datetime64_any_dtype(df[col]):
try:
df[col] = pd.to_datetime(df[col])
except: pass
DATE_COL = st.selectbox("Date column", [c for c in df.columns if pd.api.types.is_datetime64_any_dtype(df[c])])
NUM_COL = st.selectbox("Numeric metric", [c for c in df.select_dtypes("number").columns])
ts = df[[DATE_COL, NUM_COL]].dropna().groupby(DATE_COL)[NUM_COL].mean().sort_index()
fig_tr = px.line(ts, title=f"{NUM_COL} Trend", labels={"index":"Date", NUM_COL:NUM_COL})
st.plotly_chart(fig_tr, use_container_width=True)
# โโโโโโโโโโโโโโโโโโโโโ 5โฏยทโฏARIMA FORECAST โโโโโโโโโโโโโโโโโโโ
st.subheader("๐ฎย ARIMA Forecast")
steps = st.slider("Horizon (days)", 3, 365, 90)
p = st.number_input("p", 0, 5, 1); d = st.number_input("d", 0, 2, 1); q = st.number_input("q", 0, 5, 1)
try:
model = ARIMA(ts, order=(p, d, q)).fit()
future_idx = pd.date_range(ts.index[-1], periods=steps + 1, freq=pd.infer_freq(ts.index) or "D")[1:]
forecast = pd.Series(model.forecast(steps), index=future_idx, name="Forecast")
fig_fc = px.line(pd.concat([ts, forecast], axis=1), title="Actual vs Forecast")
st.plotly_chart(fig_fc, use_container_width=True)
except Exception as e:
st.error(f"ARIMA failed: {e}")
# โโโโโโโโโโโโโโโโโโโโโ 6โฏยทโฏEDA EXPANDERS โโโโโโโโโโโโโโโโโโโโ
st.subheader("๐ย EDA Dashboard")
with st.expander("Histogram + Box"):
st.plotly_chart(px.histogram(df, x=NUM_COL, marginal="box", template="plotly_dark"), use_container_width=True)
with st.expander("Correlation Heatmap"):
corr = df.select_dtypes("number").corr()
st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", title="Correlation"), use_container_width=True)
# โโโโโโโโโโโโโโโโโโโโโ 7โฏยทโฏSTRATEGY BRIEF โโโโโโโโโโโโโโโโโโโ
brief = (
"# Strategy Brief\n"
"โข Clean missing date values for stable models.\n"
"โข Investigate high correlations for driver analysis.\n"
"โข Use forecast for inventory & workforce planning.\n"
"โข Monitor outliers (>3ฯ) weekly.\n"
"โข Segment by region/product for targeted actions."
)
st.download_button("โฌ๏ธย Download Strategy (.md)", brief, "bizintel_brief.md", "text/markdown")
|