mgbam commited on
Commit
8a0173b
Β·
verified Β·
1 Parent(s): 0a40e29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -129
app.py CHANGED
@@ -1,189 +1,201 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
- import tempfile
5
- from io import BytesIO
6
- from sqlalchemy import create_engine
7
  import plotly.express as px
8
  import matplotlib.pyplot as plt
 
 
9
  from statsmodels.tsa.arima.model import ARIMA
 
 
 
 
 
 
 
 
10
 
11
- # ── Helpers to read CSV/Excel robustly ───────────────────────────────────────────
 
 
 
12
  @st.cache_data
13
- def load_file(uploaded):
14
- """Read a CSV or Excel file into a DataFrame."""
15
  try:
16
  if uploaded.name.lower().endswith((".xls", ".xlsx")):
17
  return pd.read_excel(uploaded, engine="openpyxl")
18
  else:
19
  return pd.read_csv(uploaded)
20
  except Exception as e:
21
- raise st.Error(f"Error parsing file: {e}")
 
22
 
23
- # ── Helpers for SQL database ────────────────────────────────────────────────────
24
- SUPPORTED_ENGINES = ["postgresql", "mysql", "mssql+pyodbc", "oracle+cx_oracle"]
25
  @st.cache_data
26
- def list_tables(connection_string):
27
- engine = create_engine(connection_string)
28
  return engine.table_names()
29
 
30
  @st.cache_data
31
- def fetch_table(connection_string, table_name):
32
- engine = create_engine(connection_string)
33
- return pd.read_sql_table(table_name, engine)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- # ── Streamlit page setup ────────────────────────────────────────────────────────
36
- st.set_page_config(
37
- page_title="BizIntel AI Ultra",
38
- layout="wide",
39
- initial_sidebar_state="expanded",
40
- )
41
  st.title("πŸ“Š BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")
42
 
43
- # ── Data source selection ───────────────────────────────────────────────────────
44
- data_source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
45
 
46
- df = None
47
- if data_source == "Upload CSV / Excel":
48
  uploaded = st.file_uploader(
49
- "Drag & drop file here (≀ 500 MB)",
50
- type=["csv", "xls", "xlsx"],
51
- accept_multiple_files=False,
52
  )
53
  if uploaded:
54
  with st.spinner("Loading file…"):
55
- df = load_file(uploaded)
56
- st.success("βœ… File loaded into memory")
57
- elif data_source == "Connect to SQL Database":
58
- engine = st.selectbox("Select DB engine", SUPPORTED_ENGINES)
59
- conn_str = st.text_input("Connection string (SQLAlchemy format)", placeholder="e.g. postgresql://user:pass@host:port/dbname")
60
  if conn_str:
61
- tables = list_tables(conn_str)
62
  table = st.selectbox("Choose table", tables)
63
  if table:
64
  with st.spinner(f"Fetching `{table}`…"):
65
- df = fetch_table(conn_str, table)
66
- st.success(f"βœ… `{table}` loaded from database")
67
 
68
- # ── If DataFrame is ready, show overview and proceed ───────────────────────────
69
- if df is not None:
70
- st.markdown("### πŸ—‚οΈ Preview")
71
- st.dataframe(df.head(5), use_container_width=True)
72
-
73
- # Dataset overview metrics
74
- n_rows, n_cols = df.shape
75
- missing_pct = (df.isna().sum().sum() / (n_rows * n_cols)) * 100
76
  st.markdown("---")
77
- c1, c2, c3 = st.columns(3)
78
- c1.metric("Rows", f"{n_rows:,}")
79
- c2.metric("Columns", f"{n_cols:,}")
80
- c3.metric("Missing %", f"{missing_pct:.1f}%")
81
 
82
- # Detailed stats
83
- st.markdown("#### πŸ“‹ Detailed descriptive statistics")
84
- st.dataframe(df.describe(include="all").transpose(), use_container_width=True)
85
 
86
- # Optional exploratory visuals
87
- st.markdown("---")
88
- st.markdown("#### πŸ”Ž Optional Exploratory Visuals")
89
  col1, col2, col3 = st.columns(3)
90
- with col1:
91
- if st.checkbox("Histogram"):
92
- num_cols = df.select_dtypes(include="number").columns.tolist()
93
- col = st.selectbox("Choose numeric column for histogram", num_cols, key="hist")
94
- fig = px.histogram(df, x=col, nbins=30, title=f"Histogram of {col}")
95
- st.plotly_chart(fig, use_container_width=True)
96
- with col2:
97
- if st.checkbox("Scatter matrix"):
98
- num_cols = df.select_dtypes(include="number").columns.tolist()[:6] # limit to first 6
99
- fig = px.scatter_matrix(df[num_cols], dimensions=num_cols, title="Scatter Matrix")
100
- st.plotly_chart(fig, use_container_width=True)
101
- with col3:
102
- if st.checkbox("Correlation heatmap"):
103
- corr = df.select_dtypes(include="number").corr()
104
- fig, ax = plt.subplots(figsize=(6, 5))
105
- im = ax.imshow(corr, vmin=-1, vmax=1, cmap="RdBu")
106
- plt.xticks(range(len(corr)), corr.columns, rotation=45, ha="right")
107
- plt.yticks(range(len(corr)), corr.columns)
108
- plt.colorbar(im, ax=ax)
109
- st.pyplot(fig)
110
-
111
- # ── Trend & Forecast ──────────────────────────────────────────────────────
112
  st.markdown("---")
113
- st.markdown("### πŸ“ˆ Trend & Forecast")
114
- # pick date/time column
115
- dt_cols = df.columns[df.dtypes.isin([np.dtype("datetime64[ns]"), np.dtype("object")])].tolist()
116
- date_col = st.selectbox("Select date/time column", dt_cols)
117
- df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
118
 
119
- # pick numeric metric
120
- num_cols = df.select_dtypes(include="number").columns.tolist()
121
- metric_col = st.selectbox("Select numeric metric", num_cols)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
- # prepare time series
124
- ts = df[[date_col, metric_col]].dropna()
125
- ts = ts.set_index(date_col).sort_index()
126
- ts = ts[~ts.index.duplicated(keep="first")]
 
 
 
 
 
 
 
 
 
127
 
128
- # Trend plot
129
- fig_trend = px.line(ts, y=metric_col, title=f"{metric_col} over Time")
130
  st.plotly_chart(fig_trend, use_container_width=True)
131
 
132
- # Forecast next 90 days with ARIMA
133
- with st.spinner("Running 90-day forecast…"):
134
  try:
135
- model = ARIMA(ts, order=(1, 1, 1)).fit()
136
- fcast = model.get_forecast(90)
137
- idx = pd.date_range(ts.index.max(), periods=91, freq="D")[1:]
138
- df_f = pd.DataFrame({"forecast": fcast.predicted_mean}, index=idx)
139
 
 
140
  fig_fc = px.line(
141
- pd.concat([ts, df_f], axis=1),
142
- labels={metric_col: metric_col, "forecast": "Forecast"},
143
- title=f"{metric_col} & 90-Day Forecast",
144
  )
145
  st.plotly_chart(fig_fc, use_container_width=True)
 
146
  except Exception as e:
147
  st.error(f"Forecast failed: {e}")
148
 
149
- # ── Strategy Recommendations ─────────────────────────────────────────────
150
  st.markdown("---")
151
- st.markdown("### πŸš€ Strategy Recommendations")
152
- st.markdown(
153
- """
154
- 1. **Data Quality First**
155
- Address any missing or malformed dates before further time-series analysis.
156
 
157
- 2. **Trend & Seasonality**
158
- Investigate any upward/downward trends and repeating seasonal patterns.
159
 
160
- 3. **Outlier Management**
161
- Identify extreme highs/lows in your metricβ€”could be bulk orders or data errors.
162
 
163
- 4. **Segment-Level Analysis**
164
- Drill into key dimensions (e.g. region, product) to tailor growth strategies.
165
 
166
- 5. **Predict & Act**
167
- Use your 90-day forecasts to guide inventory, staffing, and marketing decisions.
168
- """
169
- )
170
 
171
- # downloadable strategy as markdown
172
- strategy_md = st.session_state.get("strategy_md", "")
173
- if not strategy_md:
174
- strategy_md = st.session_state["strategy_md"] = st.container().markdown("…") # dummy to store
175
 
176
  st.download_button(
177
  "πŸ“₯ Download Strategy (.md)",
178
- data="""
179
- # BizIntel AI Ultra – Strategy Recommendations
180
-
181
- 1. Data Quality First: …
182
- 2. Trend & Seasonality: …
183
- 3. Outlier Management: …
184
- 4. Segment-Level Analysis: …
185
- 5. Predict & Act: …
186
- """,
187
- file_name="strategy.md",
188
- mime="text/markdown",
189
  )
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
 
 
4
  import plotly.express as px
5
  import matplotlib.pyplot as plt
6
+ from io import BytesIO
7
+ from sqlalchemy import create_engine
8
  from statsmodels.tsa.arima.model import ARIMA
9
+ import openai
10
+
11
# ── CONFIG ───────────────────────────────────────────────────────────────────────
# Page chrome must be set before any other Streamlit call renders output.
st.set_page_config(
    page_title="BizIntel AI Ultra",
    layout="wide",
    initial_sidebar_state="expanded"
)

# You must set OPENAI_API_KEY in your Streamlit Secrets
# (st.secrets raises a KeyError at startup if the key is absent — fail fast by design).
openai.api_key = st.secrets["OPENAI_API_KEY"]
20
+
21
+ # ── CACHEABLE HELPERS ────────────────────────────────────────────────────────────
22
@st.cache_data
def load_uploaded_file(uploaded):
    """Parse an uploaded CSV or Excel file into a DataFrame.

    On any parsing failure the error is surfaced in the UI via
    ``st.error`` and an empty DataFrame is returned, so the caller's
    ``df.empty`` check short-circuits the rest of the app.
    """
    try:
        # Dispatch on the file extension: Excel needs the openpyxl engine,
        # everything else is treated as CSV.
        if uploaded.name.lower().endswith((".xls", ".xlsx")):
            frame = pd.read_excel(uploaded, engine="openpyxl")
        else:
            frame = pd.read_csv(uploaded)
        return frame
    except Exception as exc:
        st.error(f"⚠️ File parsing failed: {exc}")
        return pd.DataFrame()
33
 
 
 
34
@st.cache_data
def list_db_tables(conn_str):
    """Return the table names available in the database at ``conn_str``.

    Uses the SQLAlchemy inspection API: ``Engine.table_names()`` was
    deprecated in SQLAlchemy 1.4 and removed entirely in 2.0, so the
    original call raises ``AttributeError`` on current installs.
    """
    from sqlalchemy import inspect  # local import; module already depends on sqlalchemy

    engine = create_engine(conn_str)
    return inspect(engine).get_table_names()
38
 
39
@st.cache_data
def fetch_db_table(conn_str, table):
    """Load the full contents of ``table`` from the database into a DataFrame."""
    db_engine = create_engine(conn_str)
    frame = pd.read_sql_table(table, db_engine)
    return frame
43
+
44
+ # ── DATA NARRATIVE VIA OPENAI ───────────────────────────────────────────────────
45
def generate_data_narrative(df: pd.DataFrame) -> str:
    """Send a summary of ``df`` to OpenAI and return a polished narrative.

    The descriptive statistics are serialized with ``to_json()`` so the
    prompt actually contains valid JSON: the original interpolated a raw
    Python dict repr (single quotes, ``NaN`` literals), which contradicts
    the prompt's "JSON summary" claim and is not parseable as JSON.
    """
    summary_json = df.describe(include="all").transpose().round(2).to_json()
    prompt = (
        "You are a world-class data analyst. "
        "Below is a JSON summary of a dataset. "
        "Write a concise, professional narrative highlighting the top 5 business-critical insights, "
        "in bullet format:\n\n"
        f"{summary_json}\n\n"
    )
    # NOTE(review): openai.ChatCompletion was removed in openai>=1.0 —
    # confirm the pinned SDK version, or migrate to
    # openai.OpenAI().chat.completions.create(...).
    resp = openai.ChatCompletion.create(
        model="gpt-4o-mini",  # or "gpt-4o", "gpt-4o-mini-high"
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,  # low temperature keeps the narrative consistent run-to-run
    )
    return resp.choices[0].message.content.strip()
61
 
62
# ── APP ─────────────────────────────────────────────────────────────────────────
st.title("📊 BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")

# 1) Choose data source
source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])

df = pd.DataFrame()
if source == "Upload CSV / Excel":
    uploaded = st.file_uploader(
        "Drag & drop file here (≤500 MB) • .csv, .xls, .xlsx",
        type=["csv", "xls", "xlsx"],
    )
    if uploaded:
        with st.spinner("Loading file…"):
            df = load_uploaded_file(uploaded)

else:
    engine = st.selectbox("DB engine", ["postgresql", "mysql", "mssql+pyodbc", "oracle+cx_oracle"])
    conn_str = st.text_input("Connection string", placeholder="dialect+driver://user:pass@host/db")
    if conn_str:
        tables = list_db_tables(conn_str)
        table = st.selectbox("Choose table", tables)
        if table:
            with st.spinner(f"Fetching `{table}`…"):
                df = fetch_db_table(conn_str, table)

# 2) If we have data…
if not df.empty:
    st.success("✅ Data loaded!")
    st.markdown("---")

    # 2a) Preview & summary metrics
    st.subheader("🗂 Data Preview & Overview")
    st.dataframe(df.head(5), use_container_width=True)

    r, c = df.shape
    # df is non-empty here, so r * c > 0 and the division is safe.
    missing_pct = (df.isna().sum().sum() / (r * c) * 100).round(1)
    col1, col2, col3 = st.columns(3)
    col1.metric("Rows", f"{r:,}")
    col2.metric("Cols", f"{c:,}")
    col3.metric("Missing %", f"{missing_pct}%")
    st.markdown("---")

    # 2b) Automated data narrative
    st.subheader("📝 Data Narrative")
    with st.spinner("Generating insights…"):
        narrative = generate_data_narrative(df)
    st.markdown(narrative)

    # 2c) Optional EDA visuals
    st.subheader("🔎 Exploratory Visuals")
    num_cols = df.select_dtypes("number").columns.tolist()
    if st.checkbox("Show histogram"):
        col = st.selectbox("Histogram column", num_cols, key="hist")
        fig = px.histogram(df, x=col, nbins=30, title=f"Histogram of {col}")
        st.plotly_chart(fig, use_container_width=True)

    if st.checkbox("Show scatter matrix"):
        dims = num_cols[:6]  # cap at 6 dimensions to keep the matrix readable
        fig = px.scatter_matrix(df[dims], dimensions=dims, title="Scatter Matrix")
        st.plotly_chart(fig, use_container_width=True)

    if st.checkbox("Show correlation heatmap"):
        corr = df[num_cols].corr()
        fig, ax = plt.subplots(figsize=(6, 5))
        im = ax.imshow(corr, cmap="RdBu", vmin=-1, vmax=1)
        plt.xticks(range(len(corr)), corr.columns, rotation=45, ha="right")
        plt.yticks(range(len(corr)), corr.columns)
        plt.colorbar(im, ax=ax)
        st.pyplot(fig)

    # 3) Trend & forecast
    st.markdown("---")
    st.subheader("📈 Time-Series Trend & 90-Day Forecast")

    # pick columns: datetime-typed columns plus object columns that may parse as dates
    dt_opts = [
        col for col in df.columns
        if pd.api.types.is_datetime64_any_dtype(df[col]) or df[col].dtype == "object"
    ]
    date_col = st.selectbox("Date column", dt_opts)
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
    metric_col = st.selectbox("Metric column", num_cols)

    ts = (
        df[[date_col, metric_col]]
        .dropna()
        .set_index(date_col)
        .sort_index()
    )
    # BUG FIX: the duplicate mask must come from the series' OWN index.
    # The original used `df.index.duplicated(...)`, whose length is the
    # pre-dropna row count — pandas raises an unalignable-indexer error
    # whenever dropna() removed any rows.
    ts = ts[~ts.index.duplicated(keep="first")]

    # plot trend
    fig_trend = px.line(ts, y=metric_col, title=f"{metric_col} over Time", labels={"index": "Date"})
    st.plotly_chart(fig_trend, use_container_width=True)

    # forecast
    with st.spinner("Running ARIMA…"):
        try:
            model = ARIMA(ts, order=(1, 1, 1)).fit()
            # 91 periods starting at the last observation; drop the first so the
            # forecast index begins the day AFTER the observed series ends.
            future_idx = pd.date_range(start=ts.index.max(), periods=91, freq="D")[1:]
            pred = model.get_forecast(90).predicted_mean
            df_pred = pd.Series(pred.values, index=future_idx, name="Forecast")

            combo = pd.concat([ts[metric_col], df_pred], axis=1)
            fig_fc = px.line(
                combo,
                labels={metric_col: metric_col, "Forecast": "Forecast"},
                title=f"{metric_col} & 90-Day Forecast",
            )
            st.plotly_chart(fig_fc, use_container_width=True)

        except Exception as e:
            # ARIMA can fail on short/irregular series; show the reason instead of crashing.
            st.error(f"Forecast failed: {e}")

    # 4) Strategy download
    st.markdown("---")
    st.subheader("🚀 Actionable Strategy Brief")
    strategy_md = """
# BizIntel AI Ultra – Strategy Brief

**1. Data Quality First**
Ensure all dates are parsed correctly—critical for any time-series modeling.

**2. Trend & Seasonality**
Investigate the underlying patterns and adjust your operations calendar.

**3. Outlier Management**
Flag and validate extreme observations to avoid skewed forecasts.

**4. Segment-Level Insights**
Drill into regions or product lines for targeted interventions.

**5. Predict & Act**
Leverage your 90-day projections for inventory, staffing, and marketing plans.
""".strip()

    st.download_button(
        "📥 Download Strategy (.md)",
        data=strategy_md,
        file_name="bizintel_strategy.md",
        mime="text/markdown",
    )