File size: 8,765 Bytes
9414ba2
e490e03
309eec4
 
f0be302
773f0cf
2c362d2
309eec4
10c7dea
3acbc9c
309eec4
9414ba2
 
 
 
 
10c7dea
e490e03
9414ba2
e490e03
10c7dea
2c362d2
b3a1f0c
f0be302
2c362d2
67e3963
e490e03
9414ba2
e490e03
773f0cf
c7c64f3
67e3963
72a73ff
 
e490e03
d037161
e490e03
9414ba2
f0be302
9414ba2
f0be302
9414ba2
f0be302
c7c64f3
f0be302
 
c7c64f3
773f0cf
9414ba2
 
 
f0be302
9414ba2
f0be302
9414ba2
 
f0be302
 
 
d037161
f0be302
d037161
773f0cf
9414ba2
c7c64f3
773f0cf
c7c64f3
9414ba2
c7c64f3
 
9414ba2
d037161
773f0cf
 
 
 
 
309eec4
 
773f0cf
f0be302
10c7dea
773f0cf
9414ba2
773f0cf
9414ba2
 
 
 
 
 
10c7dea
e490e03
9414ba2
e490e03
f0be302
2c362d2
10c7dea
9414ba2
 
 
 
773f0cf
9414ba2
10c7dea
d037161
9414ba2
 
e490e03
c7c64f3
9414ba2
773f0cf
e490e03
9414ba2
e490e03
523228c
 
9414ba2
 
 
 
 
e490e03
 
523228c
 
 
773f0cf
3acbc9c
d826a13
c7c64f3
 
 
9414ba2
c7c64f3
9414ba2
c7c64f3
d037161
c7c64f3
 
f0be302
 
d037161
9414ba2
 
d037161
523228c
c7c64f3
 
 
 
 
 
773f0cf
e490e03
c7c64f3
e490e03
523228c
3acbc9c
 
773f0cf
9414ba2
 
d826a13
773f0cf
9414ba2
d037161
 
3acbc9c
773f0cf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# app.py  โ€”  BizIntelย AIย Ultraย (Any metric, CSV/Excel/DB, Plotly, Geminiย 1.5โ€ฏPro)

import os
import tempfile
from typing import Literal

import pandas as pd
import streamlit as st
import google.generativeai as genai
import plotly.graph_objects as go

from tools.csv_parser      import parse_csv_tool
from tools.plot_generator  import plot_metric_tool          # NEW generic
from tools.forecaster      import forecast_metric_tool      # NEW generic
from tools.visuals         import histogram_tool, scatter_matrix_tool, corr_heatmap_tool
from db_connector          import fetch_data_from_db, list_tables, SUPPORTED_ENGINES

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 1.  GEMINI CONFIG
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
gemini = genai.GenerativeModel(
    "gemini-1.5-pro-latest",
    generation_config={"temperature": 0.7, "top_p": 0.9, "response_mime_type": "text/plain"},
)

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 2.  PAGE CONFIG
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
st.set_page_config(page_title="BizIntelย AIย Ultra", layout="wide")
st.title("๐Ÿ“Š BizIntelย AIย Ultraย โ€“ Advanced Analytics + Geminiย 1.5ย Pro")

TEMP_DIR = tempfile.gettempdir()

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 3.  DATA SOURCE  (CSV, Excel, or DB)
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
src = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
csv_path: str | None = None
file_kind: Literal["csv", "excel"] | None = None

if src == "Upload CSV / Excel":
    up = st.file_uploader("Upload CSV or Excel (โ‰คโ€ฏ500โ€ฏMB)", type=["csv", "xlsx", "xls"])
    if up:
        temp_path = os.path.join(TEMP_DIR, up.name)
        with open(temp_path, "wb") as f:
            f.write(up.read())

        if up.name.lower().endswith("csv"):
            csv_path, file_kind = temp_path, "csv"
        else:  # Excel โ†’ convert first sheet to CSV
            try:
                df_xl = pd.read_excel(temp_path, sheet_name=0)
                csv_path = os.path.splitext(temp_path)[0] + ".csv"
                df_xl.to_csv(csv_path, index=False)
                file_kind = "excel"
            except Exception as e:
                st.error(f"Excel parsing failed: {e}")
                st.stop()
        st.success(f"{up.name} saved โœ…")

else:  # SQL DB
    engine = st.selectbox("DB engine", SUPPORTED_ENGINES)
    conn   = st.text_input("SQLAlchemy connection string")
    if conn:
        try:
            tbls = list_tables(conn)
            tbl  = st.selectbox("Table", tbls)
            if st.button("Fetch table"):
                csv_path = fetch_data_from_db(conn, tbl)
                file_kind = "csv"
                st.success(f"Fetched **{tbl}** as CSV โœ…")
        except Exception as e:
            st.error(f"Connection failed: {e}")
            st.stop()

if csv_path is None:
    st.stop()

with open(csv_path, "rb") as f:
    st.download_button("โฌ‡๏ธย Download working CSV", f, file_name=os.path.basename(csv_path))

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 4.  COLUMN PICKERS
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
df_head = pd.read_csv(csv_path, nrows=5)
st.dataframe(df_head)

date_col   = st.selectbox("Select date/time column", df_head.columns)
numeric_cols = df_head.select_dtypes("number").columns
metric_col = st.selectbox("Select numeric metric column", numeric_cols)

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 5.  LOCAL TOOLS (TREND + FORECAST)
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
with st.spinner("Parsing datasetโ€ฆ"):
    summary_text = parse_csv_tool(csv_path)

with st.spinner("๐Ÿ“ˆ Building trend chartโ€ฆ"):
    trend_fig = plot_metric_tool(csv_path, date_col, metric_col)
if isinstance(trend_fig, go.Figure):
    st.plotly_chart(trend_fig, use_container_width=True)
else:
    st.warning(trend_fig)

with st.spinner("๐Ÿ”ฎ Forecastingโ€ฆ"):
    forecast_text = forecast_metric_tool(csv_path, date_col, metric_col)
    forecast_png  = "forecast_plot.png" if os.path.exists("forecast_plot.png") else None

if forecast_png:
    st.image(forecast_png, caption=f"{metric_col} Forecast", use_column_width=True)

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 6.  GEMINI INSIGHTS
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
prompt = (
    f"You are **BizIntel Strategist AI**.\n\n"
    f"### Dataset Summary\n```\n{summary_text}\n```\n\n"
    f"### {metric_col} Forecast\n```\n{forecast_text}\n```\n\n"
    "Deliver **Markdown** with:\n"
    f"1. Five key insights focused on **{metric_col}**\n"
    "2. Three actionable strategies (impactโ€‘oriented)\n"
    "3. Risk factors or anomalies\n"
    "4. Suggested additional visuals\n"
)

st.subheader("๐Ÿš€ Strategy Recommendations (Geminiย 1.5ย Pro)")
with st.spinner("Generating insightsโ€ฆ"):
    strategy_md = gemini.generate_content(prompt).text
st.markdown(strategy_md)
st.download_button("โฌ‡๏ธย Download Strategy (.md)", strategy_md, file_name="strategy.md")

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 7.  KPI CARDS + STAT EXPANDER
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
full_df    = pd.read_csv(csv_path, low_memory=False)
total_rows = len(full_df)
num_cols   = len(full_df.columns)
missing_pct = full_df.isna().mean().mean() * 100

st.markdown("---")
st.subheader("๐Ÿ“‘ Dataset Overview")
c1, c2, c3 = st.columns(3)
c1.metric("Rows", f"{total_rows:,}")
c2.metric("Columns", str(num_cols))
c3.metric("Missingย %", f"{missing_pct:.1f}%")

with st.expander("๐Ÿ”Žย Detailed descriptive statistics"):
    stats_df = full_df.describe().T.reset_index().rename(columns={"index": "Feature"})
    st.dataframe(
        stats_df.style.format(precision=2).background_gradient(cmap="Blues"),
        use_container_width=True,
    )

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 8.  OPTIONAL EXPLORATORY VISUALS
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
st.markdown("---")
st.subheader("๐Ÿ” Optional Exploratory Visuals")

if st.checkbox("Histogram"):
    hist_col = st.selectbox("Variable", numeric_cols, key="hist")
    st.plotly_chart(histogram_tool(csv_path, hist_col), use_container_width=True)

if st.checkbox("Scatterโ€‘matrix"):
    sel = st.multiselect("Choose columns", numeric_cols, default=numeric_cols[:3])
    if sel:
        st.plotly_chart(scatter_matrix_tool(csv_path, sel), use_container_width=True)

if st.checkbox("Correlation heatโ€‘map"):
    st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)