Adding Application Files
Browse files- app.py +299 -0
- dashboard.py +67 -0
- sales_data.csv +0 -0
app.py
ADDED
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import datetime
|
5 |
+
import warnings
|
6 |
+
import os
|
7 |
+
import tempfile
|
8 |
+
from cachetools import cached, TTLCache
|
9 |
+
|
10 |
+
warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn")
|
11 |
+
|
12 |
+
# ------------------------------------------------------------------
|
13 |
+
# 1) Load CSV data once
|
14 |
+
# ------------------------------------------------------------------
|
15 |
+
csv_data = None
|
16 |
+
|
17 |
+
def load_csv_data():
    """Read ``sales_data.csv`` into the module-level ``csv_data`` frame.

    ``order_date`` is parsed as a date (day-first) and the other known
    columns are read with explicit dtypes.
    """
    global csv_data

    # Optional: explicit dtypes for the known columns; adjust as necessary.
    column_types = dict(
        order_id="Int64",
        customer_id="Int64",
        product_id="Int64",
        quantity="Int64",
        price="float",
        total="float",
        customer_name="string",
        product_names="string",
        categories="string",
    )

    csv_data = pd.read_csv(
        "sales_data.csv",
        dtype=column_types,
        parse_dates=["order_date"],
        dayfirst=True,  # if your dates are DD/MM/YYYY format
        low_memory=False,
    )
|
40 |
+
|
41 |
+
load_csv_data()
|
42 |
+
|
43 |
+
|
44 |
+
|
45 |
+
cache = TTLCache(maxsize=128, ttl=300)
|
46 |
+
|
47 |
+
@cached(cache)
def get_unique_categories():
    """Return the sorted, de-duplicated category labels found in ``csv_data``.

    Labels are normalised with ``str.capitalize`` *before* de-duplicating and
    sorting. ``filter_data`` compares categories in their capitalised form, so
    raw values that differ only by case (e.g. ``"office"`` / ``"Office"``)
    must collapse into a single choice, and the final order must reflect the
    normalised text rather than the raw text.

    Returns:
        list[str]: capitalised category names in ascending order, or an empty
        list when no data has been loaded. The result is memoised through the
        module-level ``cache``.
    """
    global csv_data
    if csv_data is None:
        return []
    raw_labels = csv_data['categories'].dropna().unique().tolist()
    # Set comprehension removes case-only duplicates; sorted() returns a list.
    return sorted({label.capitalize() for label in raw_labels})
|
55 |
+
|
56 |
+
def get_date_range():
    """Return ``(earliest, latest)`` order dates, or ``(None, None)`` when
    ``csv_data`` is unset or has no rows."""
    global csv_data
    has_rows = csv_data is not None and not csv_data.empty
    if not has_rows:
        return None, None
    order_dates = csv_data['order_date']
    return order_dates.min(), order_dates.max()
|
61 |
+
|
62 |
+
def filter_data(start_date, end_date, category):
    """Return a copy of the rows of ``csv_data`` matching the given filters.

    Args:
        start_date: inclusive lower bound on ``order_date``. May be a
            ``'%Y-%m-%d'`` string, any value ``pd.to_datetime`` accepts, or
            ``None`` for "no lower bound" (e.g. a cleared date picker).
        end_date: inclusive upper bound on ``order_date``; same accepted
            forms as ``start_date``.
        category: capitalised category label to keep, or the literal
            ``"All Categories"`` to keep every category.

    Returns:
        pandas.DataFrame: an independent copy of the matching rows. An empty
        frame is returned when no data has been loaded, so callers can rely
        on their existing ``df.empty`` checks.

    Raises:
        ValueError: if a string date is not in ``'%Y-%m-%d'`` format.
    """
    global csv_data

    # Mirror the guard used by the other accessors instead of crashing on None.
    if csv_data is None:
        return pd.DataFrame()

    if isinstance(start_date, str):
        start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d').date()
    if isinstance(end_date, str):
        end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d').date()

    # Build one combined mask so the (potentially large) frame is copied once.
    keep = pd.Series(True, index=csv_data.index)

    # A missing bound means "unbounded"; comparing a datetime column with None
    # would raise a TypeError.
    if start_date is not None:
        keep = keep & (csv_data['order_date'] >= pd.to_datetime(start_date))
    if end_date is not None:
        keep = keep & (csv_data['order_date'] <= pd.to_datetime(end_date))

    if category != "All Categories":
        matches = csv_data['categories'].str.capitalize() == category
        # Rows with a missing category can yield NA here; they never match.
        keep = keep & matches.fillna(False).astype(bool)

    return csv_data.loc[keep].copy()
|
79 |
+
|
80 |
+
def get_dashboard_stats(start_date, end_date, category):
    """Compute the headline metrics for the filtered sales rows.

    Args:
        start_date, end_date, category: passed straight to ``filter_data``.

    Returns:
        tuple: ``(total_revenue, total_orders, avg_order_value, top_category)``
        where revenue is ``price * quantity`` summed over the rows, orders is
        the number of distinct ``order_id`` values, and ``top_category`` is
        the capitalised category with the highest revenue. When no rows match
        the result is ``(0, 0, 0, "N/A")``; ``top_category`` is also ``"N/A"``
        when no row carries a category.
    """
    df = filter_data(start_date, end_date, category)
    if df.empty:
        return (0, 0, 0, "N/A")

    df['revenue'] = df['price'] * df['quantity']
    total_revenue = df['revenue'].sum()
    total_orders = df['order_id'].nunique()
    avg_order_value = total_revenue / total_orders if total_orders else 0

    cat_revenues = df.groupby('categories')['revenue'].sum().sort_values(ascending=False)
    if cat_revenues.empty:
        # Keep the placeholder verbatim: "N/A".capitalize() would give "N/a".
        top_category = "N/A"
    else:
        top_category = cat_revenues.index[0].capitalize()

    return (total_revenue, total_orders, avg_order_value, top_category)
|
94 |
+
|
95 |
+
def get_data_for_table(start_date, end_date, category):
    """Return the filtered rows prepared for the raw-data table.

    Rows are sorted by ``order_id`` ascending then ``order_date`` descending,
    restricted to the known display columns that are present, and extended
    with a ``revenue`` column (``price * quantity``). An empty frame is
    returned when nothing matches.
    """
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    ordered = filtered.sort_values(
        by=["order_id", "order_date"],
        ascending=[True, False],
    ).copy()

    preferred_columns = (
        "order_id", "order_date", "customer_id", "customer_name",
        "product_id", "product_names", "categories", "quantity",
        "price", "total",
    )
    # Keep only the display columns that actually exist in the data.
    present = [name for name in preferred_columns if name in ordered.columns]
    table = ordered[present].copy()

    table['revenue'] = table['price'] * table['quantity']
    return table
|
112 |
+
|
113 |
+
def get_plot_data(start_date, end_date, category):
    """Return revenue summed per calendar day as a frame with ``date`` and
    ``revenue`` columns (an empty frame when nothing matches the filters)."""
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    filtered['revenue'] = filtered['price'] * filtered['quantity']
    # Group on the date part only so all orders of one day land in one bucket.
    order_day = filtered['order_date'].dt.date
    daily = filtered.groupby(order_day)['revenue'].sum().reset_index()
    return daily.rename(columns={'order_date': 'date'})
|
121 |
+
|
122 |
+
def get_revenue_by_category(start_date, end_date, category):
    """Return revenue summed per category, highest first, as a frame with
    ``categories`` and ``revenue`` columns (empty frame when nothing matches)."""
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    filtered['revenue'] = filtered['price'] * filtered['quantity']
    per_category = filtered.groupby('categories')['revenue'].sum().reset_index()
    return per_category.sort_values(by='revenue', ascending=False)
|
130 |
+
|
131 |
+
def get_top_products(start_date, end_date, category):
    """Return the ten products with the highest summed revenue as a frame with
    ``product_names`` and ``revenue`` columns (empty frame when nothing matches)."""
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    filtered['revenue'] = filtered['price'] * filtered['quantity']
    per_product = filtered.groupby('product_names')['revenue'].sum().reset_index()
    ranked = per_product.sort_values(by='revenue', ascending=False)
    return ranked.head(10)
|
139 |
+
|
140 |
+
def create_matplotlib_figure(data, x_col, y_col, title, xlabel, ylabel, orientation='v'):
    """Render a bar chart of ``data`` to a temporary PNG and return its path.

    Args:
        data: frame holding the values to plot; an empty frame produces a
            "No data available" placeholder image.
        x_col: column used for the bar positions/labels.
        y_col: column used for the bar lengths.
        title, xlabel, ylabel: texts applied to the axes.
        orientation: ``'v'`` for vertical bars (x tick labels rotated 45
            degrees); any other value draws horizontal bars with the first
            row at the top.

    Returns:
        str: path of the PNG file. The file is created with ``delete=False``
        and is not removed by this function.
    """
    # Work on explicit figure/axes objects rather than pyplot's implicit
    # "current figure", which is shared global state and could be switched by
    # another request being rendered at the same time.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        if data.empty:
            ax.text(0.5, 0.5, 'No data available', ha='center', va='center')
        elif orientation == 'v':
            ax.bar(data[x_col], data[y_col])
            plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        else:
            ax.barh(data[x_col], data[y_col])
            ax.invert_yaxis()

        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        fig.tight_layout()

        # Reserve a unique file name, then write after the handle is closed.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
            image_path = tmpfile.name
        fig.savefig(image_path)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)
    return image_path
|
161 |
+
|
162 |
+
|
163 |
+
|
164 |
+
def update_dashboard(start_date, end_date, category):
    """Recompute every dashboard output for the given filters.

    Returns:
        tuple: paths of the three chart images (revenue over time, revenue by
        category, top products), the table frame, then total revenue, total
        orders, average order value and top category — in that order.
    """
    filters = (start_date, end_date, category)

    revenue_total, order_count, mean_order_value, best_category = get_dashboard_stats(*filters)

    # Aggregations feeding the three charts.
    daily_revenue = get_plot_data(*filters)
    revenue_per_category = get_revenue_by_category(*filters)
    best_sellers = get_top_products(*filters)

    time_chart = create_matplotlib_figure(
        daily_revenue, 'date', 'revenue',
        "Revenue Over Time", "Date", "Revenue",
    )
    category_chart = create_matplotlib_figure(
        revenue_per_category, 'categories', 'revenue',
        "Revenue by Category", "Category", "Revenue",
    )
    products_chart = create_matplotlib_figure(
        best_sellers, 'product_names', 'revenue',
        "Top Products", "Revenue", "Product Name", orientation='h',
    )

    # Rows for the raw-data table.
    table_rows = get_data_for_table(*filters)

    return (
        time_chart,
        category_chart,
        products_chart,
        table_rows,
        revenue_total,
        order_count,
        mean_order_value,
        best_category,
    )
|
198 |
+
|
199 |
+
def create_dashboard():
    """Assemble and return the Gradio ``Blocks`` sales dashboard.

    The layout holds a filter row (start date, end date, category), a metrics
    row, three chart tabs and the raw-data table. Changing any filter, and
    the initial page load, refresh all outputs through ``update_dashboard``.
    """
    first_day, last_day = get_date_range()
    if first_day is None or last_day is None:
        # No data loaded: fall back to the current moment for both bounds.
        first_day = datetime.datetime.now()
        last_day = datetime.datetime.now()

    default_start_date = first_day
    default_end_date = last_day

    page_css = (
        "\n"
        "footer {display: none !important;}\n"
        ".tabs {border: none !important;}\n"
        ".gr-plot {border: none !important; box-shadow: none !important;}\n"
    )

    with gr.Blocks(css=page_css) as dashboard:

        gr.Markdown("# Sales Performance Dashboard")

        # Filters row
        with gr.Row():
            start_picker = gr.DateTime(
                label="Start Date",
                value=default_start_date.strftime('%Y-%m-%d'),
                include_time=False,
                type="datetime",
            )
            end_picker = gr.DateTime(
                label="End Date",
                value=default_end_date.strftime('%Y-%m-%d'),
                include_time=False,
                type="datetime",
            )
            category_picker = gr.Dropdown(
                choices=["All Categories"] + get_unique_categories(),
                label="Category",
                value="All Categories",
            )

        gr.Markdown("# Key Metrics")

        # Stats row
        with gr.Row():
            revenue_box = gr.Number(label="Total Revenue", value=0)
            orders_box = gr.Number(label="Total Orders", value=0)
            average_box = gr.Number(label="Average Order Value", value=0)
            top_category_box = gr.Textbox(label="Top Category", value="N/A")

        gr.Markdown("# Visualisations")
        # Tabs for plots
        with gr.Tabs():
            with gr.Tab("Revenue Over Time"):
                time_image = gr.Image(label="Revenue Over Time", container=False)
            with gr.Tab("Revenue by Category"):
                category_image = gr.Image(label="Revenue by Category", container=False)
            with gr.Tab("Top Products"):
                products_image = gr.Image(label="Top Products", container=False)

        gr.Markdown("# Raw Data")
        # Data table (below the plots)
        sales_table = gr.DataFrame(
            label="Sales Data",
            type="pandas",
            interactive=False,
        )

        filter_inputs = [start_picker, end_picker, category_picker]
        # Same order as the tuple returned by update_dashboard.
        refreshed = [
            time_image,
            category_image,
            products_image,
            sales_table,
            revenue_box,
            orders_box,
            average_box,
            top_category_box,
        ]

        # When filters change, update everything
        for control in filter_inputs:
            control.change(
                fn=lambda s, e, c: update_dashboard(s, e, c),
                inputs=list(filter_inputs),
                outputs=list(refreshed),
            )

        # Initial load
        dashboard.load(
            fn=lambda: update_dashboard(default_start_date, default_end_date, "All Categories"),
            outputs=list(refreshed),
        )

    return dashboard
|
296 |
+
|
297 |
+
if __name__ == "__main__":
|
298 |
+
dashboard = create_dashboard()
|
299 |
+
dashboard.launch(share=True)
|
dashboard.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# generate the 100K record CSV file
|
2 |
+
#
|
3 |
+
import polars as pl
|
4 |
+
import numpy as np
|
5 |
+
from datetime import datetime, timedelta
|
6 |
+
|
7 |
+
def generate(nrows: int, filename: str):
    """Write ``nrows`` random sales records to ``filename`` as a CSV file.

    Each record has an ``order_id`` (0..nrows-1), an ``order_date`` between
    2010-01-01 and 2023-12-31 inclusive (``YYYY-MM-DD``), random customer
    id/name, a product drawn from a fixed catalogue with its category, a
    quantity of 1-10, a price of 1.99-99.99 and ``total = price * quantity``
    rounded to cents.

    Args:
        nrows: number of rows to generate.
        filename: destination path of the comma-separated file (with header).
    """
    # Product name and category are kept side by side so they cannot drift
    # out of step the way two parallel lists can.
    catalogue = [
        ("Laptop", "Electronics"),
        ("Smartphone", "Electronics"),
        ("Desk", "Office"),
        ("Chair", "Office"),
        ("Monitor", "Electronics"),
        ("Printer", "Electronics"),
        ("Paper", "Stationery"),
        ("Pen", "Stationery"),
        ("Notebook", "Stationery"),
        ("Coffee Maker", "Electronics"),
        ("Cabinet", "Office"),
        ("Plastic Cups", "Sundry"),
    ]
    names = np.asarray([name for name, _ in catalogue])
    categories = np.asarray([group for _, group in catalogue])

    product_id = np.random.randint(len(names), size=nrows)
    quantity = np.random.randint(1, 11, size=nrows)
    price = np.random.randint(199, 10000, size=nrows) / 100

    # Generate random dates between 2010-01-01 and 2023-12-31 (both inclusive).
    start_date = datetime(2010, 1, 1)
    end_date = datetime(2023, 12, 31)
    span_days = (end_date - start_date).days
    # randint's upper bound is exclusive, hence +1 so the last day can occur.
    day_offsets = np.random.randint(0, span_days + 1, size=nrows)
    # Vectorised date arithmetic instead of one Python-level call per row.
    first_day = np.datetime64(start_date.date(), "D")
    order_dates = np.datetime_as_string(
        first_day + day_offsets.astype("timedelta64[D]"), unit="D"
    )

    # Define columns
    columns = {
        "order_id": np.arange(nrows),
        "order_date": order_dates,
        "customer_id": np.random.randint(100, 1000, size=nrows),
        "customer_name": [f"Customer_{i}" for i in np.random.randint(2**15, size=nrows)],
        "product_id": product_id + 200,
        "product_names": names[product_id],
        "categories": categories[product_id],
        "quantity": quantity,
        "price": price,
        # Round to cents so float noise (e.g. 59.970000000000006) is not written.
        "total": np.round(price * quantity, 2),
    }

    # Create Polars DataFrame and write to CSV with an explicit comma delimiter.
    df = pl.DataFrame(columns)
    df.write_csv(filename, separator=',', include_header=True)
|
65 |
+
|
66 |
+
# Generate 100,000 rows of data with random order_date and save to CSV
|
67 |
+
generate(100_000, "D:/Python_Projects/dashboard/sales_data.csv")
|
sales_data.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|