v0idgy committed on
Commit
1b73364
·
verified ·
1 Parent(s): 8bb399b

Adding Application Files

Browse files
Files changed (3) hide show
  1. app.py +299 -0
  2. dashboard.py +67 -0
  3. sales_data.csv +0 -0
app.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import datetime
5
+ import warnings
6
+ import os
7
+ import tempfile
8
+ from cachetools import cached, TTLCache
9
+
10
+ warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn")
11
+
12
+ # ------------------------------------------------------------------
13
+ # 1) Load CSV data once
14
+ # ------------------------------------------------------------------
15
csv_data = None


def load_csv_data():
    """Read ``sales_data.csv`` into the module-level ``csv_data`` frame.

    The file is looked up relative to the current working directory.
    ``order_date`` is parsed as a datetime column; the remaining columns are
    pinned to explicit dtypes so pandas does not have to infer them.

    Returns:
        None. The loaded frame is stored in the global ``csv_data``.
    """
    global csv_data

    # Nullable integer / string dtypes tolerate missing values in the CSV.
    dtype_dict = {
        "order_id": "Int64",
        "customer_id": "Int64",
        "product_id": "Int64",
        "quantity": "Int64",
        "price": "float",
        "total": "float",
        "customer_name": "string",
        "product_names": "string",
        "categories": "string",
    }

    # No ``dayfirst=True`` here: the rest of this project handles dates as
    # YYYY-MM-DD (see the '%Y-%m-%d' format used when filtering), so a
    # day-first hint would contradict the data.
    csv_data = pd.read_csv(
        "sales_data.csv",
        parse_dates=["order_date"],
        low_memory=False,
        dtype=dtype_dict,
    )


# Populate ``csv_data`` once, at import time.
load_csv_data()
42
+
43
+
44
+
45
cache = TTLCache(maxsize=128, ttl=300)


@cached(cache)
def get_unique_categories():
    """Return the distinct category labels, capitalised and sorted.

    Labels are capitalised *before* de-duplicating and sorting, so values
    that differ only by letter case collapse into a single entry. This
    matches ``filter_data``, which compares ``str.capitalize()`` of the
    column against the selected label.

    Returns:
        list[str]: Sorted capitalised labels, or ``[]`` when no data has been
        loaded. The result is memoised in ``cache`` (TTL of 300 seconds).
    """
    if csv_data is None:
        return []
    raw_labels = csv_data['categories'].dropna().unique().tolist()
    return sorted({label.capitalize() for label in raw_labels})
55
+
56
def get_date_range():
    """Return ``(earliest, latest)`` values of the ``order_date`` column.

    Returns:
        tuple: ``(None, None)`` when the module-level ``csv_data`` frame is
        missing or empty, otherwise the column's minimum and maximum.
    """
    frame = csv_data
    if frame is None or frame.empty:
        return None, None
    order_dates = frame['order_date']
    return order_dates.min(), order_dates.max()
61
+
62
def filter_data(start_date, end_date, category):
    """Return a copy of the rows inside a date window, optionally by category.

    Args:
        start_date: Inclusive lower bound. A ``str`` must be in ``YYYY-MM-DD``
            form; any other value is handed to ``pd.to_datetime`` as is.
        end_date: Inclusive upper bound, same accepted forms as ``start_date``.
        category: ``"All Categories"`` to skip category filtering, otherwise
            the label compared against the capitalised ``categories`` column.

    Returns:
        pandas.DataFrame: An independent copy of the matching rows.

    Raises:
        ValueError: If a string bound does not match ``%Y-%m-%d``.
    """
    def _to_timestamp(bound):
        # Strings go through strptime first so only YYYY-MM-DD is accepted.
        if isinstance(bound, str):
            bound = datetime.datetime.strptime(bound, '%Y-%m-%d').date()
        return pd.to_datetime(bound)

    lower = _to_timestamp(start_date)
    upper = _to_timestamp(end_date)

    order_dates = csv_data['order_date']
    in_window = (order_dates >= lower) & (order_dates <= upper)
    selected = csv_data.loc[in_window].copy()

    if category == "All Categories":
        return selected

    same_category = selected['categories'].str.capitalize() == category
    return selected.loc[same_category].copy()
79
+
80
def get_dashboard_stats(start_date, end_date, category):
    """Compute the headline metrics for the filtered sales rows.

    Args:
        start_date: Inclusive start of the window (forwarded to ``filter_data``).
        end_date: Inclusive end of the window (forwarded to ``filter_data``).
        category: Category label, or ``"All Categories"``.

    Returns:
        tuple: ``(total_revenue, total_orders, avg_order_value, top_category)``.
        When no rows match, ``(0, 0, 0, "N/A")``. ``top_category`` is the
        capitalised category with the highest summed revenue, or ``"N/A"``
        when no category group exists.
    """
    df = filter_data(start_date, end_date, category)
    if df.empty:
        return (0, 0, 0, "N/A")

    df['revenue'] = df['price'] * df['quantity']
    total_revenue = df['revenue'].sum()
    total_orders = df['order_id'].nunique()
    avg_order_value = total_revenue / total_orders if total_orders else 0

    cat_revenues = df.groupby('categories')['revenue'].sum().sort_values(ascending=False)
    # Capitalise only a real category name; the "N/A" placeholder must stay
    # verbatim (capitalising it would turn it into "N/a").
    if cat_revenues.empty:
        top_category = "N/A"
    else:
        top_category = cat_revenues.index[0].capitalize()

    return (total_revenue, total_orders, avg_order_value, top_category)
94
+
95
def get_data_for_table(start_date, end_date, category):
    """Build the raw-data table for the selected filters.

    Rows are ordered by ascending ``order_id`` and then descending
    ``order_date``. Only the known columns that are present are kept, in a
    fixed order, and a computed ``revenue`` column (price * quantity) is
    appended.

    Returns:
        pandas.DataFrame: The table, or an empty frame when nothing matches.
    """
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    ordered = filtered.sort_values(
        by=["order_id", "order_date"],
        ascending=[True, False],
    )

    preferred_columns = (
        "order_id", "order_date", "customer_id", "customer_name",
        "product_id", "product_names", "categories", "quantity",
        "price", "total",
    )
    present = [name for name in preferred_columns if name in ordered.columns]

    table = ordered[present].copy()
    table['revenue'] = table['price'] * table['quantity']
    return table
112
+
113
def get_plot_data(start_date, end_date, category):
    """Return revenue summed per calendar day for the selected filters.

    Returns:
        pandas.DataFrame: Columns ``date`` and ``revenue``, or an empty frame
        when no rows match.
    """
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    filtered['revenue'] = filtered['price'] * filtered['quantity']
    order_day = filtered['order_date'].dt.date
    daily = filtered.groupby(order_day)['revenue'].sum().reset_index()
    # The grouping key keeps the name 'order_date'; expose it as 'date'.
    return daily.rename(columns={'order_date': 'date'})
121
+
122
def get_revenue_by_category(start_date, end_date, category):
    """Return revenue summed per category, highest revenue first.

    Returns:
        pandas.DataFrame: Columns ``categories`` and ``revenue``, or an empty
        frame when no rows match.
    """
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    filtered['revenue'] = filtered['price'] * filtered['quantity']
    per_category = filtered.groupby('categories')['revenue'].sum().reset_index()
    return per_category.sort_values(by='revenue', ascending=False)
130
+
131
def get_top_products(start_date, end_date, category):
    """Return the ten products with the highest summed revenue.

    Returns:
        pandas.DataFrame: Columns ``product_names`` and ``revenue`` sorted by
        descending revenue (at most 10 rows), or an empty frame when no rows
        match.
    """
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    filtered['revenue'] = filtered['price'] * filtered['quantity']
    per_product = filtered.groupby('product_names')['revenue'].sum().reset_index()
    ranked = per_product.sort_values(by='revenue', ascending=False)
    return ranked.head(10)
139
+
140
def create_matplotlib_figure(data, x_col, y_col, title, xlabel, ylabel, orientation='v'):
    """Render a bar chart to a temporary PNG file and return its path.

    Args:
        data: DataFrame holding the values to plot. An empty frame produces a
            "No data available" placeholder instead of bars.
        x_col: Column passed as the first argument to ``bar`` / ``barh``.
        y_col: Column passed as the second argument to ``bar`` / ``barh``.
        title: Chart title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        orientation: ``'v'`` for vertical bars (x tick labels rotated 45
            degrees); any other value draws horizontal bars with the y axis
            inverted so the first row is at the top.

    Returns:
        str: Path of the PNG file. The file is created with ``delete=False``
        and is not removed by this function.
    """
    # Draw on an explicit figure/axes pair instead of pyplot's implicit
    # "current figure", and always close the figure, even if plotting or
    # saving raises, so failed renders do not leave figures open.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        if data.empty:
            ax.text(0.5, 0.5, 'No data available', ha='center', va='center')
        elif orientation == 'v':
            ax.bar(data[x_col], data[y_col])
            for tick_label in ax.get_xticklabels():
                tick_label.set_rotation(45)
                tick_label.set_ha('right')
        else:
            ax.barh(data[x_col], data[y_col])
            ax.invert_yaxis()

        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        fig.tight_layout()

        # Reserve a unique file name, release the handle, then write to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
            image_path = tmpfile.name
        fig.savefig(image_path)
    finally:
        plt.close(fig)

    return image_path
161
+
162
+
163
+
164
def update_dashboard(start_date, end_date, category):
    """Recompute every dashboard output for the given filters.

    Returns:
        tuple: Eight values in this order: path of the revenue-over-time
        image, path of the revenue-by-category image, path of the
        top-products image, the raw data table, total revenue, total orders,
        average order value, and the top category.
    """
    filters = (start_date, end_date, category)

    total_revenue, total_orders, avg_order_value, top_category = get_dashboard_stats(*filters)

    # Aggregations feeding the three charts.
    daily_revenue = get_plot_data(*filters)
    revenue_per_category = get_revenue_by_category(*filters)
    best_sellers = get_top_products(*filters)

    # (data, x column, y column, title, x label, y label, orientation)
    chart_specs = (
        (daily_revenue, 'date', 'revenue',
         "Revenue Over Time", "Date", "Revenue", 'v'),
        (revenue_per_category, 'categories', 'revenue',
         "Revenue by Category", "Category", "Revenue", 'v'),
        (best_sellers, 'product_names', 'revenue',
         "Top Products", "Revenue", "Product Name", 'h'),
    )
    image_paths = [
        create_matplotlib_figure(frame, x_col, y_col, title, xlabel, ylabel, orientation=orient)
        for frame, x_col, y_col, title, xlabel, ylabel, orient in chart_specs
    ]

    table_data = get_data_for_table(*filters)

    return (
        *image_paths,
        table_data,
        total_revenue,
        total_orders,
        avg_order_value,
        top_category,
    )
198
+
199
def create_dashboard():
    """Build the Gradio Blocks UI for the sales dashboard.

    The layout is: a filter row (start date, end date, category), a row of
    key metrics, three tabs holding chart images, and the raw data table.
    Changing any filter re-runs ``update_dashboard``; the same function also
    runs once on page load with the full date range and "All Categories".

    Returns:
        gr.Blocks: The assembled (not yet launched) dashboard.
    """
    min_date, max_date = get_date_range()
    # pd.isna covers both None (no data loaded) and NaT (no valid dates in
    # the column); either would break the strftime calls below.
    if pd.isna(min_date) or pd.isna(max_date):
        min_date = max_date = datetime.datetime.now()

    default_start_date = min_date
    default_end_date = max_date

    with gr.Blocks(css="""
        footer {display: none !important;}
        .tabs {border: none !important;}
        .gr-plot {border: none !important; box-shadow: none !important;}
    """) as dashboard:

        gr.Markdown("# Sales Performance Dashboard")

        # Filters row
        with gr.Row():
            start_date = gr.DateTime(
                label="Start Date",
                value=default_start_date.strftime('%Y-%m-%d'),
                include_time=False,
                type="datetime"
            )
            end_date = gr.DateTime(
                label="End Date",
                value=default_end_date.strftime('%Y-%m-%d'),
                include_time=False,
                type="datetime"
            )
            category_filter = gr.Dropdown(
                choices=["All Categories"] + get_unique_categories(),
                label="Category",
                value="All Categories"
            )

        gr.Markdown("# Key Metrics")

        # Stats row
        with gr.Row():
            total_revenue = gr.Number(label="Total Revenue", value=0)
            total_orders = gr.Number(label="Total Orders", value=0)
            avg_order_value = gr.Number(label="Average Order Value", value=0)
            top_category = gr.Textbox(label="Top Category", value="N/A")

        gr.Markdown("# Visualisations")
        # Tabs for plots
        with gr.Tabs():
            with gr.Tab("Revenue Over Time"):
                revenue_over_time_image = gr.Image(label="Revenue Over Time", container=False)
            with gr.Tab("Revenue by Category"):
                revenue_by_category_image = gr.Image(label="Revenue by Category", container=False)
            with gr.Tab("Top Products"):
                top_products_image = gr.Image(label="Top Products", container=False)

        gr.Markdown("# Raw Data")
        # Data table (below the plots)
        data_table = gr.DataFrame(
            label="Sales Data",
            type="pandas",
            interactive=False
        )

        # Output components, in the order update_dashboard returns its values.
        dashboard_outputs = [
            revenue_over_time_image,
            revenue_by_category_image,
            top_products_image,
            data_table,
            total_revenue,
            total_orders,
            avg_order_value,
            top_category,
        ]
        filter_inputs = [start_date, end_date, category_filter]

        # When any filter changes, update everything.
        for filter_component in filter_inputs:
            filter_component.change(
                fn=update_dashboard,
                inputs=filter_inputs,
                outputs=dashboard_outputs,
            )

        # Initial load: full date range, every category.
        dashboard.load(
            fn=lambda: update_dashboard(default_start_date, default_end_date, "All Categories"),
            outputs=dashboard_outputs,
        )

    return dashboard
296
+
297
if __name__ == "__main__":
    # Build the UI and start serving it (share=True is passed to launch).
    create_dashboard().launch(share=True)
dashboard.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # generate the 100K record CSV file
2
+ #
3
+ import polars as pl
4
+ import numpy as np
5
+ from datetime import datetime, timedelta
6
+
7
def generate(nrows: int, filename: str):
    """Write a CSV of ``nrows`` randomly generated sales records.

    Each row gets a sequential ``order_id``, a random product (with its
    matching category), a quantity in 1..10, a price between 1.99 and 99.99,
    and an ``order_date`` drawn uniformly from 2010-01-01 to 2023-12-31
    inclusive, formatted as ``YYYY-MM-DD``.

    Args:
        nrows: Number of records to generate.
        filename: Destination path of the CSV file.
    """
    names = np.asarray(
        [
            "Laptop",
            "Smartphone",
            "Desk",
            "Chair",
            "Monitor",
            "Printer",
            "Paper",
            "Pen",
            "Notebook",
            "Coffee Maker",
            "Cabinet",
            "Plastic Cups",
        ]
    )
    # categories[i] is the category of names[i]; both are indexed by product_id.
    categories = np.asarray(
        [
            "Electronics",
            "Electronics",
            "Office",
            "Office",
            "Electronics",
            "Electronics",
            "Stationery",
            "Stationery",
            "Stationery",
            "Electronics",
            "Office",
            "Sundry",
        ]
    )
    product_id = np.random.randint(len(names), size=nrows)
    quantity = np.random.randint(1, 11, size=nrows)
    price = np.random.randint(199, 10000, size=nrows) / 100

    # Random dates between 2010-01-01 and 2023-12-31. randint's upper bound
    # is exclusive, hence the +1 so the final day can actually be drawn.
    first_day = np.datetime64("2010-01-01", "D")
    last_day = np.datetime64("2023-12-31", "D")
    n_days = int((last_day - first_day) / np.timedelta64(1, "D")) + 1
    day_offsets = np.random.randint(0, n_days, size=nrows).astype("timedelta64[D]")
    # Vectorised date arithmetic; the result is an array of 'YYYY-MM-DD' strings.
    order_dates = np.datetime_as_string(first_day + day_offsets, unit="D")

    # Define columns
    columns = {
        "order_id": np.arange(nrows),
        "order_date": order_dates,
        "customer_id": np.random.randint(100, 1000, size=nrows),
        "customer_name": [f"Customer_{i}" for i in np.random.randint(2**15, size=nrows)],
        "product_id": product_id + 200,
        "product_names": names[product_id],
        "categories": categories[product_id],
        "quantity": quantity,
        "price": price,
        "total": price * quantity,
    }
    # Create the Polars DataFrame and write it out as comma-separated CSV.
    df = pl.DataFrame(columns)
    df.write_csv(filename, separator=',', include_header=True)
65
+
66
# Generate 100,000 rows of data with random order_date and save to CSV.
# Guarded so that importing this module does not write a file as a side effect.
# NOTE(review): the output path is an absolute, machine-specific location;
# confirm it matches where the app expects to find sales_data.csv.
if __name__ == "__main__":
    generate(100_000, "D:/Python_Projects/dashboard/sales_data.csv")
sales_data.csv ADDED
The diff for this file is too large to render. See raw diff