Gopala Krishna
committed on
Commit
·
8c3633d
1
Parent(s):
a6f325e
initial commit
Browse files- .vs/UBCFProductRecommendations/FileContentIndex/23001fe7-f3c2-40de-ac4d-18f66948daf0.vsidx +0 -0
- .vs/UBCFProductRecommendations/FileContentIndex/3d2d6251-3187-4dd7-b788-001dd23bb9a5.vsidx +0 -0
- .vs/UBCFProductRecommendations/FileContentIndex/3d67ad77-3441-41ca-8028-b6802390d8c7.vsidx +0 -0
- .vs/UBCFProductRecommendations/FileContentIndex/4de817bd-07d4-46fb-b29a-c52bae7ffd85.vsidx +0 -0
- .vs/UBCFProductRecommendations/FileContentIndex/read.lock +0 -0
- .vs/UBCFProductRecommendations/v17/.wsuo +0 -0
- .vs/VSWorkspaceState.json +7 -0
- .vs/slnx.sqlite +0 -0
- Online_Retail.xlsx +0 -0
- app.py +89 -0
- requirements.txt +0 -0
.vs/UBCFProductRecommendations/FileContentIndex/23001fe7-f3c2-40de-ac4d-18f66948daf0.vsidx
ADDED
Binary file (455 Bytes). View file
|
|
.vs/UBCFProductRecommendations/FileContentIndex/3d2d6251-3187-4dd7-b788-001dd23bb9a5.vsidx
ADDED
Binary file (4.95 kB). View file
|
|
.vs/UBCFProductRecommendations/FileContentIndex/3d67ad77-3441-41ca-8028-b6802390d8c7.vsidx
ADDED
Binary file (9.21 kB). View file
|
|
.vs/UBCFProductRecommendations/FileContentIndex/4de817bd-07d4-46fb-b29a-c52bae7ffd85.vsidx
ADDED
Binary file (587 Bytes). View file
|
|
.vs/UBCFProductRecommendations/FileContentIndex/read.lock
ADDED
File without changes
|
.vs/UBCFProductRecommendations/v17/.wsuo
ADDED
Binary file (24.1 kB). View file
|
|
.vs/VSWorkspaceState.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"ExpandedNodes": [
|
3 |
+
""
|
4 |
+
],
|
5 |
+
"SelectedNode": "\\C:\\Python\\Programs\\Gradio\\HuggingSpace\\UBCFProductRecommendations",
|
6 |
+
"PreviewInSolutionExplorer": false
|
7 |
+
}
|
.vs/slnx.sqlite
ADDED
Binary file (90.1 kB). View file
|
|
Online_Retail.xlsx
ADDED
Binary file (455 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Import necessary libraries.
|
3 |
+
import pandas as pd
|
4 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
+
|
6 |
+
# User-based collaborative filtering on the Online Retail transactions:
# recommend to one customer the items that a similar customer has bought.

# Workbook holding the transactions, resolved against the working directory.
DATA_FILE = 'Online_Retail.xlsx'

# Example customer pair used by the script. Per the original author's note,
# 14608 is the customer most similar to 12702 (about 51% cosine similarity).
# The IDs are written as floats, as in the original lookups.
SOURCE_CUSTOMER_ID = 12702.0
TARGET_CUSTOMER_ID = 14608.0


def load_transactions(path=DATA_FILE):
    """Read the transaction workbook and drop rows without a CustomerID.

    Args:
        path: Location of the Excel workbook to read.

    Returns:
        A DataFrame of the workbook rows whose 'CustomerID' is not null.
    """
    return pd.read_excel(path).dropna(subset=['CustomerID'])


def build_customer_item_matrix(transactions):
    """Build the 0/1 CustomerID x StockCode purchase matrix.

    Args:
        transactions: DataFrame with 'CustomerID', 'StockCode' and
            'Quantity' columns.

    Returns:
        A DataFrame indexed by CustomerID with one column per StockCode.
        A cell is 1 when the customer's summed Quantity for that item is
        positive and 0 otherwise (never bought, or a net quantity <= 0).
        The original author noted a shape of 4372 x 3684 for this dataset.
    """
    quantities = transactions.pivot_table(
        index='CustomerID',
        columns='StockCode',
        values='Quantity',
        aggfunc='sum',
    )
    # Vectorised form of applymap(lambda x: 1 if x > 0 else 0), which is
    # deprecated in recent pandas. NaN > 0 is False, so cells without any
    # purchase still become 0.
    return (quantities > 0).astype(int)


def build_user_similarity_matrix(customer_item_matrix):
    """Return the customer-to-customer cosine similarity matrix.

    Args:
        customer_item_matrix: Matrix from build_customer_item_matrix().

    Returns:
        A square DataFrame whose rows and columns are both labelled with
        the CustomerID index of the input matrix.
    """
    customer_ids = customer_item_matrix.index
    # Label both axes at construction instead of patching them afterwards.
    return pd.DataFrame(
        cosine_similarity(customer_item_matrix),
        index=customer_ids,
        columns=customer_ids,
    )


def purchased_items(customer_item_matrix, customer_id):
    """Return the set of StockCodes that `customer_id` has purchased.

    Raises:
        KeyError: If `customer_id` is not in the matrix index.
    """
    row = customer_item_matrix.loc[customer_id]
    return set(row.index[row.to_numpy() != 0])


def items_to_recommend(customer_item_matrix, source_customer_id,
                       target_customer_id):
    """Return items bought by the source customer but not by the target."""
    return (
        purchased_items(customer_item_matrix, source_customer_id)
        - purchased_items(customer_item_matrix, target_customer_id)
    )


def describe_items(transactions, stock_codes):
    """Return the distinct (StockCode, Description) pairs for `stock_codes`.

    Args:
        transactions: DataFrame with 'StockCode' and 'Description' columns.
        stock_codes: Collection of StockCodes to look up.

    Returns:
        A DataFrame indexed by StockCode with a 'Description' column. A
        StockCode appears once per distinct description found for it.
    """
    return transactions.loc[
        transactions['StockCode'].isin(stock_codes),
        ['StockCode', 'Description'],
    ].drop_duplicates().set_index('StockCode')


def main(source_customer_id=SOURCE_CUSTOMER_ID,
         target_customer_id=TARGET_CUSTOMER_ID):
    """Print the items recommended to the target customer.

    The recommendations are the items the source customer purchased that
    the target customer has not, listed with their descriptions.
    """
    transactions = load_transactions()
    customer_item_matrix = build_customer_item_matrix(transactions)
    recommended = items_to_recommend(
        customer_item_matrix, source_customer_id, target_customer_id
    )
    print(describe_items(transactions, recommended))


if __name__ == '__main__':
    main()
|
87 |
+
|
88 |
+
|
89 |
+
|
requirements.txt
ADDED
File without changes
|