Gopala Krishna committed on
Commit
8c3633d
·
1 Parent(s): a6f325e

initial commit

Browse files
.vs/UBCFProductRecommendations/FileContentIndex/23001fe7-f3c2-40de-ac4d-18f66948daf0.vsidx ADDED
Binary file (455 Bytes). View file
 
.vs/UBCFProductRecommendations/FileContentIndex/3d2d6251-3187-4dd7-b788-001dd23bb9a5.vsidx ADDED
Binary file (4.95 kB). View file
 
.vs/UBCFProductRecommendations/FileContentIndex/3d67ad77-3441-41ca-8028-b6802390d8c7.vsidx ADDED
Binary file (9.21 kB). View file
 
.vs/UBCFProductRecommendations/FileContentIndex/4de817bd-07d4-46fb-b29a-c52bae7ffd85.vsidx ADDED
Binary file (587 Bytes). View file
 
.vs/UBCFProductRecommendations/FileContentIndex/read.lock ADDED
File without changes
.vs/UBCFProductRecommendations/v17/.wsuo ADDED
Binary file (24.1 kB). View file
 
.vs/VSWorkspaceState.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "ExpandedNodes": [
3
+ ""
4
+ ],
5
+ "SelectedNode": "\\C:\\Python\\Programs\\Gradio\\HuggingSpace\\UBCFProductRecommendations",
6
+ "PreviewInSolutionExplorer": false
7
+ }
.vs/slnx.sqlite ADDED
Binary file (90.1 kB). View file
 
Online_Retail.xlsx ADDED
Binary file (455 kB). View file
 
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Import necessary libraries.
3
+ import pandas as pd
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+
6
# User-based collaborative filtering (UBCF) product recommendations built
# from the Online Retail transaction data.

# Excel workbook holding the raw transactions.
DATA_FILE = 'Online_Retail.xlsx'

# Customer pair used by the script: items bought by the source customer are
# recommended to the target customer. The original analysis notes that 14608
# is the customer most similar to 12702 (about 51% cosine similarity).
SOURCE_CUSTOMER_ID = 12702.0
TARGET_CUSTOMER_ID = 14608.0


def load_transactions(path: str = DATA_FILE) -> pd.DataFrame:
    """Read the transaction workbook and drop rows without a CustomerID.

    Args:
        path: Location of the Excel file to read.

    Returns:
        The transactions whose ``CustomerID`` is not null.
    """
    transactions = pd.read_excel(path)
    return transactions.dropna(subset=['CustomerID'])


def build_customer_item_matrix(transactions: pd.DataFrame) -> pd.DataFrame:
    """Build the CustomerID x StockCode purchase-indicator matrix.

    Quantities are summed per (CustomerID, StockCode) pair; a cell is 1 when
    that sum is positive and 0 otherwise (including pairs with no rows at all
    and pairs whose summed quantity is zero or negative).

    Args:
        transactions: Frame with ``CustomerID``, ``StockCode`` and
            ``Quantity`` columns.

    Returns:
        An integer 0/1 frame indexed by CustomerID with one column per
        StockCode.
    """
    summed_quantities = transactions.pivot_table(
        index='CustomerID',
        columns='StockCode',
        values='Quantity',
        aggfunc='sum',
    )
    # Vectorized replacement for applymap(lambda x: 1 if x > 0 else 0):
    # NaN compares as False, so missing pairs become 0 just as before.
    return (summed_quantities > 0).astype(int)


def build_user_similarity(customer_item_matrix: pd.DataFrame) -> pd.DataFrame:
    """Compute the user-to-user cosine similarity matrix.

    Args:
        customer_item_matrix: Output of :func:`build_customer_item_matrix`.

    Returns:
        A square frame whose rows and columns are both labelled with the
        CustomerIDs of ``customer_item_matrix``.
    """
    customer_ids = customer_item_matrix.index
    return pd.DataFrame(
        cosine_similarity(customer_item_matrix),
        index=customer_ids,
        columns=customer_ids,
    )


def items_purchased_by(customer_item_matrix: pd.DataFrame, customer_id) -> set:
    """Return the set of StockCodes the given customer has purchased.

    Args:
        customer_item_matrix: Output of :func:`build_customer_item_matrix`.
        customer_id: Row label of the customer to look up.

    Returns:
        The column labels whose indicator is non-zero for that customer.

    Raises:
        KeyError: If ``customer_id`` is not a row label of the matrix.
    """
    purchases = customer_item_matrix.loc[customer_id]
    return set(purchases[purchases != 0].index)


def recommend_items(
    customer_item_matrix: pd.DataFrame,
    source_customer_id,
    target_customer_id,
) -> set:
    """Return items the source customer bought that the target has not.

    Args:
        customer_item_matrix: Output of :func:`build_customer_item_matrix`.
        source_customer_id: Customer whose purchases are the candidates.
        target_customer_id: Customer receiving the recommendations.

    Returns:
        The StockCodes purchased by the source but not by the target.
    """
    source_items = items_purchased_by(customer_item_matrix, source_customer_id)
    target_items = items_purchased_by(customer_item_matrix, target_customer_id)
    return source_items - target_items


def describe_items(transactions: pd.DataFrame, stock_codes: set) -> pd.DataFrame:
    """Look up the descriptions of the given StockCodes.

    Args:
        transactions: Frame with ``StockCode`` and ``Description`` columns.
        stock_codes: StockCodes to describe.

    Returns:
        The distinct (StockCode, Description) pairs found in
        ``transactions``, indexed by StockCode.
    """
    matching_rows = transactions['StockCode'].isin(stock_codes)
    return transactions.loc[
        matching_rows,
        ['StockCode', 'Description']
    ].drop_duplicates().set_index('StockCode')


def main() -> None:
    """Print the items recommended to the target customer, with descriptions."""
    transactions = load_transactions()
    customer_item_matrix = build_customer_item_matrix(transactions)
    # The customer pair is fixed, so the full similarity matrix is not needed
    # here; call build_user_similarity() to explore other customers.
    items_to_recommend = recommend_items(
        customer_item_matrix, SOURCE_CUSTOMER_ID, TARGET_CUSTOMER_ID
    )
    print(describe_items(transactions, items_to_recommend))


if __name__ == '__main__':
    main()
87
+
88
+
89
+
requirements.txt ADDED
File without changes