initial commit

Browse files

Files changed (11) hide show

.vs/UBCFProductRecommendations/FileContentIndex/23001fe7-f3c2-40de-ac4d-18f66948daf0.vsidx +0 -0
.vs/UBCFProductRecommendations/FileContentIndex/3d2d6251-3187-4dd7-b788-001dd23bb9a5.vsidx +0 -0
.vs/UBCFProductRecommendations/FileContentIndex/3d67ad77-3441-41ca-8028-b6802390d8c7.vsidx +0 -0
.vs/UBCFProductRecommendations/FileContentIndex/4de817bd-07d4-46fb-b29a-c52bae7ffd85.vsidx +0 -0
.vs/UBCFProductRecommendations/FileContentIndex/read.lock +0 -0
.vs/UBCFProductRecommendations/v17/.wsuo +0 -0
.vs/VSWorkspaceState.json +7 -0
.vs/slnx.sqlite +0 -0
Online_Retail.xlsx +0 -0
app.py +89 -0
requirements.txt +0 -0

.vs/UBCFProductRecommendations/FileContentIndex/23001fe7-f3c2-40de-ac4d-18f66948daf0.vsidx ADDED Viewed

Binary file (455 Bytes). View file

.vs/UBCFProductRecommendations/FileContentIndex/3d2d6251-3187-4dd7-b788-001dd23bb9a5.vsidx ADDED Viewed

Binary file (4.95 kB). View file

.vs/UBCFProductRecommendations/FileContentIndex/3d67ad77-3441-41ca-8028-b6802390d8c7.vsidx ADDED Viewed

Binary file (9.21 kB). View file

.vs/UBCFProductRecommendations/FileContentIndex/4de817bd-07d4-46fb-b29a-c52bae7ffd85.vsidx ADDED Viewed

Binary file (587 Bytes). View file

.vs/UBCFProductRecommendations/FileContentIndex/read.lock ADDED Viewed

File without changes

.vs/UBCFProductRecommendations/v17/.wsuo ADDED Viewed

Binary file (24.1 kB). View file

.vs/VSWorkspaceState.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "ExpandedNodes": [
+    ""
+  ],
+  "SelectedNode": "\\C:\\Python\\Programs\\Gradio\\HuggingSpace\\UBCFProductRecommendations",
+  "PreviewInSolutionExplorer": false
+}

.vs/slnx.sqlite ADDED Viewed

Binary file (90.1 kB). View file

Online_Retail.xlsx ADDED Viewed

Binary file (455 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+# Import necessary libraries.
+import pandas as pd
+from sklearn.metrics.pairwise import cosine_similarity
+# Read data source Excel files.
+df1 = pd.read_excel('Online_Retail.xlsx')
+# Check dataframe information.
+#df1.info()
+# Read header of dataframe.
+#df1.head()
+# Check any column containing the null value.
+#df1.isnull().any()
+# Count the number of null value records in the CustomerID column.
+#df1['CustomerID'].isna().sum()
+df1a = df1.dropna(subset=['CustomerID'])
+# Check dataframe information.
+#df1a.info()
+# Read header of dataframe.
+#df1a.head()
+# Create CustomerID vs Item (Purchased Items, by StockCode) matrix by pivot table function.
+CustomerID_Item_matrix = df1a.pivot_table(
+    index='CustomerID',
+    columns='StockCode',
+    values='Quantity',
+    aggfunc='sum'
+)
+# Display the shape of matrix, 4372 rows of CustomerID, 3684 columns of Item.
+#CustomerID_Item_matrix.shape
+# Update illustration of the matrix, 1 to represent customer have purchased item, 0 to represent customer haven't purchased.
+CustomerID_Item_matrix = CustomerID_Item_matrix.applymap(lambda x: 1 if x > 0 else 0)
+# Read header of CustomerID vs Item matrix.
+#CustomerID_Item_matrix.loc[12680:].head()
+# Create User to User similarity matrix.
+user_to_user_similarity_matrix = pd.DataFrame(
+    cosine_similarity(CustomerID_Item_matrix)
+)
+# Display header of User to User similarity matrix.
+#user_to_user_similarity_matrix.head()
+# Update index to corresponding CustomerID.
+user_to_user_similarity_matrix.columns = CustomerID_Item_matrix.index
+user_to_user_similarity_matrix['CustomerID'] = CustomerID_Item_matrix.index
+user_to_user_similarity_matrix = user_to_user_similarity_matrix.set_index('CustomerID')
+# Display header of User to User similarity matrix.
+#user_to_user_similarity_matrix.head()
+# Randomly pick CustomerID (12702) to display the most similar CustomerID.
+# The most similar CustomerID is 14608, which has 51% similarity.
+#user_to_user_similarity_matrix.loc[12702.0].sort_values(ascending=False)
+# Display CustomerID (12702) purchased items.
+items_purchased_by_X = set(CustomerID_Item_matrix.loc[12702.0].iloc[
+    CustomerID_Item_matrix.loc[12702.0].to_numpy().nonzero()].index)
+#items_purchased_by_X
+# Display CustomerID (14608) purchased items.
+items_purchased_by_Y = set(CustomerID_Item_matrix.loc[14608.0].iloc[
+    CustomerID_Item_matrix.loc[14608.0].to_numpy().nonzero()].index)
+#items_purchased_by_Y
+# Find out items which purchased by X (12702) but not yet purchased by Y (14608).
+items_to_recommend_to_Y = items_purchased_by_X - items_purchased_by_Y
+# Display the list of items recommended for Y (14608).
+#items_to_recommend_to_Y
+# Display the list of items recommended for Y (14608) with item Description.
+print(df1a.loc[
+    df1a['StockCode'].isin(items_to_recommend_to_Y),
+    ['StockCode', 'Description']
+].drop_duplicates().set_index('StockCode'))

requirements.txt ADDED Viewed

File without changes