Spaces:

Polarisailabs
/

UBCFProductRecommendations

Running

Gopala Krishna

initial commit

8c3633d over 2 years ago

2.9 kB


	# Import necessary libraries.
	import pandas as pd
	from sklearn.metrics.pairwise import cosine_similarity

	# Read data source Excel files.
	df1 = pd.read_excel('Online_Retail.xlsx')

	# Check dataframe information.
	#df1.info()

	# Read header of dataframe.
	#df1.head()

	# Check any column containing the null value.
	#df1.isnull().any()

	# Count the number of null value records in the CustomerID column.
	#df1['CustomerID'].isna().sum()

	df1a = df1.dropna(subset=['CustomerID'])

	# Check dataframe information.
	#df1a.info()

	# Read header of dataframe.
	#df1a.head()

	# Create CustomerID vs Item (Purchased Items, by StockCode) matrix by pivot table function.
	CustomerID_Item_matrix = df1a.pivot_table(
	index='CustomerID',
	columns='StockCode',
	values='Quantity',
	aggfunc='sum'
	)

	# Display the shape of matrix, 4372 rows of CustomerID, 3684 columns of Item.
	#CustomerID_Item_matrix.shape

	# Update illustration of the matrix, 1 to represent customer have purchased item, 0 to represent customer haven't purchased.
	CustomerID_Item_matrix = CustomerID_Item_matrix.applymap(lambda x: 1 if x > 0 else 0)

	# Read header of CustomerID vs Item matrix.
	#CustomerID_Item_matrix.loc[12680:].head()

	# Create User to User similarity matrix.
	user_to_user_similarity_matrix = pd.DataFrame(
	cosine_similarity(CustomerID_Item_matrix)
	)

	# Display header of User to User similarity matrix.
	#user_to_user_similarity_matrix.head()

	# Update index to corresponding CustomerID.
	user_to_user_similarity_matrix.columns = CustomerID_Item_matrix.index
	user_to_user_similarity_matrix['CustomerID'] = CustomerID_Item_matrix.index
	user_to_user_similarity_matrix = user_to_user_similarity_matrix.set_index('CustomerID')

	# Display header of User to User similarity matrix.
	#user_to_user_similarity_matrix.head()

	# Randomly pick CustomerID (12702) to display the most similar CustomerID.
	# The most similar CustomerID is 14608, which has 51% similarity.
	#user_to_user_similarity_matrix.loc[12702.0].sort_values(ascending=False)

	# Display CustomerID (12702) purchased items.
	items_purchased_by_X = set(CustomerID_Item_matrix.loc[12702.0].iloc[
	CustomerID_Item_matrix.loc[12702.0].to_numpy().nonzero()].index)
	#items_purchased_by_X

	# Display CustomerID (14608) purchased items.
	items_purchased_by_Y = set(CustomerID_Item_matrix.loc[14608.0].iloc[
	CustomerID_Item_matrix.loc[14608.0].to_numpy().nonzero()].index)
	#items_purchased_by_Y

	# Find out items which purchased by X (12702) but not yet purchased by Y (14608).
	items_to_recommend_to_Y = items_purchased_by_X - items_purchased_by_Y

	# Display the list of items recommended for Y (14608).
	#items_to_recommend_to_Y

	# Display the list of items recommended for Y (14608) with item Description.
	print(df1a.loc[
	df1a['StockCode'].isin(items_to_recommend_to_Y),
	['StockCode', 'Description']
	].drop_duplicates().set_index('StockCode'))