debisoft committed on
Commit
fef9fa7
·
1 Parent(s): 34b8562
Files changed (2) hide show
  1. app.py +256 -1
  2. tinder_data.csv +0 -0
app.py CHANGED
@@ -15,8 +15,263 @@ from sklearn.preprocessing import OneHotEncoder
15
 
16
 
17
 
18
def greet(name, str2):
    """Return a greeting for ``name`` followed by the echoed ``str2`` value."""
    # Pieces are joined in order; both arguments must already be strings.
    pieces = ["Hello ", name, "!!", " str2=", str2]
    return "".join(pieces)
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
# Two text inputs are passed to greet(); its return value is rendered as text.
iface = gr.Interface(
    fn=greet,
    inputs=["text", "text"],
    outputs="text",
)
iface.launch()
 
15
 
16
 
17
 
18
def greet_o(name, str2):
    """Return a greeting for ``name`` followed by the echoed ``str2`` value."""
    # Pieces are joined in order; both arguments must already be strings.
    pieces = ["Hello ", name, "!!", " str2=", str2]
    return "".join(pieces)
20
 
21
def greet(name, str2):
    """Recommend a match for a fixed sample profile and format a greeting.

    The profile below is hard-coded. ``name`` is accepted because the Gradio
    interface supplies two text inputs, but it is not used here.

    Args:
        name: First text input (currently ignored).
        str2: Second text input, echoed at the end of the result.

    Returns:
        ``"Hello <username>!! str2=<str2>"`` where ``<username>`` is the first
        entry returned by ``recommend``.
    """
    # Key order is preserved as the column order of the feature frame.
    profile = {
        # Numerical / already-encoded columns.
        'age': 22.0,
        'status': 1.0,
        'sex': 0.0,
        'height': 60.0,
        'smokes': 1.0,
        'new_languages': 2.0,
        'body_profile': 0.0,
        'education_level': 4.0,
        'dropped_out': 0.0,
        'bio': 'I am a foodie and traveller. But sometimes like to sit alone in a corner and read a good fiction.',
        'location_preference': 2.0,
        'num_languages': 2.0,
        'drinks_encoded': 0.0,
        'drugs_encoded': 0.0,
        # One-hot / binary encoded categorical columns.
        'location_new_york': 0.0,
        'location_northern_california': 1.0,
        'location_southern_california': 0.0,
        'job_encoded': 4.0,
        'pets_0': 1.0,
        'pets_1': 1.0,
        'pets_2': 1.0,
        'pets_3': 1.0,
    }

    # Vectorise the bio with the already-fitted module-level TF-IDF model.
    bio_tfidf = pd.DataFrame(
        tfidf.transform([profile['bio']]).toarray(),
        columns=feature_names,
    )

    # One-row frame of the non-text features, then append the TF-IDF columns.
    user_df = pd.DataFrame(profile, index=[0])
    user_df.drop("bio", axis=1, inplace=True)
    user_df = pd.concat([user_df, bio_tfidf], axis=1)

    suggested_arr = recommend(user_df)

    # recommend() returns a Series that keeps the dataset's row labels, so
    # ``suggested_arr[0]`` is a label lookup and raises KeyError unless row 0
    # happens to be among the recommendations. Take the first entry by
    # position instead.
    top_match = suggested_arr.iloc[0]

    return "Hello " + str(top_match) + "!!" + " str2=" + str2
61
+
62
# Load the profile dataset.
tinder_df = pd.read_csv("tinder_data.csv")

# Number of languages per profile: the 'language' cell is treated as a
# comma-separated list, so the count is "number of commas + 1".
tinder_df['num_languages'] = tinder_df['language'].str.count(',') + 1
tinder_df.drop(["language"], axis=1, inplace=True)

# Map the textual location preference to a numeric strength.
# A value missing from the mapping raises KeyError.
place_type_strength = {
    'anywhere': 1.0,
    'same state': 2.0,
    'same city': 2.5,
}
tinder_df['location_preference'] = tinder_df['location_preference'].apply(
    lambda x: place_type_strength[x]
)

# Columns with exactly two categories are replaced by 0/1 in place.
two_unique_values_column = {
    'sex': {'f': 1, 'm': 0},
    'dropped_out': {'no': 0, 'yes': 1},
}
tinder_df.replace(two_unique_values_column, inplace=True)

# Map relationship status to a numeric strength.
status_type_strength = {
    'single': 2.0,
    'available': 2.0,
    'seeing someone': 1.0,
    'married': 1.0,
}
tinder_df['status'] = tinder_df['status'].apply(
    lambda x: status_type_strength[x]
)

# 'orientation' is not kept as a feature. The encoder is still fitted so the
# module-level name keeps its fitted state, but the column is dropped directly
# instead of first being overwritten with encoded values that would be
# discarded on the very next statement.
orientation_encoder = LabelEncoder()
orientation_encoder.fit(tinder_df['orientation'])
tinder_df.drop("orientation", axis=1, inplace=True)

# Collapse the drinking categories into three coarser buckets.
drinking_habit = {
    'socially': 'sometimes',
    'rarely': 'sometimes',
    'not at all': 'do not drink',
    'often': 'drinks often',
    'very often': 'drinks often',
    'desperately': 'drinks often',
}
tinder_df['drinks'] = tinder_df['drinks'].apply(
    lambda x: drinking_habit[x]
)

# A single encoder is fitted on the values of both columns so that 'drinks'
# and 'drugs' share one label space.
habit_encoder = LabelEncoder()
habit_encoder.fit(tinder_df[['drinks', 'drugs']].values.reshape(-1))

tinder_df['drinks_encoded'] = habit_encoder.transform(tinder_df['drinks'])
tinder_df['drugs_encoded'] = habit_encoder.transform(tinder_df['drugs'])

# The raw text columns are no longer needed once encoded.
tinder_df.drop(["drinks", "drugs"], axis=1, inplace=True)
139
+
140
# Cities grouped by region. Any city not listed here is reported as
# "northern_california" by get_region().
region_dict = {
    'southern_california': [
        'los angeles',
        'san diego',
        'hacienda heights',
        'north hollywood',
        'phoenix',
    ],
    'new_york': [
        'brooklyn',
        'new york',
    ],
}


def _build_city_lookup(regions):
    """Return a lower-cased ``city -> region`` dict for ``regions``."""
    lookup = {}
    for region, cities in regions.items():
        for city in cities:
            # setdefault keeps the first region that lists a city, matching
            # a front-to-back scan of the regions.
            lookup.setdefault(city.lower(), region)
    return lookup


# Built once at import time so get_region() does a single dict lookup instead
# of re-lowercasing every city list on every call.
_CITY_TO_REGION = _build_city_lookup(region_dict)


def get_region(city):
    """Return the region name for ``city`` (case-insensitive).

    Args:
        city: City name as a string.

    Returns:
        The key of ``region_dict`` whose list contains ``city``, or
        ``"northern_california"`` when the city is not listed.
    """
    return _CITY_TO_REGION.get(city.lower(), "northern_california")
151
+
152
+
153
# Keep only the part of the location before the first ", " and map it to a
# region name.
tinder_df['location'] = (
    tinder_df['location'].str.split(', ').str[0].apply(get_region)
)

# One-hot encode the region.
location_encoder = OneHotEncoder()
location_encoded = location_encoder.fit_transform(tinder_df[['location']])
location_encoded_df = pd.DataFrame(
    location_encoded.toarray(),
    columns=location_encoder.get_feature_names_out(['location']),
)

# Append the one-hot columns and drop the raw location column.
tinder_df = pd.concat([tinder_df, location_encoded_df], axis=1)
tinder_df.drop(["location"], axis=1, inplace=True)

# Label-encode the job column into 'job_encoded', then drop the original.
job_encoder = LabelEncoder()
job_encoder.fit(tinder_df['job'])
tinder_df['job_encoded'] = job_encoder.transform(tinder_df['job'])
tinder_df.drop('job', axis=1, inplace=True)

# Only 'no' maps to 1.0; every other smoking answer maps to 0.
smokes = {
    'no': 1.0,
    'sometimes': 0,
    'yes': 0,
    'when drinking':0,
    'trying to quit':0
}
tinder_df['smokes'] = tinder_df['smokes'].apply(lambda x: smokes[x])

# Binary-encode the pets column, append the encoded columns and drop the
# original.
bin_enc = ce.BinaryEncoder(cols=['pets'])
pet_enc = bin_enc.fit_transform(tinder_df['pets'])
tinder_df = pd.concat([tinder_df, pet_enc], axis=1)
tinder_df.drop("pets", axis=1, inplace=True)

# Label-encode 'new_languages' in place. A dedicated encoder is used so the
# fitted one-hot ``location_encoder`` above is not overwritten.
language_encoder = LabelEncoder()
language_encoder.fit(tinder_df['new_languages'])
tinder_df['new_languages'] = language_encoder.transform(
    tinder_df['new_languages']
)

# Label-encode 'body_profile' in place.
le = LabelEncoder()
tinder_df["body_profile"] = le.fit_transform(tinder_df["body_profile"])

# TF-IDF over the free-text bios, ignoring English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(tinder_df['bio'])

# Column labels for the TF-IDF matrix, in column order. ``tfidf.vocabulary_``
# is a term -> column-index mapping whose key order is not the column order,
# so using it as the column labels attaches the wrong term to each column.
feature_names = tfidf.get_feature_names_out()

tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=feature_names)

# Full feature matrix: non-text features followed by the TF-IDF columns.
tinder_dfs = tinder_df.drop(["bio", "user_id", "username"], axis=1)
tinder_dfs = pd.concat([tinder_dfs, tfidf_df], axis=1)

# Reduce the feature matrix to 100 components.
svd = TruncatedSVD(n_components=100)
svd_matrix = svd.fit_transform(tinder_dfs)

# Pairwise cosine similarity between all dataset users.
# NOTE(review): recommend() computes its own similarities against
# ``svd_matrix`` and does not read this matrix; it grows quadratically with
# the number of rows, so consider removing it if nothing else needs it.
cosine_sim = cosine_similarity(svd_matrix)
252
+
253
def recommend(user_df, num_recommendations=5):
    """Return the usernames of the dataset users most similar to ``user_df``.

    Args:
        user_df: Feature frame laid out like the one the module-level ``svd``
            was fitted on. Only its first row is scored.
        num_recommendations: Maximum number of usernames to return.

    Returns:
        A pandas Series of usernames ordered from most to least similar. It
        keeps the row labels of ``tinder_df``, so use ``.iloc`` for positional
        access.
    """
    # Project the query profile into the same reduced space as the dataset.
    user_vec = svd.transform(user_df)

    # Similarity of the (first) query row to every dataset user.
    scores = cosine_similarity(user_vec, svd_matrix)[0]

    # Rank dataset rows by descending similarity; sorted() is stable, so ties
    # keep their original row order.
    ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)

    # The query profile is not a row of ``svd_matrix``, so there is no
    # self-match at rank 0 to skip: start from the best match rather than
    # discarding it.
    top_rows = ranked[:num_recommendations]

    return tinder_df['username'].iloc[top_rows]
273
+
274
+ # Setup complete!
275
+
276
# Two text inputs are passed to greet(); its return value is rendered as text.
iface = gr.Interface(
    fn=greet,
    inputs=["text", "text"],
    outputs="text",
)
iface.launch()
tinder_data.csv ADDED
The diff for this file is too large to render. See raw diff