productizationlabs
committed on
Commit
·
b6c196f
1
Parent(s):
a92bb84
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
from nltk.corpus import stopwords
|
5 |
+
from nltk.tokenize import word_tokenize
|
6 |
+
from nltk.stem.wordnet import WordNetLemmatizer
|
7 |
+
|
8 |
+
# Load the hotel-review table once at import time; the recommender below
# reads it through the module-level name ``df``.
df = pd.read_csv('Hotel_Reviews.csv')

# The country is taken to be the last space-separated token of the address.
df['countries'] = df['Hotel_Address'].map(
    lambda address: address.split(' ')[-1]
)
|
11 |
+
|
12 |
+
# Define the function to recommend hotels
|
13 |
+
def _keywords(text, stop_words, lemmatizer):
    """Return the set of lemmatized, non-stopword tokens found in *text*."""
    return {
        lemmatizer.lemmatize(token)
        for token in word_tokenize(text)
        if token not in stop_words
    }


def Input_your_destination_and_description(location, description):
    """Recommend up to ten hotels in *location* that best match *description*.

    Each row of the module-level ``df`` whose ``countries`` value equals
    *location* (case-insensitive, surrounding whitespace ignored) is scored
    by the number of lemmatized, non-stopword tokens its ``Tags`` text shares
    with *description*.  Hotels are then ranked by that score, with
    ``Average_Score`` breaking ties, and each hotel appears once.

    Args:
        location: Country name to filter on; compared against the
            ``countries`` column of ``df``.
        description: Free text describing the trip; empty or ``None`` is
            treated as having no keywords.

    Returns:
        A DataFrame with columns ``Hotel_Name``, ``Average_Score`` and
        ``Hotel_Address`` holding at most ten rows (empty when no row
        matches *location*).
    """
    # A set gives O(1) stopword checks; the list form is scanned per token.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    wanted = _keywords((description or '').lower(), stop_words, lemmatizer)

    # Compare case-insensitively without rewriting the shared ``df``: the
    # global table is left untouched so calls have no hidden side effects.
    target = (location or '').strip().lower()
    in_country = df['countries'].str.lower() == target
    country_feat = df[in_country].reset_index(drop=True)

    # Only the matching rows are lower-cased; missing tags count as no tags
    # instead of crashing the tokenizer.
    tag_texts = country_feat['Tags'].str.lower().fillna('')
    country_feat['similarity'] = [
        len(_keywords(tag_text, stop_words, lemmatizer) & wanted)
        for tag_text in tag_texts
    ]

    # Rank by keyword overlap first and review score second.  Sorting by
    # score alone as the last step would throw the overlap ranking away and
    # make the description irrelevant to the result.
    country_feat = (
        country_feat
        .sort_values(by=['similarity', 'Average_Score'], ascending=False)
        .drop_duplicates(subset='Hotel_Name', keep='first')
        .reset_index(drop=True)
    )
    return country_feat[['Hotel_Name', 'Average_Score', 'Hotel_Address']].head(10)
|
47 |
+
|
48 |
+
# Two free-text fields, passed positionally to the recommender as
# (location, description).
# NOTE(review): the ``gr.inputs`` / ``gr.outputs`` namespaces appear to be
# deprecated in newer Gradio releases - confirm the pinned version before
# upgrading.
location_box = gr.inputs.Textbox(label="Location")
purpose_box = gr.inputs.Textbox(label="Purpose of Travel")
inputs = [location_box, purpose_box]

# The recommender returns a pandas DataFrame, rendered as a table.
outputs = gr.outputs.Dataframe(type="pandas", label="Hotel Recommendations")

# Wire the function to the widgets and start the web app.
demo = gr.Interface(
    fn=Input_your_destination_and_description,
    inputs=inputs,
    outputs=outputs,
)
demo.launch()
|
59 |
+
|