File size: 2,520 Bytes
b6c196f
 
 
 
 
 
6359f91
 
4a09375
 
92c0e67
4a09375
b6c196f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr
import numpy as np
import pandas as pd
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
import nltk

# Fetch the NLTK data used below: tokenizer models, WordNet (for the
# lemmatizer) and the English stop-word list. Newer NLTK releases make
# word_tokenize load 'punkt_tab' instead of the pickled 'punkt' models, so both
# are requested; asking for a resource the installed NLTK does not know about
# only logs an error and returns False.
for _resource in ('punkt', 'punkt_tab', 'wordnet', 'stopwords'):
    nltk.download(_resource)

# Import the dataset
df = pd.read_csv('Hotel_Reviews.csv')
# Country label = the text after the last space of the address. For a
# multi-word country name this keeps only its final word. The vectorised
# string accessor leaves missing addresses as NaN instead of raising, and the
# strip stops a trailing space from producing an empty label.
df['countries'] = df['Hotel_Address'].str.strip().str.rsplit(' ', n=1).str[-1]

# Define the function to recommend hotels
def Input_your_destination_and_description(location,description):
    """Recommend up to ten hotels in a country that best match a description.

    Rows of the module-level ``df`` whose ``countries`` value equals the
    requested location (case-insensitively) are scored by how many lemmatized,
    non-stop-word tokens their ``Tags`` text shares with ``description``. Each
    hotel is represented by its best-scoring row, and hotels are ranked by that
    score first and by ``Average_Score`` second. When the description yields no
    keywords, every score is 0 and the ranking is by ``Average_Score`` alone.

    Args:
        location: Country to search in. Only its last whitespace-separated
            word is compared, because ``countries`` stores just the last word
            of each address.
        description: Free text describing the trip.

    Returns:
        A DataFrame with the columns ``Hotel_Name``, ``Average_Score`` and
        ``Hotel_Address``, at most ten rows, indexed from 0. It is empty when
        no row matches the location.
    """
    # A set gives O(1) membership tests; the raw NLTK list is scanned per token.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    def _keywords(text):
        # Lower-case and tokenize, drop stop words, lemmatize what is left.
        return {
            lemmatizer.lemmatize(token)
            for token in word_tokenize(text.lower())
            if token not in stop_words
        }

    wanted = _keywords(description or '')

    # Reduce the request to its last word so that a multi-word country name can
    # still match the single-word labels stored in `countries`.
    location_words = (location or '').lower().split()
    country_key = location_words[-1] if location_words else ''

    # Compare on a lower-cased view and work on a copy: the shared `df` is left
    # untouched, so calls do not redo a full-frame rewrite or race each other.
    in_country = df['countries'].str.lower() == country_key
    country_feat = df.loc[in_country].copy()

    # Missing tags count as "no keywords" rather than crashing the tokenizer.
    tag_texts = country_feat['Tags'].fillna('').astype(str)
    country_feat['similarity'] = [
        len(_keywords(tag_text) & wanted) for tag_text in tag_texts
    ]

    # Sorting before de-duplicating keeps each hotel's best-matching row and
    # leaves hotels ordered by (similarity, Average_Score). Ordering by
    # Average_Score alone would discard the similarity score entirely.
    ranked = (
        country_feat
        .sort_values(by=['similarity', 'Average_Score'], ascending=False)
        .drop_duplicates(subset='Hotel_Name', keep='first')
    )
    top_hotels = ranked[['Hotel_Name','Average_Score','Hotel_Address']].head(10)
    return top_hotels.reset_index(drop=True)

# Gradio 3+ exposes components at the top level of the package; the legacy
# gr.inputs / gr.outputs namespaces are deprecated there and were removed in
# Gradio 4. Prefer the top-level classes and touch the legacy namespaces only
# on installs that lack them.
_Textbox = getattr(gr, "Textbox", None) or gr.inputs.Textbox
_Dataframe = getattr(gr, "Dataframe", None) or gr.outputs.Dataframe

# Create the input interface: two free-text fields, handed to the recommender
# positionally as (location, description).
inputs = [_Textbox(label="Location"),
          _Textbox(label="Purpose of Travel")]

# Create the output interface: the recommender's pandas DataFrame shown as a table.
outputs = _Dataframe(label="Hotel Recommendations", type="pandas")

# Create the interface and start serving it
gr.Interface(fn=Input_your_destination_and_description,
             inputs=inputs,
             outputs=outputs).launch()