chaaim123 committed on
Commit
0ec9770
·
verified ·
1 Parent(s): ec1fd34

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -0
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sentence_transformers import SentenceTransformer
3
+ import gradio as gr
4
+ import spacy
5
+
6
+ import subprocess
7
+
8
# Load the spaCy transformer pipeline, downloading it only when it is missing.
import sys

try:
    nlp = spacy.load("en_core_web_trf")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    # Download with the running interpreter so the model is installed into the
    # same environment, and fail loudly (check=True) if the download fails.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_trf"],
        check=True,
    )
    nlp = spacy.load("en_core_web_trf")

# Sentence-embedding model used to encode user queries.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)

# Hotel dataset; the functions in this file read its locality, country,
# embedding and descriptive columns.
df_new = pd.read_csv('last_df.csv')

# Normalise the country name so these hotels are listed under "Turkey".
df_new['country'] = df_new['country'].replace('Türkiye', 'Turkey')
24
+ #
25
+ #
26
+
27
def get_city_name(query):
    """Return the first place name found in ``query``, lower-cased.

    The query is run through the module-level spaCy pipeline ``nlp`` and the
    text of the first entity labelled ``GPE`` is returned. Note that ``GPE``
    is spaCy's geopolitical-entity label, so the match may be a country or
    state rather than a city.

    Args:
        query: Free-text user query.

    Returns:
        The lower-cased entity text, or ``None`` when the query is not a
        non-blank string or contains no ``GPE`` entity.
    """
    # Skip the (expensive) transformer pipeline for missing or blank input.
    if not isinstance(query, str) or not query.strip():
        return None

    doc = nlp(query)
    return next(
        (ent.text.lower() for ent in doc.ents if ent.label_ == "GPE"),
        None,
    )
34
+
35
def filter_by_loc(query):
    """Restrict the hotel table to the locality mentioned in ``query``.

    Args:
        query: Free-text user query; the place name is extracted with
            ``get_city_name``.

    Returns:
        The rows of ``df_new`` whose ``locality`` equals the extracted place
        name (case-insensitive). When no place name is found, or it matches
        no locality, ``df_new`` itself is returned unfiltered. Callers should
        treat the result as read-only, since it may be the shared table or a
        slice of it.
    """
    city_name = get_city_name(query)
    if city_name is None:
        return df_new

    # Lower-case the column once and reuse the mask for both the membership
    # test and the row selection. Missing localities compare as False.
    matches = df_new['locality'].str.lower() == city_name.lower()
    if matches.any():
        return df_new[matches]
    return df_new
43
+
44
+
45
+
46
+ import torch.nn as nn
47
+ import torch
48
+ import ast
49
+
50
+
51
+
52
def get_similarity_score(row, query_embedding):
    """Sum the cosine similarities between a query and a hotel's embeddings.

    Args:
        row: Mapping-like record (for example a DataFrame row) holding the
            ``rating_value_embedding``, ``hotel_combined_embedding`` and
            ``review_embedding`` fields. Each field is either the string
            representation of a list of floats or an already-parsed sequence.
        query_embedding: 1-D query embedding as a tensor, array or list, with
            the same length as the stored embeddings.

    Returns:
        The sum of the three cosine similarities as a Python float.
    """
    query = torch.as_tensor(query_embedding).flatten()
    if not query.is_floating_point():
        query = query.float()

    total = 0.0
    for column in (
        'rating_value_embedding',
        'hotel_combined_embedding',
        'review_embedding',
    ):
        raw = row[column]
        # literal_eval only parses Python literals, so it is safe on the
        # stringified lists stored in the CSV.
        values = ast.literal_eval(raw) if isinstance(raw, str) else raw
        # Match the query dtype so the two vectors can always be compared.
        embedding = torch.as_tensor(values, dtype=query.dtype).flatten()
        total += nn.functional.cosine_similarity(embedding, query, dim=0).item()

    return total
66
+
67
def process_query(query):
    """Return a formatted description of the hotel that best matches ``query``.

    The query is embedded with the module-level ``model``, the hotel table is
    narrowed with ``filter_by_loc``, every remaining hotel is scored with
    ``get_similarity_score`` and the highest-scoring hotel is reported.

    Args:
        query: Free-text user query.

    Returns:
        A multi-line string with the best hotel's name, city, country, star
        rating and price range, or a short message when the query is blank or
        there are no hotels to score.
    """
    if not isinstance(query, str) or not query.strip():
        return "Please enter a query to search for hotels."

    candidates = filter_by_loc(query)
    if len(candidates) == 0:
        return "No hotels are available to search."

    query_embedding_tensor = torch.tensor(model.encode(query))

    # Keep the scores in a separate Series: writing a column into
    # ``candidates`` would modify the shared table (or a slice of it).
    scores = candidates.apply(
        lambda row: get_similarity_score(row, query_embedding_tensor),
        axis=1,
    )
    # Positional index of the best score, independent of the frame's labels.
    best_position = scores.reset_index(drop=True).idxmax()

    def best_value(column):
        """Return the best hotel's value for ``column``."""
        return candidates[column].iloc[best_position]

    return "".join([
        "Here's the most similar hotel we found:\n",
        "-" * 30 + "\n",
        f"Hotel Name: {best_value('hotel_name')}\n",
        f"City: {best_value('locality')}\n",
        f"Country: {best_value('country')}\n",
        f"Star Rating: {best_value('rate')}\n",
        f"Price Range: {best_value('price_range')}\n",
    ])
106
+
107
+
108
+
109
+
110
+
111
# Gradio front end: one text box in, the plain-text recommendation out.
ui = gr.Interface(
    fn=process_query,
    inputs=gr.Textbox(label="Query", placeholder="Enter your query"),
    outputs="text",
    title="Hotel Similarity Finder",
    description="Enter a query to find similar hotels.",
)

# Start the web server only when this file is run as a script, so that
# importing the module (for example from a test) does not launch it.
if __name__ == "__main__":
    ui.launch()
120
+