Spaces:

chaaim123
/

Similarity_search

Runtime error

App Files Community

chaaim123 committed on May 7

Commit

0ec9770

verified ·

1 Parent(s): ec1fd34

Create app.py

Browse files

Files changed (1) hide show

app.py +120 -0

app.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import pandas as pd
+from sentence_transformers import SentenceTransformer
+import gradio as gr
+import spacy
+import subprocess
+# Load the spaCy pipeline used to pull place names out of queries. The model
+# package is downloaded only when it is not installed yet, so a normal restart
+# neither pays for nor depends on a network download.
+import importlib
+import sys
+_SPACY_MODEL_NAME = "en_core_web_trf"
+try:
+    nlp = spacy.load(_SPACY_MODEL_NAME)
+except OSError:
+    # spacy.load raises OSError when the model package cannot be found.
+    # sys.executable installs into the interpreter that is running this file,
+    # and check=True turns a failed download into an immediate, readable error.
+    subprocess.run(
+        [sys.executable, "-m", "spacy", "download", _SPACY_MODEL_NAME],
+        check=True,
+    )
+    # Make the freshly installed package visible to this process's import system.
+    importlib.invalidate_caches()
+    nlp = spacy.load(_SPACY_MODEL_NAME)
+# Sentence-embedding model used to encode user queries.
+# NOTE(review): trust_remote_code=True runs code shipped in the model
+# repository; keep it only while that repository is trusted.
+model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
+# Hotel table searched by the functions below.
+df_new = pd.read_csv('last_df.csv')
+# Show the country as "Turkey" wherever the data spells it "Türkiye".
+df_new['country'] = df_new['country'].replace('Türkiye', 'Turkey')
+#
+#
+def get_city_name(query):
+    """Return the first place name recognised in *query*, lower-cased.
+
+    The query is run through the module-level ``nlp`` pipeline and its
+    entities are scanned in order; the text of the first entity labelled
+    ``GPE`` is returned in lower case. Returns ``None`` when no entity
+    carries that label.
+    """
+    gpe_texts = (ent.text.lower() for ent in nlp(query).ents if ent.label_ == "GPE")
+    return next(gpe_texts, None)
+def filter_by_loc(query):
+    """Return the hotels located in the place mentioned in *query*.
+
+    The place name comes from ``get_city_name``. When it matches a value of
+    the ``locality`` column (compared case-insensitively), only the matching
+    rows are returned; when no place is recognised or none matches, every
+    row is returned.
+
+    The result is always an independent copy, so callers may add columns to
+    it without altering the module-level ``df_new`` table and without
+    triggering pandas' chained-assignment warning.
+    """
+    city_name = get_city_name(query)
+    if city_name is None:
+        return df_new.copy()
+    # get_city_name already lower-cases its result, so lower-casing the
+    # column once is enough for a case-insensitive comparison.
+    in_city = df_new['locality'].str.lower() == city_name
+    if not in_city.any():
+        return df_new.copy()
+    return df_new[in_city].copy()
+import torch.nn as nn
+import torch
+import ast
+def get_similarity_score(row, query_embedding):
+    """Score one hotel row against the query embedding.
+
+    The ``rating_value_embedding``, ``hotel_combined_embedding`` and
+    ``review_embedding`` entries of *row* are parsed with
+    ``ast.literal_eval`` (they are expected to be strings holding list
+    literals), turned into tensors and compared with *query_embedding* by
+    cosine similarity along dim 0.
+
+    Returns the sum of the three similarities as a Python float.
+    """
+    embedding_columns = (
+        'rating_value_embedding',
+        'hotel_combined_embedding',
+        'review_embedding',
+    )
+    # Parse every stored embedding first; literal_eval accepts only Python
+    # literals, so the text is read without being executed.
+    vectors = [torch.tensor(ast.literal_eval(row[column])) for column in embedding_columns]
+    cosine = nn.CosineSimilarity(dim=0)  # dim=0 because the operands are 1-D
+    scores = [cosine(vector, query_embedding).item() for vector in vectors]
+    return scores[0] + scores[1] + scores[2]
+def process_query(query):
+    """Find the hotel that best matches a free-text *query* and describe it.
+
+    The query is embedded with the module-level ``model``, the candidate
+    hotels are narrowed with ``filter_by_loc``, every candidate is scored
+    with ``get_similarity_score``, and the highest-scoring hotel is
+    formatted as a short multi-line text block.
+
+    Returns the formatted text, or a one-line message when the query is
+    empty or there are no hotels to compare against.
+    """
+    if not query or not query.strip():
+        return "Please enter a query."
+    # get_similarity_score compares tensors, so wrap the encoder output once.
+    query_embedding_tensor = torch.tensor(model.encode(query))
+    candidates = filter_by_loc(query)
+    if candidates.empty:
+        return "No hotels are available to compare against."
+    # Keep the scores in their own Series instead of writing a column onto
+    # the frame: the frame may be the shared hotel table or a slice of it.
+    scores = candidates.apply(
+        lambda row: get_similarity_score(row, query_embedding_tensor), axis=1
+    )
+    # Positional lookup of the single best row; a one-row frame keeps each
+    # column's own dtype, so values are formatted as they are stored.
+    best = candidates.iloc[[int(scores.argmax())]]
+    lines = [
+        "Here's the most similar hotel we found:",
+        "-" * 30,
+        f"Hotel Name: {best['hotel_name'].iloc[0]}",
+        f"City: {best['locality'].iloc[0]}",
+        f"Country: {best['country'].iloc[0]}",
+        f"Star Rating: {best['rate'].iloc[0]}",
+        f"Price Range: {best['price_range'].iloc[0]}",
+    ]
+    return "\n".join(lines) + "\n"
+# Gradio front end: a single text box feeds process_query and the returned
+# text is shown as the output.
+query_input = gr.Textbox(placeholder="Enter your query", label="Query")
+ui = gr.Interface(
+    fn=process_query,
+    inputs=query_input,
+    outputs="text",
+    description="Enter a query to find similar hotels.",
+    title="Hotel Similarity Finder",
+)
+# Start serving the interface.
+ui.launch()