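# NOTE: the next three lines are hosting-page residue, kept as comments so the file parses as Python.
# Harsh1306's picture
# Upload 4 files
# 5840396 verified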
import streamlit as st
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
# Streamlit re-executes this script from the top on every widget interaction,
# so the two expensive loads below go through Streamlit's caches instead of
# being repeated on each rerun.


@st.cache_data
def _load_embeddings(path='df_with_embeddings.pkl'):
    """Load the DataFrame of travel packages with precomputed embeddings.

    ``st.cache_data`` returns a fresh copy on every call, so code that adds
    or changes columns on the result does not leak into other sessions.
    """
    # NOTE(review): unpickling executes code embedded in the file; this must
    # only ever point at a trusted, locally produced pickle.
    return pd.read_pickle(path)


@st.cache_resource
def _load_model(name='all-MiniLM-L6-v2'):
    """Create the SentenceTransformer once and share it across reruns."""
    return SentenceTransformer(name)


# Load precomputed embeddings
df_with_embeddings = _load_embeddings()
# Load the SentenceTransformer model
model = _load_model()
def get_user_input():
    """Render the preference widgets and collect the user's answers.

    Returns:
        A tuple ``(budget, num_people, companions, days_of_lodging,
        preferred_weather)`` holding the current widget values.
    """
    # Group sizes that follow directly from the companion type; only the
    # "family" choice needs an extra widget to ask for the head count.
    fixed_group_sizes = {"solo": 1, "couple": 2}

    companions = st.selectbox(
        "Who are you traveling with?",
        options=["solo", "couple", "family"],
    )
    if companions in fixed_group_sizes:
        num_people = fixed_group_sizes[companions]
    elif companions == "family":
        num_people = st.number_input("Enter the number of people:", min_value=1, step=1)

    budget = st.number_input("Enter your budget per person:", min_value=0.0, step=0.01)
    days_of_lodging = st.number_input("Enter the number of days of lodging:", min_value=1, step=1)
    preferred_weather = st.selectbox(
        "Enter preferred weather:",
        options=["Sunny", "Rainy", "Snowy"],
    )

    return budget, num_people, companions, days_of_lodging, preferred_weather
def encode_user_input(user_input):
    """Turn the user's preferences into a sentence embedding.

    Args:
        user_input: Indexable of preferences; positions 0, 2, 3 and 4 are
            read as budget, companions, days and weather. Position 1 is not
            part of the encoded text.

    Returns:
        The result of ``model.encode(..., convert_to_tensor=True)`` for the
        assembled description, using the module-level ``model``.
    """
    budget = user_input[0]
    companions = user_input[2]
    days = user_input[3]
    weather = user_input[4]

    # A flat "key value" sentence is what gets embedded.
    user_description = f"budget {budget} companions {companions} days {days} weather {weather}"
    return model.encode(user_description, convert_to_tensor=True)
def recommend_destinations(user_input, df):
    """Return the packages whose embeddings best match the user's preferences.

    Args:
        user_input: Preference tuple, passed unchanged to
            ``encode_user_input``.
        df: DataFrame with an ``embedding`` column and the display columns
            listed below. Each embedding must support ``.to(device)``
            (i.e. be a torch tensor). ``df`` itself is not modified.

    Returns:
        A DataFrame of at most five rows, one per distinct ``Primary``
        value, ordered by descending cosine similarity and restricted to
        the display columns.
    """
    display_columns = ['Primary', 'per_person_price', 'Topography', 'Temprature', 'Weather', 'Mood']

    # torch.stack cannot handle an empty sequence; nothing to rank anyway.
    if df.empty:
        return df[display_columns].head(0)

    # Get device (use GPU if available, else fallback to CPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    user_embedding = encode_user_input(user_input).to(device)

    # Score every package in a single similarity call instead of one call
    # per row. reshape(-1) tolerates embeddings stored as (1, dim) as well
    # as (dim,).
    package_embeddings = torch.stack([emb.to(device).reshape(-1) for emb in df['embedding']])
    scores = util.pytorch_cos_sim(user_embedding, package_embeddings)[0].cpu().tolist()

    # assign() builds a new frame, so the caller's DataFrame does not gain a
    # 'similarity' column as a side effect.
    ranked = df.assign(similarity=scores)

    # Sort by similarity and keep the best row for each destination.
    recommendations = (
        ranked.sort_values(by='similarity', ascending=False)
        .drop_duplicates(subset='Primary')
        .head(5)
    )
    return recommendations[display_columns]
def display_package_details(selection, df):
    """Write the details of the package whose ``Primary`` equals ``selection``.

    Args:
        selection: Value to look up in the ``Primary`` column.
        df: DataFrame with ``Primary``, ``package_name``, ``itinerary`` and
            ``sightseeing_places_covered`` columns.

    When several rows match, the first one is shown; when none match, a
    short notice is written instead.
    """
    matches = df.loc[df['Primary'] == selection]

    if matches.empty:
        st.write("Invalid selection. No package found.")
        return

    # Each field is read immediately before it is written, one at a time.
    package_name = matches['package_name'].values[0]
    st.write(f"*Package Name:* {package_name}")

    itinerary = matches['itinerary'].values[0]
    st.write(f"*Itinerary:* {itinerary}")

    sightseeing = matches['sightseeing_places_covered'].values[0]
    st.write(f"*Sightseeing Places Covered:* {sightseeing}")
def evaluate_model(df, model):
    """Score ``model`` as a nearest-neighbour classifier of ``Primary``.

    The frame is split 80/20 (fixed ``random_state=42``); every test
    description is assigned the ``Primary`` label of the training
    description with the highest cosine similarity.

    Args:
        df: DataFrame with ``description`` and ``Primary`` columns.
        model: Object whose ``encode(list_of_str, convert_to_tensor=True)``
            returns a tensor of embeddings.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)``; precision, recall
        and F1 use weighted averaging.
    """
    # Get the correct device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Split the data into train and test sets
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

    # Encode the descriptions and move them to the appropriate device
    train_embeddings = model.encode(train_df['description'].tolist(), convert_to_tensor=True).to(device)
    test_embeddings = model.encode(test_df['description'].tolist(), convert_to_tensor=True).to(device)

    # Build the label list once rather than once per test item.
    train_labels = train_df['Primary'].tolist()

    # One (n_test, n_train) similarity matrix; the argmax of each row is the
    # index of the closest training description.
    similarities = util.pytorch_cos_sim(test_embeddings, train_embeddings)
    nearest_indices = similarities.argmax(dim=1).tolist()
    predicted_labels = [train_labels[idx] for idx in nearest_indices]

    true_labels = test_df['Primary']

    # zero_division=0 yields the same value as the default for labels that
    # are never predicted or never present, without emitting a warning.
    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels, average='weighted', zero_division=0)
    recall = recall_score(true_labels, predicted_labels, average='weighted', zero_division=0)
    f1 = f1_score(true_labels, predicted_labels, average='weighted', zero_division=0)
    return accuracy, precision, recall, f1
# Streamlit app
# The script runs top to bottom on every interaction. st.button returns True
# only during the rerun caused by its own click, so anything that must remain
# on screen afterwards is kept in st.session_state.
st.title("Travel Recommendation System")
st.write("Please provide your travel preferences below:")
user_input = get_user_input()

if st.button("Get Recommendations"):
    st.session_state.recommendations = recommend_destinations(user_input, df_with_embeddings)
    # A package picked from the previous result set no longer applies.
    if 'selected_package' in st.session_state:
        del st.session_state['selected_package']

if 'recommendations' in st.session_state:
    # Drawn from session state rather than inside the button branch, so the
    # table stays visible while the user picks a package below.
    st.write("Top recommended destinations for you:")
    st.dataframe(st.session_state.recommendations)

    primary_selection = st.selectbox(
        "Select a package to view details",
        options=st.session_state.recommendations['Primary'].tolist(),
    )
    if st.button("View Details"):
        st.session_state.selected_package = primary_selection

    if 'selected_package' in st.session_state:
        st.write(f"Details for {st.session_state.selected_package}:")
        display_package_details(st.session_state.selected_package, df_with_embeddings)

if st.button("Evaluate Model Accuracy"):
    # Re-encodes every description, so this can take a while on large data.
    accuracy, precision, recall, f1 = evaluate_model(df_with_embeddings, model)
    st.write(f'Accuracy: {accuracy}')
    st.write(f'Precision: {precision}')
    st.write(f'Recall: {recall}')
    st.write(f'F1 Score: {f1}')