File size: 5,454 Bytes
5840396
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import streamlit as st
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Load precomputed embeddings
# The frame's 'embedding' column is what recommend_destinations() compares the
# user's embedding against. The path is relative to the working directory.
# NOTE(review): unpickling can execute arbitrary code; only load a file from a
# trusted source.
df_with_embeddings = pd.read_pickle('df_with_embeddings.pkl')

# Load the SentenceTransformer model
# Used to embed the user's preferences and, in evaluate_model(), the package
# descriptions.
# NOTE(review): presumably this must be the same model that produced the
# pickled embeddings, otherwise similarities are not comparable - confirm.
model = SentenceTransformer('all-MiniLM-L6-v2')

def get_user_input():
    """Render the preference widgets and return the traveller's answers.

    The head count is implied for "solo" (1) and "couple" (2); only the
    "family" choice renders an extra number input asking for it.

    Returns:
        A tuple ``(budget, num_people, companions, days_of_lodging,
        preferred_weather)`` in exactly that order.
    """
    companions = st.selectbox(
        "Who are you traveling with?",
        options=["solo", "couple", "family"],
    )

    # Only a family needs an explicit head count; it is asked right after the
    # companions selector so the widgets keep their on-page order.
    if companions == "family":
        party_size = st.number_input("Enter the number of people:", min_value=1, step=1)
    elif companions == "couple":
        party_size = 2
    elif companions == "solo":
        party_size = 1

    budget_per_person = st.number_input(
        "Enter your budget per person:", min_value=0.0, step=0.01
    )
    lodging_days = st.number_input(
        "Enter the number of days of lodging:", min_value=1, step=1
    )
    weather_choice = st.selectbox(
        "Enter preferred weather:", options=["Sunny", "Rainy", "Snowy"]
    )

    return budget_per_person, party_size, companions, lodging_days, weather_choice

def encode_user_input(user_input):
    """Embed the user's preferences as a single sentence embedding.

    Args:
        user_input: Indexable of preferences in the layout returned by
            ``get_user_input``. Positions 0, 2, 3 and 4 (budget, companions,
            days of lodging, weather) are used; position 1 (head count) is
            not part of the encoded text.

    Returns:
        The result of ``model.encode(..., convert_to_tensor=True)`` for the
        generated description.
    """
    budget = user_input[0]
    companions = user_input[2]
    days = user_input[3]
    weather = user_input[4]

    # Flatten the preferences into one sentence so the text model can embed it.
    description = f"budget {budget} companions {companions} days {days} weather {weather}"
    return model.encode(description, convert_to_tensor=True)

def recommend_destinations(user_input, df):
    """Return the packages whose embeddings best match the user's preferences.

    Args:
        user_input: Preference tuple accepted by ``encode_user_input``.
        df: DataFrame with an ``embedding`` column (each value must support
            ``.to(device)``, i.e. a torch tensor) plus the columns returned
            below. The frame is not modified.

    Returns:
        A DataFrame of at most five rows, one per distinct ``Primary`` value,
        ordered by descending cosine similarity, restricted to the columns
        ``Primary``, ``per_person_price``, ``Topography``, ``Temprature``,
        ``Weather`` and ``Mood``.
    """
    # Get device (use GPU if available, else fall back to CPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Encode the user input once and keep it on the comparison device
    user_embedding = encode_user_input(user_input).to(device)

    def similarity_to_user(row_embedding):
        # Cosine similarity of one stored embedding against the user's embedding.
        return util.pytorch_cos_sim(user_embedding, row_embedding.to(device)).item()

    # assign() scores a copy, so the caller's frame never grows a 'similarity'
    # column as a hidden side effect of asking for recommendations.
    scored = df.assign(similarity=df['embedding'].apply(similarity_to_user))

    # Best match first; keep only the best-scoring row per destination.
    ranked = scored.sort_values(by='similarity', ascending=False)
    top_matches = ranked.drop_duplicates(subset='Primary').head(5)

    return top_matches[['Primary', 'per_person_price', 'Topography', 'Temprature', 'Weather', 'Mood']]

def display_package_details(selection, df):
    """Write the details of the package whose ``Primary`` equals *selection*.

    Args:
        selection: Value to look up in the ``Primary`` column.
        df: DataFrame providing ``Primary``, ``package_name``, ``itinerary``
            and ``sightseeing_places_covered``.

    When several rows match, only the first one is shown. When none match, a
    "no package found" message is written instead.
    """
    matches = df[df['Primary'] == selection]

    # Guard clause: nothing to show for an unknown selection.
    if matches.empty:
        st.write("Invalid selection. No package found.")
        return

    package_name = matches['package_name'].values[0]
    st.write(f"*Package Name:* {package_name}")

    itinerary = matches['itinerary'].values[0]
    st.write(f"*Itinerary:* {itinerary}")

    sightseeing = matches['sightseeing_places_covered'].values[0]
    st.write(f"*Sightseeing Places Covered:* {sightseeing}")

def evaluate_model(df, model):
    """Score nearest-neighbour label prediction on a held-out split of *df*.

    The frame is split 80/20 (fixed ``random_state=42``). Each test
    description is embedded and assigned the ``Primary`` label of the training
    description with the highest cosine similarity.

    Args:
        df: DataFrame with ``description`` (text) and ``Primary`` (label)
            columns.
        model: Object whose ``encode(list_of_str, convert_to_tensor=True)``
            returns a tensor of embeddings, one row per input.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)``; precision, recall and
        F1 use ``average='weighted'``.
    """
    # Get the correct device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Split the data into train and test sets
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

    # Encode the descriptions and move them to the appropriate device
    train_embeddings = model.encode(train_df['description'].tolist(), convert_to_tensor=True).to(device)
    test_embeddings = model.encode(test_df['description'].tolist(), convert_to_tensor=True).to(device)

    # Materialise the training labels once; building this list inside the
    # prediction loop would repeat the full conversion for every test row.
    train_labels = train_df['Primary'].tolist()

    def get_most_similar_label(test_embedding):
        # Label of the training row closest (by cosine similarity) to this embedding.
        similarities = util.pytorch_cos_sim(test_embedding, train_embeddings)
        most_similar_idx = similarities.argmax().item()
        return train_labels[most_similar_idx]

    # Predict labels for the test set
    predicted_labels = [get_most_similar_label(embedding) for embedding in test_embeddings]

    # Calculate accuracy metrics
    true_labels = test_df['Primary']
    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels, average='weighted')
    recall = recall_score(true_labels, predicted_labels, average='weighted')
    f1 = f1_score(true_labels, predicted_labels, average='weighted')

    return accuracy, precision, recall, f1

# Streamlit app
# Page script: widgets are created in the order the statements appear, so the
# statement order below is also the on-page layout.
st.title("Travel Recommendation System")

st.write("Please provide your travel preferences below:")

# Tuple of (budget, num_people, companions, days_of_lodging, preferred_weather).
user_input = get_user_input()

# Compute recommendations on demand and stash them in session state; the
# selector block below checks session state rather than this button.
if st.button("Get Recommendations"):
    recommendations = recommend_destinations(user_input, df_with_embeddings)
    st.write("Top recommended destinations for you:")
    st.session_state.recommendations = recommendations
    st.dataframe(recommendations)

# Offer the stored recommendations' 'Primary' values for drill-down.
if 'recommendations' in st.session_state:
    primary_selection = st.selectbox("Select a package to view details", options=st.session_state.recommendations['Primary'].tolist())
    if st.button("View Details"):
        # Remember the choice so the details block below can render it.
        st.session_state.selected_package = primary_selection

# Show details for the remembered package, looked up in the full frame.
if 'selected_package' in st.session_state:
    st.write(f"Details for {st.session_state.selected_package}:")
    display_package_details(st.session_state.selected_package, df_with_embeddings)

# Optional: run the train/test evaluation and print the four metrics.
if st.button("Evaluate Model Accuracy"):
    accuracy, precision, recall, f1 = evaluate_model(df_with_embeddings, model)
    st.write(f'Accuracy: {accuracy}')
    st.write(f'Precision: {precision}')
    st.write(f'Recall: {recall}')
    st.write(f'F1 Score: {f1}')