Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,7 @@ embedding_model = SentenceTransformer("thenlper/gte-large")
|
|
11 |
|
12 |
# Example dataset with genres (replace with your actual data)
|
13 |
dataset = load_dataset("hugginglearners/netflix-shows")
|
|
|
14 |
data = dataset['train'] # Accessing the 'train' split of the dataset
|
15 |
|
16 |
# Convert the dataset to a list of dictionaries for easier indexing
|
@@ -29,7 +30,7 @@ def vector_search(query):
|
|
29 |
query_embedding = get_embedding(query)
|
30 |
|
31 |
# Generate embeddings for the combined description and genre
|
32 |
-
embeddings = np.array([get_embedding(combine_description_title_and_genre(item["description"], item["listed_in"],item["title"])) for item in data_list])
|
33 |
|
34 |
# Calculate cosine similarity between the query and all embeddings
|
35 |
similarities = cosine_similarity([query_embedding], embeddings)
|
|
|
11 |
|
12 |
# Example dataset with genres (replace with your actual data)
|
13 |
dataset = load_dataset("hugginglearners/netflix-shows")
|
14 |
+
# Drop rows missing any field used to build the embedding text. Filter the whole
# DatasetDict (it is keyed by split name, not by integer position) so that the
# 'train' split can still be selected afterwards.
dataset = dataset.filter(lambda x: x['description'] is not None and x['listed_in'] is not None and x['title'] is not None)
|
15 |
data = dataset['train'] # Accessing the 'train' split of the dataset
|
16 |
|
17 |
# Convert the dataset to a list of dictionaries for easier indexing
|
|
|
30 |
query_embedding = get_embedding(query)
|
31 |
|
32 |
# Generate embeddings for the combined description, genre, and title
|
33 |
+
embeddings = np.array([get_embedding(combine_description_title_and_genre(item["description"], item["listed_in"],item["title"])) for item in data_list[0]])
|
34 |
|
35 |
# Calculate cosine similarity between the query and all embeddings
|
36 |
similarities = cosine_similarity([query_embedding], embeddings)
|