Spaces:

ThirdEyeData
/

Semantic-Search

Runtime error

Semantic-Search / app.py

Update app.py

f637ac1 over 2 years ago

1.27 kB

	import pandas as pd
	import tiktoken
	import os
	import openai
	from openai.embeddings_utils import get_embedding, cosine_similarity
	import numpy as np
	import streamlit as st

	import ast  # local import: only used to parse the stored embedding strings

	# CSV of reviews together with their precomputed embedding vectors.
	datafile_path = "fine_food_reviews_with_embeddings_1k.csv"

	# Model used to embed the search query. It presumably has to be the same model
	# that produced the vectors in the CSV's "embedding" column -- TODO confirm
	# against how the CSV was generated.
	embedding_model = "text-embedding-ada-002"


	def search_reviews(df, product_description, n=3):
	    """Return the ``n`` reviews most similar to ``product_description``.

	    The query is embedded with ``embedding_model`` and compared to every
	    row's ``embedding`` by cosine similarity. ``df`` is not modified.

	    Args:
	        df: frame with an ``embedding`` column (numeric vectors) and a
	            ``combined`` column ("Title: ...; Content: ..." text).
	        product_description: free-text search query.
	        n: number of matches to return.

	    Returns:
	        ``(results, product)``. ``results`` is a Series with the matched
	        review text, best match first, with the "Title: " / "; Content:"
	        markers rewritten as "<title>: <content>". ``product`` is the
	        ``ProductId`` column of the same rows, or ``None`` when the frame
	        has no such column.
	        NOTE(review): the call site unpacks a second value named
	        ``product`` but nothing in this file says what it should hold;
	        returning the product ids is an assumption -- confirm.
	    """
	    query_embedding = get_embedding(product_description, engine=embedding_model)
	    similarity = df.embedding.apply(
	        lambda row_embedding: cosine_similarity(row_embedding, query_embedding)
	    )
	    best = (
	        df.assign(similarity=similarity)
	        .sort_values("similarity", ascending=False)
	        .head(n)
	    )
	    results = (
	        best.combined.str.replace("Title: ", "", regex=False)
	        .str.replace("; Content:", ": ", regex=False)
	    )
	    product = best.get("ProductId")
	    return results, product


	st.title("Semantic Search")

	# Read the file exactly once; every later step works on this frame.
	df = pd.read_csv(datafile_path)

	# Each embedding is stored as the text of a Python list. literal_eval parses
	# that literal without executing arbitrary code from the file.
	df["embedding"] = df.embedding.apply(ast.literal_eval).apply(np.array)

	# The search output is built from "combined" (summary as title, review text
	# as content); derive it when the CSV does not already carry the column.
	if "combined" not in df.columns:
	    df["combined"] = (
	        "Title: " + df.Summary.str.strip() + "; Content: " + df.Text.str.strip()
	    )

	# Streamlit scripts have no interactive stdin/stdout, so the query and the
	# result count come from widgets and the matches are rendered on the page.
	prompt = st.text_input("What do you want to search for? : ")
	top_n = int(
	    st.number_input("How many results do you want to see? : ", min_value=1, step=1)
	)

	results, product = None, None
	if prompt.strip():  # nothing to embed until the user has typed a query
	    results, product = search_reviews(df, prompt, top_n)
	    for review in results:
	        st.write(review)