Spaces:
Runtime error
Runtime error
File size: 1,269 Bytes
0d178f3 5ca010f 0d178f3 d600cc0 f637ac1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import pandas as pd
import tiktoken
import os
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity
import numpy as np
import streamlit as st
# Streamlit app: semantic search over food reviews using precomputed
# text-embedding-ada-002 embeddings stored in a CSV file.
#
# The CSV is expected to provide at least the columns `Summary`, `Text`
# and `embedding` (the embedding serialized as a Python list literal).

input_datapath = "fine_food_reviews_with_embeddings_1k.csv"

# embedding model parameters
embedding_model = "text-embedding-ada-002"
embedding_encoding = "cl100k_base"  # this is the encoding for text-embedding-ada-002
max_tokens = 8000  # the maximum for text-embedding-ada-002 is 8191
max_reviews = 500  # keep only the last `max_reviews` reviews that fit the token limit


def search_reviews(df, product_description, n=3):
    """Return the `n` reviews most similar to `product_description`.

    The query text is embedded with `embedding_model` and compared against
    every row's precomputed embedding by cosine similarity.

    Args:
        df: DataFrame with an `embedding` column (one numeric vector per row)
            and a `combined` column holding the text to return.
        product_description: Free-text search query.
        n: Maximum number of reviews to return.

    Returns:
        A pandas Series with the `combined` text of the best matches,
        ordered from most to least similar. `df` itself is not modified.
    """
    query_embedding = get_embedding(product_description, engine=embedding_model)
    # `assign` builds a new frame, so the caller's DataFrame stays untouched.
    ranked = df.assign(
        similarity=df.embedding.apply(
            lambda emb: cosine_similarity(emb, query_embedding)
        )
    ).sort_values("similarity", ascending=False)
    return ranked.head(n).combined


st.title("Semanti Search")

# Load the data exactly once so the columns and filtering below are the ones
# actually used for the search.
df = pd.read_csv(input_datapath, index_col=0)

# adding another column having the summary as title and the actual text as content
df["combined"] = (
    "Title: " + df.Summary.str.strip() + "; Content: " + df.Text.str.strip()
)

# omit reviews that are too long to embed
encoding = tiktoken.get_encoding(embedding_encoding)
df["n_tokens"] = df.combined.apply(lambda x: len(encoding.encode(x)))
# `.copy()` so that adding/replacing columns below does not write into a slice.
df = df[df.n_tokens <= max_tokens].tail(max_reviews).copy()

# The embeddings are stored as text in the CSV; turn them back into arrays.
# NOTE(review): eval() executes whatever the column contains. That is only
# acceptable for a trusted, locally produced CSV - use ast.literal_eval if the
# file can ever come from an untrusted source.
df["embedding"] = df.embedding.apply(eval).apply(np.array)

# Streamlit scripts have no usable stdin/stdout, so the query and the result
# count are collected through widgets instead of input()/print().
prompt = st.text_input("What do you want to search for? : ")
top_n = int(
    st.number_input(
        "How many results do you want to see? : ", min_value=1, value=3, step=1
    )
)

# The script reruns on every interaction; skip the (billable) embedding call
# until the user has actually typed a query.
if prompt.strip():
    results = search_reviews(df, prompt, top_n)
    for review in results:
        st.write(review)
|