Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -11,3 +11,27 @@ df = pd.read_csv(input_datapath, index_col=0)
|
|
11 |
|
12 |
st.title("Semanti Search")
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
st.title("Semanti Search")
|
13 |
|
14 |
+
|
15 |
+
# Add a "combined" column that uses the summary as the title and the actual text as the content
|
16 |
+
df["combined"] = (
|
17 |
+
"Title: " + df.Summary.str.strip() + "; Content: " + df.Text.str.strip()
|
18 |
+
)
|
19 |
+
|
20 |
+
|
21 |
+
# embedding model parameters
|
22 |
+
embedding_model = "text-embedding-ada-002"
|
23 |
+
embedding_encoding = "cl100k_base" # this is the encoding for text-embedding-ada-002
|
24 |
+
max_tokens = 8000 # the maximum for text-embedding-ada-002 is 8191
|
25 |
+
|
26 |
+
|
27 |
+
encoding = tiktoken.get_encoding(embedding_encoding)
|
28 |
+
top_n = 500
|
29 |
+
# omit reviews that are too long to embed
|
30 |
+
df["n_tokens"] = df.combined.apply(lambda x: len(encoding.encode(x)))
|
31 |
+
df = df[df.n_tokens <= max_tokens].tail(top_n)
|
32 |
+
|
33 |
+
|
34 |
+
datafile_path = "fine_food_reviews_with_embeddings_1k.csv"
|
35 |
+
# NOTE(review): this rebinds df to the CSV at datafile_path, so the "combined" and
# "n_tokens" columns and the token-length/tail filter applied to the earlier df are
# discarded here — confirm that is intended.
df = pd.read_csv(datafile_path)
|
36 |
+
# Convert each cell of the "embedding" column from its string form into a NumPy array.
# NOTE(review): eval executes whatever expression the CSV cell contains; this is only
# safe if the file is trusted — consider ast.literal_eval or json.loads instead.
df["embedding"] = df.embedding.apply(eval).apply(np.array)
|
37 |
+
|