saritha5 committed on
Commit
d600cc0
·
1 Parent(s): f18f66d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -0
app.py CHANGED
@@ -11,3 +11,27 @@ df = pd.read_csv(input_datapath, index_col=0)
11
 
12
  st.title("Semanti Search")
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  st.title("Semanti Search")
13
 
14
+
15
+ # add a "combined" column that uses the summary as the title and the review text as the content
16
+ df["combined"] = (
17
+ "Title: " + df.Summary.str.strip() + "; Content: " + df.Text.str.strip()
18
+ )
19
+
20
+
21
+ # embedding model parameters
22
+ embedding_model = "text-embedding-ada-002"
23
+ embedding_encoding = "cl100k_base" # this is the encoding for text-embedding-ada-002
24
+ max_tokens = 8000 # the maximum for text-embedding-ada-002 is 8191
25
+
26
+
27
+ encoding = tiktoken.get_encoding(embedding_encoding)
28
+ top_n = 500
29
+ # omit reviews that are too long to embed
30
+ df["n_tokens"] = df.combined.apply(lambda x: len(encoding.encode(x)))
31
+ df = df[df.n_tokens <= max_tokens].tail(top_n)
32
+
33
+
34
+ datafile_path = "fine_food_reviews_with_embeddings_1k.csv"
35
+ df = pd.read_csv(datafile_path)
36
+ df["embedding"] = df.embedding.apply(eval).apply(np.array)
37
+