awacke1 committed on
Commit
86c4485
·
1 Parent(s): b827309

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -14,7 +14,7 @@ https://github.com/pinecone-io/examples/tree/master/learn/algos-and-libraries/be
14
  # data = load_dataset('jamescalam/python-reddit')
15
  data = load_dataset("awacke1/LOINC-Panels-and-Forms")
16
  data = data.filter(
17
- lambda x: True if len(x['selftext']) > 30 else 0
18
  )
19
  from bertopic import BERTopic
20
  from sklearn.feature_extraction.text import CountVectorizer
@@ -46,7 +46,7 @@ embeds = np.zeros((n, model.get_sentence_embedding_dimension()))
46
 
47
  for i in tqdm(range(0, n, batch_size)):
48
  i_end = min(i+batch_size, n)
49
- batch = data['selftext'][i:i_end]
50
  batch_embed = model.encode(batch)
51
  embeds[i:i_end,:] = batch_embed
52
 
 
14
  # data = load_dataset('jamescalam/python-reddit')
15
  data = load_dataset("awacke1/LOINC-Panels-and-Forms")
16
  data = data.filter(
17
+ lambda x: True if len(x[0]) > 30 else 0
18
  )
19
  from bertopic import BERTopic
20
  from sklearn.feature_extraction.text import CountVectorizer
 
46
 
47
  for i in tqdm(range(0, n, batch_size)):
48
  i_end = min(i+batch_size, n)
49
+ batch = data[0][i:i_end]
50
  batch_embed = model.encode(batch)
51
  embeds[i:i_end,:] = batch_embed
52