Pringled committed on
Commit
4f9641d
·
1 Parent(s): 24f1526
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -122,6 +122,11 @@ def display_word_differences(x: str, y: str) -> str:
122
  diff = ndiff(x.split(), y.split())
123
  return " ".join([word for word in diff if word.startswith(("+", "-"))])
124
 
 
 
 
 
 
125
  def perform_deduplication(
126
  deduplication_type,
127
  dataset1_name,
@@ -159,7 +164,8 @@ def perform_deduplication(
159
  # Compute embeddings
160
  status = "Computing embeddings for Dataset 1..."
161
  yield status, ""
162
- embedding_matrix = model.encode(texts, show_progressbar=True)
 
163
  # embedding_matrix = compute_embeddings(
164
  # texts,
165
  # batch_size=64,
 
122
  diff = ndiff(x.split(), y.split())
123
  return " ".join([word for word in diff if word.startswith(("+", "-"))])
124
 
125
+
126
+ def encode_texts(texts, progress=None):
127
+ embedding_matrix = model.encode(texts, show_progressbar=False)
128
+ return embedding_matrix
129
+
130
  def perform_deduplication(
131
  deduplication_type,
132
  dataset1_name,
 
164
  # Compute embeddings
165
  status = "Computing embeddings for Dataset 1..."
166
  yield status, ""
167
+ embedding_matrix = encode_texts(texts, progress=progress)
168
+ #embedding_matrix = model.encode(texts, show_progressbar=True)
169
  # embedding_matrix = compute_embeddings(
170
  # texts,
171
  # batch_size=64,