Updates
Browse files
app.py
CHANGED
@@ -122,6 +122,11 @@ def display_word_differences(x: str, y: str) -> str:
|
|
122 |
diff = ndiff(x.split(), y.split())
|
123 |
return " ".join([word for word in diff if word.startswith(("+", "-"))])
|
124 |
|
|
|
|
|
|
|
|
|
|
|
125 |
def perform_deduplication(
|
126 |
deduplication_type,
|
127 |
dataset1_name,
|
@@ -159,7 +164,8 @@ def perform_deduplication(
|
|
159 |
# Compute embeddings
|
160 |
status = "Computing embeddings for Dataset 1..."
|
161 |
yield status, ""
|
162 |
-
embedding_matrix =
|
|
|
163 |
# embedding_matrix = compute_embeddings(
|
164 |
# texts,
|
165 |
# batch_size=64,
|
|
|
122 |
diff = ndiff(x.split(), y.split())
|
123 |
return " ".join([word for word in diff if word.startswith(("+", "-"))])
|
124 |
|
125 |
+
|
126 |
+
def encode_texts(texts, progress=None):
|
127 |
+
embedding_matrix = model.encode(texts, show_progressbar=False)
|
128 |
+
return embedding_matrix
|
129 |
+
|
130 |
def perform_deduplication(
|
131 |
deduplication_type,
|
132 |
dataset1_name,
|
|
|
164 |
# Compute embeddings
|
165 |
status = "Computing embeddings for Dataset 1..."
|
166 |
yield status, ""
|
167 |
+
embedding_matrix = encode_texts(texts, progress=progress)
|
168 |
+
#embedding_matrix = model.encode(texts, show_progressbar=True)
|
169 |
# embedding_matrix = compute_embeddings(
|
170 |
# texts,
|
171 |
# batch_size=64,
|