Spaces:

minishlab
/

semantic-deduplication

Running

Pringled committed on Oct 12, 2024

Commit

4f9641d

1 Parent(s): 24f1526

Updates

Files changed (1) hide show

app.py CHANGED Viewed

@@ -122,6 +122,11 @@ def display_word_differences(x: str, y: str) -> str:
     diff = ndiff(x.split(), y.split())
     return " ".join([word for word in diff if word.startswith(("+", "-"))])
 def perform_deduplication(
     deduplication_type,
     dataset1_name,
@@ -159,7 +164,8 @@ def perform_deduplication(
             # Compute embeddings
             status = "Computing embeddings for Dataset 1..."
             yield status, ""
-            embedding_matrix = model.encode(texts, show_progressbar=True)
             # embedding_matrix = compute_embeddings(
             #     texts,
             #     batch_size=64,

     diff = ndiff(x.split(), y.split())
     return " ".join([word for word in diff if word.startswith(("+", "-"))])
+def encode_texts(texts, progress=None):
+    embedding_matrix = model.encode(texts, show_progressbar=False)
+    return embedding_matrix
 def perform_deduplication(
     deduplication_type,
     dataset1_name,
             # Compute embeddings
             status = "Computing embeddings for Dataset 1..."
             yield status, ""
+            embedding_matrix = encode_texts(texts, progress=progress)
+            #embedding_matrix = model.encode(texts, show_progressbar=True)
             # embedding_matrix = compute_embeddings(
             #     texts,
             #     batch_size=64,