Debug: disable conflict resolution and print paragraphs_dict keys
Browse files
utils.py
CHANGED
@@ -41,29 +41,28 @@ def align_text(txt1: str, txt2: str, lang1: str, lang2: str) -> pd.DataFrame:
|
|
41 |
normalize_embeddings=True,
|
42 |
show_progress_bar=True)
|
43 |
|
44 |
-
conflicts_to_solve, rest = resolver.get_all_conflicts(db_path, min_chain_length=2, max_conflicts_len=6, batch_id=-1)
|
45 |
-
resolver.get_statistics(conflicts_to_solve)
|
46 |
-
resolver.get_statistics(rest)
|
47 |
|
48 |
steps = 3
|
49 |
batch_id = -1
|
50 |
|
51 |
-
for i in range(steps):
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
|
56 |
-
if len(rest) == 0:
|
57 |
-
|
58 |
|
59 |
paragraphs_dict, par_ids, meta_info, sent_counter_dict = reader.get_paragraphs(db_path)
|
60 |
-
|
61 |
-
paragraphs_from = paragraphs_dict[lang1]
|
62 |
-
paragraphs_to = paragraphs_dict[lang2]
|
63 |
-
|
64 |
# Debug prints to understand the structure
|
65 |
-
print("
|
66 |
-
|
|
|
|
|
67 |
|
68 |
data = []
|
69 |
for from_paragraph, to_paragraph in zip(paragraphs_from, paragraphs_to):
|
@@ -74,6 +73,7 @@ def align_text(txt1: str, txt2: str, lang1: str, lang2: str) -> pd.DataFrame:
|
|
74 |
for from_line, to_line in zip(from_paragraph, to_paragraph):
|
75 |
data.append({"From": from_line, "To": to_line})
|
76 |
|
|
|
77 |
df = pd.DataFrame(data)
|
78 |
return df
|
79 |
|
|
|
41 |
normalize_embeddings=True,
|
42 |
show_progress_bar=True)
|
43 |
|
44 |
+
#conflicts_to_solve, rest = resolver.get_all_conflicts(db_path, min_chain_length=2, max_conflicts_len=6, batch_id=-1)
|
45 |
+
#resolver.get_statistics(conflicts_to_solve)
|
46 |
+
#resolver.get_statistics(rest)
|
47 |
|
48 |
steps = 3
|
49 |
batch_id = -1
|
50 |
|
51 |
+
#for i in range(steps):
|
52 |
+
# conflicts, rest = resolver.get_all_conflicts(db_path, min_chain_length=2 + i, max_conflicts_len=6 * (i + 1), batch_id=batch_id)
|
53 |
+
# resolver.resolve_all_conflicts(db_path, conflicts, model_name, show_logs=False)
|
54 |
+
# vis_helper.visualize_alignment_by_db(db_path, output_path="img_test1.png", lang_name_from=lang1, lang_name_to=lang2, batch_size=400, size=(600, 600), plt_show=True)
|
55 |
|
56 |
+
#if len(rest) == 0:
|
57 |
+
# break
|
58 |
|
59 |
paragraphs_dict, par_ids, meta_info, sent_counter_dict = reader.get_paragraphs(db_path)
|
60 |
+
|
|
|
|
|
|
|
61 |
# Debug prints to understand the structure
|
62 |
+
print("paragraphs_dict keys:", paragraphs_dict.keys())
|
63 |
+
|
64 |
+
paragraphs_from = paragraphs_dict["from"]
|
65 |
+
paragraphs_to = paragraphs_dict["to"]
|
66 |
|
67 |
data = []
|
68 |
for from_paragraph, to_paragraph in zip(paragraphs_from, paragraphs_to):
|
|
|
73 |
for from_line, to_line in zip(from_paragraph, to_paragraph):
|
74 |
data.append({"From": from_line, "To": to_line})
|
75 |
|
76 |
+
|
77 |
df = pd.DataFrame(data)
|
78 |
return df
|
79 |
|