nelsonjq committed on
Commit
00d6aea
·
verified ·
1 Parent(s): 7518e72

debug hide resolve conflicts

Browse files
Files changed (1) hide show
  1. utils.py +15 -15
utils.py CHANGED
@@ -41,29 +41,28 @@ def align_text(txt1: str, txt2: str, lang1: str, lang2: str) -> pd.DataFrame:
41
  normalize_embeddings=True,
42
  show_progress_bar=True)
43
 
44
- conflicts_to_solve, rest = resolver.get_all_conflicts(db_path, min_chain_length=2, max_conflicts_len=6, batch_id=-1)
45
- resolver.get_statistics(conflicts_to_solve)
46
- resolver.get_statistics(rest)
47
 
48
  steps = 3
49
  batch_id = -1
50
 
51
- for i in range(steps):
52
- conflicts, rest = resolver.get_all_conflicts(db_path, min_chain_length=2 + i, max_conflicts_len=6 * (i + 1), batch_id=batch_id)
53
- resolver.resolve_all_conflicts(db_path, conflicts, model_name, show_logs=False)
54
- vis_helper.visualize_alignment_by_db(db_path, output_path="img_test1.png", lang_name_from=lang1, lang_name_to=lang2, batch_size=400, size=(600, 600), plt_show=True)
55
 
56
- if len(rest) == 0:
57
- break
58
 
59
  paragraphs_dict, par_ids, meta_info, sent_counter_dict = reader.get_paragraphs(db_path)
60
-
61
- paragraphs_from = paragraphs_dict[lang1]
62
- paragraphs_to = paragraphs_dict[lang2]
63
-
64
  # Debug prints to understand the structure
65
- print("paragraphs_from:", paragraphs_from)
66
- print("paragraphs_to:", paragraphs_to)
 
 
67
 
68
  data = []
69
  for from_paragraph, to_paragraph in zip(paragraphs_from, paragraphs_to):
@@ -74,6 +73,7 @@ def align_text(txt1: str, txt2: str, lang1: str, lang2: str) -> pd.DataFrame:
74
  for from_line, to_line in zip(from_paragraph, to_paragraph):
75
  data.append({"From": from_line, "To": to_line})
76
 
 
77
  df = pd.DataFrame(data)
78
  return df
79
 
 
41
  normalize_embeddings=True,
42
  show_progress_bar=True)
43
 
44
+ #conflicts_to_solve, rest = resolver.get_all_conflicts(db_path, min_chain_length=2, max_conflicts_len=6, batch_id=-1)
45
+ #resolver.get_statistics(conflicts_to_solve)
46
+ #resolver.get_statistics(rest)
47
 
48
  steps = 3
49
  batch_id = -1
50
 
51
+ #for i in range(steps):
52
+ # conflicts, rest = resolver.get_all_conflicts(db_path, min_chain_length=2 + i, max_conflicts_len=6 * (i + 1), batch_id=batch_id)
53
+ # resolver.resolve_all_conflicts(db_path, conflicts, model_name, show_logs=False)
54
+ # vis_helper.visualize_alignment_by_db(db_path, output_path="img_test1.png", lang_name_from=lang1, lang_name_to=lang2, batch_size=400, size=(600, 600), plt_show=True)
55
 
56
+ #if len(rest) == 0:
57
+ # break
58
 
59
  paragraphs_dict, par_ids, meta_info, sent_counter_dict = reader.get_paragraphs(db_path)
60
+
 
 
 
61
  # Debug prints to understand the structure
62
+ print("paragraphs_dict keys:", paragraphs_dict.keys())
63
+
64
+ paragraphs_from = paragraphs_dict["from"]
65
+ paragraphs_to = paragraphs_dict["to"]
66
 
67
  data = []
68
  for from_paragraph, to_paragraph in zip(paragraphs_from, paragraphs_to):
 
73
  for from_line, to_line in zip(from_paragraph, to_paragraph):
74
  data.append({"From": from_line, "To": to_line})
75
 
76
+
77
  df = pd.DataFrame(data)
78
  return df
79