Aidan Phillips committed

Commit 7fd4796 · 1 Parent(s): dc76b04

remove debugging

Files changed (1):
  1. categories/fluency.py (+5 -5)
categories/fluency.py CHANGED

@@ -32,9 +32,9 @@ def pseudo_perplexity(text, threshold=20, max_len=128):
     """
     encoding = tokenizer(text, return_tensors="pt", return_offsets_mapping=True)
     input_ids = encoding["input_ids"][0]
-    print(input_ids)
+    # print(input_ids)
     offset_mapping = encoding["offset_mapping"][0]
-    print(offset_mapping)
+    # print(offset_mapping)
     tokens = tokenizer.convert_ids_to_tokens(input_ids)

     # Group token indices by word based on offset mapping
@@ -87,7 +87,7 @@ def pseudo_perplexity(text, threshold=20, max_len=128):
         word_loss -= 0.6 * __get_word_pr_score(word)
         loss_values.append(word_loss)

-    print(loss_values)
+    # print(loss_values)

     errors = []
     for i, l in enumerate(loss_values):
@@ -99,9 +99,9 @@ def pseudo_perplexity(text, threshold=20, max_len=128):
             "message": f"Perplexity {l} over threshold {threshold}"
         })

-    print(tok_loss)
+    # print(tok_loss)
     s_ppl = np.mean(tok_loss)
-    print(s_ppl)
+    # print(s_ppl)

     res = {
         "score": __fluency_score_from_ppl(s_ppl),