svenwey committed on
Commit
e306ff9
·
1 Parent(s): 796bd91

implement jaccard-similarity + length difference score as similarity score for log-messages

Browse files
Files changed (1) hide show
  1. logmetric.py +32 -9
logmetric.py CHANGED
@@ -69,7 +69,6 @@ class LogMetric(evaluate.Metric):
69
  # Constant regex to get timestrings
70
  timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
71
  timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
72
- sentencesimilarity_metric = evaluate.load("sacrebleu")
73
 
74
 
75
  def _info(self):
@@ -98,7 +97,7 @@ class LogMetric(evaluate.Metric):
98
  # TODO: Download external resources if needed
99
  pass
100
 
101
- def getLogMetric(self, pred : str, ref : str, sentencesimilarity_metric):
102
  ref = ref.strip(' \t\n\r')
103
  pred = pred.strip(' \t\n\r')
104
 
@@ -172,12 +171,36 @@ class LogMetric(evaluate.Metric):
172
  matchesPatternScore = 0.0
173
  monotonicallyIncreasingScore = 0.0
174
 
175
- # We calculate the overall local score of all the log-entries (log-messages)
176
- local_score = sentencesimilarity_metric.compute(
177
- predictions=(list(map(lambda t: t[1], pred_logentries))[:min_logentries]),
178
- references=(list(map(lambda t: t[1], ref_logentries))[:min_logentries]),
179
- tokenize="char")["score"]
 
 
 
 
 
 
 
 
 
 
 
 
180
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
 
183
  # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
@@ -191,13 +214,13 @@ class LogMetric(evaluate.Metric):
191
  # TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
192
 
193
  t_before_logmetric = time.perf_counter()
194
- timestamp_score = np.mean([self.getLogMetric(p,r, self.sentencesimilarity_metric) for p,r in zip(predictions,references)])
195
  t_after_logmetric = time.perf_counter()
196
 
197
  logmetric_duration = f" {t_after_logmetric - t_before_logmetric:0.10f}"
198
 
199
  return {
200
  "score": timestamp_score,
201
- "duration": logmetric_duration,
202
  }
203
 
 
69
  # Constant regex to get timestrings
70
  timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
71
  timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
 
72
 
73
 
74
  def _info(self):
 
97
  # TODO: Download external resources if needed
98
  pass
99
 
100
+ def getLogMetric(self, pred : str, ref : str):
101
  ref = ref.strip(' \t\n\r')
102
  pred = pred.strip(' \t\n\r')
103
 
 
171
  matchesPatternScore = 0.0
172
  monotonicallyIncreasingScore = 0.0
173
 
174
+ # Jaccard Similarity to measure closeness of two log-messages
175
+ def get_jaccard_similarity(set1, set2):
176
+ intersection = set1.intersection(set2)
177
+ union = set1.union(set2)
178
+ return len(intersection) / len(union)
179
+
180
+ # A score depending on the difference in length of two sentences
181
+ def get_length_score(sentence1, sentence2):
182
+ s1len = len(sentence1)
183
+ s2len = len(sentence2)
184
+
185
+ return 1 - (abs(s1len - s2len) / max(s1len, s2len))
186
+
187
+ # Combine a weighted average of different scores
188
+ def get_overall_similarity(sentence1, sentence2):
189
+ s1split = sentence1.split()
190
+ s2split = sentence2.split()
191
 
192
+ jaccard_score = get_jaccard_similarity(set(s1split), set(s2split))
193
+ length_score = get_length_score(s1split, s2split)
194
+
195
+ return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
196
+
197
+
198
+ # apply jaccard-similarity to every pred-ref pair and then take mean score * 100
199
+ local_score = np.mean([get_overall_similarity(p, r) for p,r in
200
+ zip(
201
+ list(map(lambda t: t[1], pred_logentries))[:min_logentries],
202
+ list(map(lambda t: t[1], ref_logentries))[:min_logentries]
203
+ )])
204
 
205
 
206
  # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
 
214
  # TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
215
 
216
  t_before_logmetric = time.perf_counter()
217
+ timestamp_score = np.mean([self.getLogMetric(p,r) for p,r in zip(predictions,references)])
218
  t_after_logmetric = time.perf_counter()
219
 
220
  logmetric_duration = f" {t_after_logmetric - t_before_logmetric:0.10f}"
221
 
222
  return {
223
  "score": timestamp_score,
224
+ "duration": logmetric_duration
225
  }
226