svenwey committed on
Commit
e306ff9
·
1 Parent(s): 796bd91

implement jaccard-similarity + length difference score as similarity score for log-messages

Browse files
Files changed (1) hide show
  1. logmetric.py +32 -9
logmetric.py CHANGED
@@ -69,7 +69,6 @@ class LogMetric(evaluate.Metric):
69
  # Constant regex to get timestrings
70
  timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
71
  timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
72
- sentencesimilarity_metric = evaluate.load("sacrebleu")
73
 
74
 
75
  def _info(self):
@@ -98,7 +97,7 @@ class LogMetric(evaluate.Metric):
98
  # TODO: Download external resources if needed
99
  pass
100
 
101
- def getLogMetric(self, pred : str, ref : str, sentencesimilarity_metric):
102
  ref = ref.strip(' \t\n\r')
103
  pred = pred.strip(' \t\n\r')
104
 
@@ -172,12 +171,36 @@ class LogMetric(evaluate.Metric):
172
  matchesPatternScore = 0.0
173
  monotonicallyIncreasingScore = 0.0
174
 
175
- # We calculate the overall local score of all the log-entries (log-messages)
176
- local_score = sentencesimilarity_metric.compute(
177
- predictions=(list(map(lambda t: t[1], pred_logentries))[:min_logentries]),
178
- references=(list(map(lambda t: t[1], ref_logentries))[:min_logentries]),
179
- tokenize="char")["score"]
 
 
 
 
 
 
 
 
 
 
 
 
180
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
 
183
  # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
@@ -191,13 +214,13 @@ class LogMetric(evaluate.Metric):
191
  # TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
192
 
193
  t_before_logmetric = time.perf_counter()
194
- timestamp_score = np.mean([self.getLogMetric(p,r, self.sentencesimilarity_metric) for p,r in zip(predictions,references)])
195
  t_after_logmetric = time.perf_counter()
196
 
197
  logmetric_duration = f" {t_after_logmetric - t_before_logmetric:0.10f}"
198
 
199
  return {
200
  "score": timestamp_score,
201
- "duration": logmetric_duration,
202
  }
203
 
 
69
  # Constant regex to get timestrings
70
  timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
71
  timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
 
72
 
73
 
74
  def _info(self):
 
97
  # TODO: Download external resources if needed
98
  pass
99
 
100
+ def getLogMetric(self, pred : str, ref : str):
101
  ref = ref.strip(' \t\n\r')
102
  pred = pred.strip(' \t\n\r')
103
 
 
171
  matchesPatternScore = 0.0
172
  monotonicallyIncreasingScore = 0.0
173
 
174
+ # Jaccard Similarity to measure closeness of two log-messages
175
+ def get_jaccard_similarity(set1, set2):
176
+ intersection = set1.intersection(set2)
177
+ union = set1.union(set2)
178
+ return len(intersection) / len(union)
179
+
180
+ # A score depending on the difference in length of two sentences
181
+ def get_length_score(sentence1, sentence2):
182
+ s1len = len(sentence1)
183
+ s2len = len(sentence2)
184
+
185
+ return 1 - (abs(s1len - s2len) / max(s1len, s2len))
186
+
187
+ # Combine a weighted average of different scores
188
+ def get_overall_similarity(sentence1, sentence2):
189
+ s1split = sentence1.split()
190
+ s2split = sentence2.split()
191
 
192
+ jaccard_score = get_jaccard_similarity(set(s1split), set(s2split))
193
+ length_score = get_length_score(s1split, s2split)
194
+
195
+ return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
196
+
197
+
198
+ # apply jaccard-similarity to every pred-ref pair and then take mean score * 100
199
+ local_score = np.mean([get_overall_similarity(p, r) for p,r in
200
+ zip(
201
+ list(map(lambda t: t[1], pred_logentries))[:min_logentries],
202
+ list(map(lambda t: t[1], ref_logentries))[:min_logentries]
203
+ )])
204
 
205
 
206
  # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
 
214
  # TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
215
 
216
  t_before_logmetric = time.perf_counter()
217
+ timestamp_score = np.mean([self.getLogMetric(p,r) for p,r in zip(predictions,references)])
218
  t_after_logmetric = time.perf_counter()
219
 
220
  logmetric_duration = f" {t_after_logmetric - t_before_logmetric:0.10f}"
221
 
222
  return {
223
  "score": timestamp_score,
224
+ "duration": logmetric_duration
225
  }
226