Implement Jaccard similarity plus a length-difference score as the similarity score for log messages
Browse files- logmetric.py +32 -9
logmetric.py
CHANGED
@@ -69,7 +69,6 @@ class LogMetric(evaluate.Metric):
|
|
69 |
# Constant regex to get timestrings
|
70 |
timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
|
71 |
timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
|
72 |
-
sentencesimilarity_metric = evaluate.load("sacrebleu")
|
73 |
|
74 |
|
75 |
def _info(self):
|
@@ -98,7 +97,7 @@ class LogMetric(evaluate.Metric):
|
|
98 |
# TODO: Download external resources if needed
|
99 |
pass
|
100 |
|
101 |
-
def getLogMetric(self, pred : str, ref : str
|
102 |
ref = ref.strip(' \t\n\r')
|
103 |
pred = pred.strip(' \t\n\r')
|
104 |
|
@@ -172,12 +171,36 @@ class LogMetric(evaluate.Metric):
|
|
172 |
matchesPatternScore = 0.0
|
173 |
monotonicallyIncreasingScore = 0.0
|
174 |
|
175 |
-
#
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
|
183 |
# we aggregate the bleu scores where we weight the difference in logentries with a score of 0
|
@@ -191,13 +214,13 @@ class LogMetric(evaluate.Metric):
|
|
191 |
# TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
|
192 |
|
193 |
t_before_logmetric = time.perf_counter()
|
194 |
-
timestamp_score = np.mean([self.getLogMetric(p,r
|
195 |
t_after_logmetric = time.perf_counter()
|
196 |
|
197 |
logmetric_duration = f" {t_after_logmetric - t_before_logmetric:0.10f}"
|
198 |
|
199 |
return {
|
200 |
"score": timestamp_score,
|
201 |
-
"duration": logmetric_duration
|
202 |
}
|
203 |
|
|
|
69 |
# Constant regex to get timestrings
|
70 |
timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
|
71 |
timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
|
|
|
72 |
|
73 |
|
74 |
def _info(self):
|
|
|
97 |
# TODO: Download external resources if needed
|
98 |
pass
|
99 |
|
100 |
+
def getLogMetric(self, pred : str, ref : str):
|
101 |
ref = ref.strip(' \t\n\r')
|
102 |
pred = pred.strip(' \t\n\r')
|
103 |
|
|
|
171 |
matchesPatternScore = 0.0
|
172 |
monotonicallyIncreasingScore = 0.0
|
173 |
|
174 |
+
# Jaccard Similarity to measure closeness of two log-messages
|
175 |
+
def get_jaccard_similarity(set1, set2):
    """Return the Jaccard similarity of two sets.

    The similarity is the size of the intersection divided by the size
    of the union, a value between 0.0 (no shared elements) and 1.0
    (identical sets).

    Args:
        set1: First set of tokens.
        set2: Second set of tokens.

    Returns:
        The Jaccard similarity as a float. Two empty sets are treated as
        identical and yield 1.0.
    """
    union = set1.union(set2)
    # Both sets empty (e.g. two empty log messages): the ratio is 0/0.
    # Treat the inputs as identical instead of raising ZeroDivisionError.
    if not union:
        return 1.0
    intersection = set1.intersection(set2)
    return len(intersection) / len(union)
|
179 |
+
|
180 |
+
# A score depending on the difference in length of two sentences
|
181 |
+
def get_length_score(sentence1, sentence2):
    """Return a score reflecting how close two sequences are in length.

    The score is 1 minus the absolute length difference divided by the
    longer length: 1.0 for equal lengths, approaching 0.0 as the
    lengths diverge.

    Args:
        sentence1: First sized sequence (anything supporting ``len``).
        sentence2: Second sized sequence.

    Returns:
        The length score as a float. Two empty sequences have equal
        length and yield 1.0.
    """
    s1len = len(sentence1)
    s2len = len(sentence2)
    longest = max(s1len, s2len)

    # Both sequences empty: the ratio is 0/0. Their lengths are equal, so
    # return a perfect score instead of raising ZeroDivisionError.
    if longest == 0:
        return 1.0

    return 1 - (abs(s1len - s2len) / longest)
|
186 |
+
|
187 |
+
# Combine a weighted average of different scores
|
188 |
+
def get_overall_similarity(sentence1, sentence2):
    """Return a 0-100 similarity score for two sentences.

    Both sentences are split on whitespace. The score is a weighted
    average of the Jaccard similarity of the token sets (weight 0.7)
    and the token-count length score (weight 0.3), scaled by 100.
    """
    tokens_a, tokens_b = sentence1.split(), sentence2.split()

    token_overlap = get_jaccard_similarity(set(tokens_a), set(tokens_b))
    length_closeness = get_length_score(tokens_a, tokens_b)

    weighted = token_overlap * 0.7 + length_closeness * 0.3
    return weighted * 100.0
|
196 |
+
|
197 |
+
|
198 |
+
# apply the combined similarity (Jaccard + length score, already scaled to 0-100) to every pred-ref pair and take the mean
|
199 |
+
local_score = np.mean([get_overall_similarity(p, r) for p,r in
|
200 |
+
zip(
|
201 |
+
list(map(lambda t: t[1], pred_logentries))[:min_logentries],
|
202 |
+
list(map(lambda t: t[1], ref_logentries))[:min_logentries]
|
203 |
+
)])
|
204 |
|
205 |
|
206 |
# we aggregate the bleu scores where we weight the difference in logentries with a score of 0
|
|
|
214 |
# TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
|
215 |
|
216 |
t_before_logmetric = time.perf_counter()
|
217 |
+
timestamp_score = np.mean([self.getLogMetric(p,r) for p,r in zip(predictions,references)])
|
218 |
t_after_logmetric = time.perf_counter()
|
219 |
|
220 |
logmetric_duration = f" {t_after_logmetric - t_before_logmetric:0.10f}"
|
221 |
|
222 |
return {
|
223 |
"score": timestamp_score,
|
224 |
+
"duration": logmetric_duration
|
225 |
}
|
226 |
|