Update visualize.py
Browse files- visualize.py +193 -37
visualize.py
CHANGED
@@ -91,31 +91,22 @@ def show_hallucinations(element):
|
|
91 |
]
|
92 |
|
93 |
|
94 |
-
|
95 |
"shroom-semeval25/cogumelo-hallucinations-detector-roberta-base"
|
96 |
)
|
97 |
-
|
98 |
"shroom-semeval25/cogumelo-hallucinations-detector-roberta-base"
|
99 |
)
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
"""The model will return 0 if it's not a hallucination, 1 if it is the beginning of a hallucination, and 2 if it's the continuation of a hallucination"""
|
104 |
-
hallucinated_tokens = prediction_tokenizer(
|
105 |
-
hallucinated_text,
|
106 |
-
return_offsets_mapping=True,
|
107 |
-
add_special_tokens=True,
|
108 |
-
return_tensors="pt",
|
109 |
)
|
|
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
"attention_mask": hallucinated_tokens["attention_mask"],
|
114 |
-
}
|
115 |
-
with torch.no_grad():
|
116 |
-
outputs = prediction_model(**inputs)
|
117 |
# Get the highest value for each token
|
118 |
-
predictions =
|
119 |
entities = []
|
120 |
current_entity = None
|
121 |
for i, prediction in enumerate(predictions):
|
@@ -129,20 +120,18 @@ def predict_hallucinations(hallucinated_text: str):
|
|
129 |
entities.append(current_entity)
|
130 |
current_entity = {
|
131 |
"entity": "hal",
|
132 |
-
"start":
|
133 |
-
"end":
|
134 |
}
|
135 |
if prediction == 2:
|
136 |
if current_entity is None:
|
137 |
current_entity = {
|
138 |
"entity": "hal",
|
139 |
-
"start":
|
140 |
-
"end":
|
141 |
}
|
142 |
else:
|
143 |
-
current_entity["end"] =
|
144 |
-
hallucinated_tokens["offset_mapping"][0][i][1] + 1
|
145 |
-
)
|
146 |
if current_entity is not None:
|
147 |
entities.append(current_entity)
|
148 |
return {
|
@@ -157,14 +146,150 @@ def update_selection(evt: gr.SelectData):
|
|
157 |
# Run the two functions
|
158 |
json_example, original_text, highlighted_text = show_hallucinations(element)
|
159 |
try:
|
160 |
-
|
161 |
-
element["hallucinated_answer_generated"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
)
|
163 |
except Exception as e:
|
164 |
logging.exception(f"An error occurred: {e}")
|
165 |
gr.Error(f"An error occurred: {e}")
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
|
170 |
with gr.Blocks(title="Hallucinations Explorer") as demo:
|
@@ -186,8 +311,13 @@ _SHROOM '25: Detection of Hallucinated Content_
|
|
186 |
color_map={"+": "red", "-": "blue", "hal": "red"},
|
187 |
combine_adjacent=True,
|
188 |
)
|
189 |
-
|
190 |
-
label="Predicted Hallucinations",
|
|
|
|
|
|
|
|
|
|
|
191 |
color_map={"+": "red", "-": "blue", "hal": "red"},
|
192 |
combine_adjacent=True,
|
193 |
)
|
@@ -200,7 +330,8 @@ _SHROOM '25: Detection of Hallucinated Content_
|
|
200 |
json_example,
|
201 |
original_text,
|
202 |
highlighted_text,
|
203 |
-
|
|
|
204 |
],
|
205 |
)
|
206 |
|
@@ -213,15 +344,40 @@ _SHROOM '25: Detection of Hallucinated Content_
|
|
213 |
type="text",
|
214 |
)
|
215 |
|
216 |
-
|
217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
color_map={"+": "red", "-": "blue", "hal": "red"},
|
219 |
combine_adjacent=True,
|
220 |
)
|
|
|
221 |
model_manual_input.change(
|
222 |
-
|
223 |
-
inputs=[model_manual_input],
|
224 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
)
|
226 |
|
227 |
demo.launch(show_error=True)
|
|
|
91 |
]
|
92 |
|
93 |
|
94 |
+
# Hugging Face Hub checkpoints for the two hallucination detectors.
_ROBERTA_BASE_CHECKPOINT = "shroom-semeval25/cogumelo-hallucinations-detector-roberta-base"
_ROBERTA_LARGE_QA_CHECKPOINT = (
    "shroom-semeval25/cogumelo-hallucinations-detector-roberta-large-qa-15000"
)

# Token-classification model that looks at the answer text alone.
roberta_base_predictor = transformers.AutoModelForTokenClassification.from_pretrained(
    _ROBERTA_BASE_CHECKPOINT
)
# Tokenizer loaded from the base checkpoint.
roberta_base_tokenizer = transformers.AutoTokenizer.from_pretrained(
    _ROBERTA_BASE_CHECKPOINT
)
# Token-classification model that is fed (question, answer) pairs.
roberta_large_qa_predictor = transformers.AutoModelForTokenClassification.from_pretrained(
    _ROBERTA_LARGE_QA_CHECKPOINT
)
|
105 |
|
106 |
+
|
107 |
+
def mark_hallucinations(logits, hallucinated_text, offsets):
|
|
|
|
|
|
|
|
|
108 |
# Get the highest value for each token
|
109 |
+
predictions = logits.argmax(dim=-1).squeeze(0).tolist()
|
110 |
entities = []
|
111 |
current_entity = None
|
112 |
for i, prediction in enumerate(predictions):
|
|
|
120 |
entities.append(current_entity)
|
121 |
current_entity = {
|
122 |
"entity": "hal",
|
123 |
+
"start": offsets[i][0],
|
124 |
+
"end": offsets[i][1] + 1,
|
125 |
}
|
126 |
if prediction == 2:
|
127 |
if current_entity is None:
|
128 |
current_entity = {
|
129 |
"entity": "hal",
|
130 |
+
"start": offsets[i][0],
|
131 |
+
"end": offsets[i][1] + 1,
|
132 |
}
|
133 |
else:
|
134 |
+
current_entity["end"] = offsets[i][1] + 1
|
|
|
|
|
135 |
if current_entity is not None:
|
136 |
entities.append(current_entity)
|
137 |
return {
|
|
|
146 |
# Run the two functions
|
147 |
json_example, original_text, highlighted_text = show_hallucinations(element)
|
148 |
try:
|
149 |
+
hallucinated_tokens = roberta_base_tokenizer(
|
150 |
+
text=element["hallucinated_answer_generated"],
|
151 |
+
return_offsets_mapping=True,
|
152 |
+
add_special_tokens=True,
|
153 |
+
return_tensors="pt",
|
154 |
+
return_special_tokens_mask=True,
|
155 |
+
)
|
156 |
+
q_a_tokens = roberta_base_tokenizer(
|
157 |
+
# We have to batch into a single-example batch, because otherwise the tokenizer will interpret that the second element of the pair is example #2 of the batch (while actually it is the second part of the pair of example #1)
|
158 |
+
text=[(element["question"], element["hallucinated_answer_generated"])],
|
159 |
+
return_offsets_mapping=True,
|
160 |
+
add_special_tokens=True,
|
161 |
+
return_tensors="pt",
|
162 |
+
return_special_tokens_mask=True,
|
163 |
+
)
|
164 |
+
with torch.no_grad():
|
165 |
+
outputs_roberta_base = roberta_base_predictor(
|
166 |
+
input_ids=hallucinated_tokens.input_ids,
|
167 |
+
attention_mask=hallucinated_tokens.attention_mask,
|
168 |
+
)
|
169 |
+
# Take only the outputs that are NOT special tokens and where the attention mask is 1
|
170 |
+
logits_roberta_base = outputs_roberta_base.logits[
|
171 |
+
...,
|
172 |
+
(hallucinated_tokens.special_tokens_mask == 0)
|
173 |
+
& (hallucinated_tokens.attention_mask == 1),
|
174 |
+
:,
|
175 |
+
]
|
176 |
+
outputs_roberta_large_qa = roberta_large_qa_predictor(
|
177 |
+
input_ids=q_a_tokens.input_ids,
|
178 |
+
attention_mask=q_a_tokens.attention_mask,
|
179 |
+
)
|
180 |
+
# Take only the outputs after the first special token and where the attention mask is 1 and the special tokens mask is 0
|
181 |
+
logits_roberta_large_qa = outputs_roberta_large_qa.logits[
|
182 |
+
...,
|
183 |
+
(q_a_tokens.special_tokens_mask.cumsum(dim=-1) > 1)
|
184 |
+
& (q_a_tokens.attention_mask == 1)
|
185 |
+
& (q_a_tokens.special_tokens_mask == 0),
|
186 |
+
:,
|
187 |
+
]
|
188 |
+
|
189 |
+
highlighted_text_predicted_roberta_base = mark_hallucinations(
|
190 |
+
hallucinated_text=element["hallucinated_answer_generated"],
|
191 |
+
logits=logits_roberta_base,
|
192 |
+
# Discard the first token, which is the BOS token
|
193 |
+
offsets=hallucinated_tokens["offset_mapping"][0][1:],
|
194 |
+
)
|
195 |
+
# How much should we remove from the offset_mapping?
|
196 |
+
# The length of the tokens before the first special token
|
197 |
+
number_of_offsets_to_remove = (
|
198 |
+
q_a_tokens.special_tokens_mask.cumsum(dim=-1) <= 1
|
199 |
+
).sum()
|
200 |
+
highlighted_text_predicted_roberta_large_qa = mark_hallucinations(
|
201 |
+
hallucinated_text=element["hallucinated_answer_generated"],
|
202 |
+
logits=logits_roberta_large_qa,
|
203 |
+
# Discard the first two tokens, which are the separators between the question and the answer
|
204 |
+
offsets=q_a_tokens["offset_mapping"][0][number_of_offsets_to_remove + 2 :],
|
205 |
)
|
206 |
except Exception as e:
|
207 |
logging.exception(f"An error occurred: {e}")
|
208 |
gr.Error(f"An error occurred: {e}")
|
209 |
+
highlighted_text_predicted_roberta_base = {"text": "", "entities": []}
|
210 |
+
highlighted_text_predicted_roberta_large_qa = {"text": "", "entities": []}
|
211 |
+
return (
|
212 |
+
json_example,
|
213 |
+
original_text,
|
214 |
+
highlighted_text,
|
215 |
+
highlighted_text_predicted_roberta_base,
|
216 |
+
highlighted_text_predicted_roberta_large_qa,
|
217 |
+
)
|
218 |
+
|
219 |
+
|
220 |
+
def predict_hallucinations_manual_input_roberta_base(text: str):
    """Highlight predicted hallucinations in ``text`` with the RoBERTa Base model.

    The text is tokenized on its own (no question), run through
    ``roberta_base_predictor``, and the per-token logits are handed to
    ``mark_hallucinations`` together with the character offsets of the tokens.

    Args:
        text: The answer text typed by the user.

    Returns:
        The value produced by ``mark_hallucinations`` for ``text``.
    """
    hallucinated_tokens = roberta_base_tokenizer(
        text=text,
        return_offsets_mapping=True,
        add_special_tokens=True,
        return_tensors="pt",
        return_special_tokens_mask=True,
    )
    with torch.no_grad():
        outputs_roberta_base = roberta_base_predictor(
            input_ids=hallucinated_tokens.input_ids,
            attention_mask=hallucinated_tokens.attention_mask,
        )

    # Keep only attended positions that are not special tokens.
    real_token_positions = (hallucinated_tokens.special_tokens_mask == 0) & (
        hallucinated_tokens.attention_mask == 1
    )
    # Boolean indexing merges the batch and sequence axes into one, giving a
    # (n_tokens, n_labels) tensor. Put the batch axis back: mark_hallucinations
    # squeezes axis 0 after the argmax, and without this a one-token text would
    # collapse to a scalar and break its loop over the predictions.
    logits_roberta_base = outputs_roberta_base.logits[real_token_positions].unsqueeze(0)

    return mark_hallucinations(
        hallucinated_text=text,
        logits=logits_roberta_base,
        # Discard the first offset, which belongs to the BOS token.
        offsets=hallucinated_tokens["offset_mapping"][0][1:],
    )
|
245 |
+
|
246 |
+
|
247 |
+
def predict_hallucinations_manual_input_roberta_qa_large(text: str, question: str = ""):
    """Highlight predicted hallucinations in ``text`` with the RoBERTa Large QA model.

    The question and the answer are tokenized as one pair, run through
    ``roberta_large_qa_predictor``, and only the logits and offsets of the
    answer part are handed to ``mark_hallucinations``.

    Args:
        text: The answer text typed by the user.
        question: The question the answer responds to.

    Returns:
        The value produced by ``mark_hallucinations`` for ``text``.
    """
    # NOTE(review): the base tokenizer is reused for the large QA model;
    # presumably both checkpoints share a vocabulary - confirm.
    # The pair is wrapped in a one-element list so that it is read as a single
    # (question, answer) example rather than as two separate examples.
    q_a_tokens = roberta_base_tokenizer(
        text=[(question, text)],
        return_offsets_mapping=True,
        add_special_tokens=True,
        return_tensors="pt",
        return_special_tokens_mask=True,
    )
    with torch.no_grad():
        outputs_roberta_large_qa = roberta_large_qa_predictor(
            input_ids=q_a_tokens.input_ids,
            attention_mask=q_a_tokens.attention_mask,
        )

    # Running count of special tokens seen so far at each position.
    special_tokens_seen = q_a_tokens.special_tokens_mask.cumsum(dim=-1)
    # Positions up to (not including) the second special token: these are
    # dropped from the offsets below, plus the two separators that follow.
    number_of_offsets_to_remove = (special_tokens_seen <= 1).sum()
    # Answer tokens: after the first special token, attended, and not special.
    answer_positions = (
        (special_tokens_seen > 1)
        & (q_a_tokens.attention_mask == 1)
        & (q_a_tokens.special_tokens_mask == 0)
    )
    # Boolean indexing merges the batch and sequence axes into one, giving a
    # (n_tokens, n_labels) tensor. Put the batch axis back: mark_hallucinations
    # squeezes axis 0 after the argmax, and without this a one-token answer
    # would collapse to a scalar and break its loop over the predictions.
    logits_roberta_large_qa = outputs_roberta_large_qa.logits[answer_positions].unsqueeze(0)

    return mark_hallucinations(
        hallucinated_text=text,
        logits=logits_roberta_large_qa,
        # Skip the leading part plus the two separator tokens between the
        # question and the answer.
        offsets=q_a_tokens["offset_mapping"][0][number_of_offsets_to_remove + 2 :],
    )
|
276 |
+
|
277 |
+
|
278 |
+
def predict_hallucinations_manual_input(text: str, question: str = ""):
    """Run both hallucination detectors on manually entered text.

    Args:
        text: The answer text typed by the user.
        question: Optional question; required only by the RoBERTa Large QA model.

    Returns:
        A ``(roberta_base_prediction, roberta_large_qa_prediction)`` tuple. A
        prediction is ``{"text": "", "entities": []}`` when its model is not
        run: both when ``text`` is blank, the second when ``question`` is blank.
    """

    def _empty_response():
        # Build a new dict on every call so the two outputs never share (and
        # cannot accidentally mutate) the same object.
        return {"text": "", "entities": []}

    # Nothing to analyse: blank (empty or whitespace-only) text.
    if not (text and text.strip()):
        return _empty_response(), _empty_response()

    # The RoBERTa Base model only needs the text.
    roberta_base_prediction = predict_hallucinations_manual_input_roberta_base(text)

    # The RoBERTa Large QA model also needs a question; without one, skip it.
    if not (question and question.strip()):
        return roberta_base_prediction, _empty_response()

    roberta_large_qa_prediction = predict_hallucinations_manual_input_roberta_qa_large(
        text, question
    )
    return roberta_base_prediction, roberta_large_qa_prediction
|
293 |
|
294 |
|
295 |
with gr.Blocks(title="Hallucinations Explorer") as demo:
|
|
|
311 |
color_map={"+": "red", "-": "blue", "hal": "red"},
|
312 |
combine_adjacent=True,
|
313 |
)
|
314 |
+
highlighted_text_predicted_roberta_base = gr.HighlightedText(
|
315 |
+
label="Predicted Hallucinations (RoBERTa Base)",
|
316 |
+
color_map={"+": "red", "-": "blue", "hal": "red"},
|
317 |
+
combine_adjacent=True,
|
318 |
+
)
|
319 |
+
highlighted_text_predicted_roberta_large_qa = gr.HighlightedText(
|
320 |
+
label="Predicted Hallucinations (RoBERTa Large QA)",
|
321 |
color_map={"+": "red", "-": "blue", "hal": "red"},
|
322 |
combine_adjacent=True,
|
323 |
)
|
|
|
330 |
json_example,
|
331 |
original_text,
|
332 |
highlighted_text,
|
333 |
+
highlighted_text_predicted_roberta_base,
|
334 |
+
highlighted_text_predicted_roberta_large_qa,
|
335 |
],
|
336 |
)
|
337 |
|
|
|
344 |
type="text",
|
345 |
)
|
346 |
|
347 |
+
model_question_input = gr.Textbox(
|
348 |
+
value="",
|
349 |
+
label="Question (only for RoBERTa Large QA)",
|
350 |
+
placeholder="Type the question here",
|
351 |
+
type="text",
|
352 |
+
)
|
353 |
+
|
354 |
+
manual_input_highlighted_text_roberta_base = gr.HighlightedText(
|
355 |
+
label="Predicted Hallucinations (RoBERTa Base)",
|
356 |
+
color_map={"+": "red", "-": "blue", "hal": "red"},
|
357 |
+
combine_adjacent=True,
|
358 |
+
)
|
359 |
+
|
360 |
+
manual_input_highlighted_text_roberta_large_qa = gr.HighlightedText(
|
361 |
+
label="Predicted Hallucinations (RoBERTa Large QA)",
|
362 |
color_map={"+": "red", "-": "blue", "hal": "red"},
|
363 |
combine_adjacent=True,
|
364 |
)
|
365 |
+
|
366 |
model_manual_input.change(
|
367 |
+
predict_hallucinations_manual_input,
|
368 |
+
inputs=[model_manual_input, model_question_input],
|
369 |
+
outputs=[
|
370 |
+
manual_input_highlighted_text_roberta_base,
|
371 |
+
manual_input_highlighted_text_roberta_large_qa,
|
372 |
+
],
|
373 |
+
)
|
374 |
+
|
375 |
+
model_question_input.change(
|
376 |
+
predict_hallucinations_manual_input_roberta_qa_large,
|
377 |
+
inputs=[model_manual_input, model_question_input],
|
378 |
+
outputs=[
|
379 |
+
manual_input_highlighted_text_roberta_large_qa,
|
380 |
+
],
|
381 |
)
|
382 |
|
383 |
demo.launch(show_error=True)
|