acmc committed on
Commit
a85555b
·
verified ·
1 Parent(s): 82a1f36

Update visualize.py

Browse files
Files changed (1) hide show
  1. visualize.py +193 -37
visualize.py CHANGED
@@ -91,31 +91,22 @@ def show_hallucinations(element):
91
  ]
92
 
93
 
94
- prediction_model = transformers.AutoModelForTokenClassification.from_pretrained(
95
  "shroom-semeval25/cogumelo-hallucinations-detector-roberta-base"
96
  )
97
- prediction_tokenizer = transformers.AutoTokenizer.from_pretrained(
98
  "shroom-semeval25/cogumelo-hallucinations-detector-roberta-base"
99
  )
100
-
101
-
102
- def predict_hallucinations(hallucinated_text: str):
103
- """The model will return 0 if it's not a hallucination, 1 if it is the beginning of a hallucination, and 2 if it's the continuation of a hallucination"""
104
- hallucinated_tokens = prediction_tokenizer(
105
- hallucinated_text,
106
- return_offsets_mapping=True,
107
- add_special_tokens=True,
108
- return_tensors="pt",
109
  )
 
110
 
111
- inputs = {
112
- "input_ids": hallucinated_tokens["input_ids"],
113
- "attention_mask": hallucinated_tokens["attention_mask"],
114
- }
115
- with torch.no_grad():
116
- outputs = prediction_model(**inputs)
117
  # Get the highest value for each token
118
- predictions = outputs.logits.argmax(dim=-1).squeeze(0).tolist()
119
  entities = []
120
  current_entity = None
121
  for i, prediction in enumerate(predictions):
@@ -129,20 +120,18 @@ def predict_hallucinations(hallucinated_text: str):
129
  entities.append(current_entity)
130
  current_entity = {
131
  "entity": "hal",
132
- "start": hallucinated_tokens["offset_mapping"][0][i][0],
133
- "end": hallucinated_tokens["offset_mapping"][0][i][1] + 1,
134
  }
135
  if prediction == 2:
136
  if current_entity is None:
137
  current_entity = {
138
  "entity": "hal",
139
- "start": hallucinated_tokens["offset_mapping"][0][i][0],
140
- "end": hallucinated_tokens["offset_mapping"][0][i][1] + 1,
141
  }
142
  else:
143
- current_entity["end"] = (
144
- hallucinated_tokens["offset_mapping"][0][i][1] + 1
145
- )
146
  if current_entity is not None:
147
  entities.append(current_entity)
148
  return {
@@ -157,14 +146,150 @@ def update_selection(evt: gr.SelectData):
157
  # Run the two functions
158
  json_example, original_text, highlighted_text = show_hallucinations(element)
159
  try:
160
- highlighted_text_predicted = predict_hallucinations(
161
- element["hallucinated_answer_generated"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  )
163
  except Exception as e:
164
  logging.exception(f"An error occurred: {e}")
165
  gr.Error(f"An error occurred: {e}")
166
- highlighted_text_predicted = {"text": "", "entities": []}
167
- return json_example, original_text, highlighted_text, highlighted_text_predicted
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
 
170
  with gr.Blocks(title="Hallucinations Explorer") as demo:
@@ -186,8 +311,13 @@ _SHROOM '25: Detection of Hallucinated Content_
186
  color_map={"+": "red", "-": "blue", "hal": "red"},
187
  combine_adjacent=True,
188
  )
189
- highlighted_text_predicted = gr.HighlightedText(
190
- label="Predicted Hallucinations",
 
 
 
 
 
191
  color_map={"+": "red", "-": "blue", "hal": "red"},
192
  combine_adjacent=True,
193
  )
@@ -200,7 +330,8 @@ _SHROOM '25: Detection of Hallucinated Content_
200
  json_example,
201
  original_text,
202
  highlighted_text,
203
- highlighted_text_predicted,
 
204
  ],
205
  )
206
 
@@ -213,15 +344,40 @@ _SHROOM '25: Detection of Hallucinated Content_
213
  type="text",
214
  )
215
 
216
- manual_input_highlighted_text = gr.HighlightedText(
217
- label="Predicted Hallucinations",
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  color_map={"+": "red", "-": "blue", "hal": "red"},
219
  combine_adjacent=True,
220
  )
 
221
  model_manual_input.change(
222
- predict_hallucinations,
223
- inputs=[model_manual_input],
224
- outputs=[manual_input_highlighted_text],
 
 
 
 
 
 
 
 
 
 
 
225
  )
226
 
227
  demo.launch(show_error=True)
 
91
  ]
92
 
93
 
94
# Token-classification model used on the answer text alone (it is called with
# the tokenized answer only, without the question).
roberta_base_predictor = transformers.AutoModelForTokenClassification.from_pretrained(
    "shroom-semeval25/cogumelo-hallucinations-detector-roberta-base"
)
# Tokenizer loaded from the same checkpoint as the base model.
# NOTE(review): no separate tokenizer is loaded for the large QA checkpoint;
# this tokenizer is also used to encode the (question, answer) pairs fed to
# roberta_large_qa_predictor — confirm both checkpoints share a vocabulary.
roberta_base_tokenizer = transformers.AutoTokenizer.from_pretrained(
    "shroom-semeval25/cogumelo-hallucinations-detector-roberta-base"
)
# Token-classification model that is called with a (question, answer) pair
# encoded as a single sequence.
roberta_large_qa_predictor = (
    transformers.AutoModelForTokenClassification.from_pretrained(
        "shroom-semeval25/cogumelo-hallucinations-detector-roberta-large-qa-15000"
    )
)
105
 
106
+
107
+ def mark_hallucinations(logits, hallucinated_text, offsets):
 
 
 
 
108
  # Get the highest value for each token
109
+ predictions = logits.argmax(dim=-1).squeeze(0).tolist()
110
  entities = []
111
  current_entity = None
112
  for i, prediction in enumerate(predictions):
 
120
  entities.append(current_entity)
121
  current_entity = {
122
  "entity": "hal",
123
+ "start": offsets[i][0],
124
+ "end": offsets[i][1] + 1,
125
  }
126
  if prediction == 2:
127
  if current_entity is None:
128
  current_entity = {
129
  "entity": "hal",
130
+ "start": offsets[i][0],
131
+ "end": offsets[i][1] + 1,
132
  }
133
  else:
134
+ current_entity["end"] = offsets[i][1] + 1
 
 
135
  if current_entity is not None:
136
  entities.append(current_entity)
137
  return {
 
146
  # Run the two functions
147
  json_example, original_text, highlighted_text = show_hallucinations(element)
148
  try:
149
+ hallucinated_tokens = roberta_base_tokenizer(
150
+ text=element["hallucinated_answer_generated"],
151
+ return_offsets_mapping=True,
152
+ add_special_tokens=True,
153
+ return_tensors="pt",
154
+ return_special_tokens_mask=True,
155
+ )
156
+ q_a_tokens = roberta_base_tokenizer(
157
+ # We have to batch into a single-example batch, because otherwise the tokenizer will interpret that the second element of the pair is example #2 of the batch (while actually it is the second part of the pair of example #1)
158
+ text=[(element["question"], element["hallucinated_answer_generated"])],
159
+ return_offsets_mapping=True,
160
+ add_special_tokens=True,
161
+ return_tensors="pt",
162
+ return_special_tokens_mask=True,
163
+ )
164
+ with torch.no_grad():
165
+ outputs_roberta_base = roberta_base_predictor(
166
+ input_ids=hallucinated_tokens.input_ids,
167
+ attention_mask=hallucinated_tokens.attention_mask,
168
+ )
169
+ # Take only the outputs that are NOT special tokens and where the attention mask is 1
170
+ logits_roberta_base = outputs_roberta_base.logits[
171
+ ...,
172
+ (hallucinated_tokens.special_tokens_mask == 0)
173
+ & (hallucinated_tokens.attention_mask == 1),
174
+ :,
175
+ ]
176
+ outputs_roberta_large_qa = roberta_large_qa_predictor(
177
+ input_ids=q_a_tokens.input_ids,
178
+ attention_mask=q_a_tokens.attention_mask,
179
+ )
180
+ # Take only the outputs after the first special token and where the attention mask is 1 and the special tokens mask is 0
181
+ logits_roberta_large_qa = outputs_roberta_large_qa.logits[
182
+ ...,
183
+ (q_a_tokens.special_tokens_mask.cumsum(dim=-1) > 1)
184
+ & (q_a_tokens.attention_mask == 1)
185
+ & (q_a_tokens.special_tokens_mask == 0),
186
+ :,
187
+ ]
188
+
189
+ highlighted_text_predicted_roberta_base = mark_hallucinations(
190
+ hallucinated_text=element["hallucinated_answer_generated"],
191
+ logits=logits_roberta_base,
192
+ # Discard the first token, which is the BOS token
193
+ offsets=hallucinated_tokens["offset_mapping"][0][1:],
194
+ )
195
+ # How much should we remove from the offset_mapping?
196
+ # The length of the tokens before the first special token
197
+ number_of_offsets_to_remove = (
198
+ q_a_tokens.special_tokens_mask.cumsum(dim=-1) <= 1
199
+ ).sum()
200
+ highlighted_text_predicted_roberta_large_qa = mark_hallucinations(
201
+ hallucinated_text=element["hallucinated_answer_generated"],
202
+ logits=logits_roberta_large_qa,
203
+ # Discard the first two tokens, which are the separators between the question and the answer
204
+ offsets=q_a_tokens["offset_mapping"][0][number_of_offsets_to_remove + 2 :],
205
  )
206
  except Exception as e:
207
  logging.exception(f"An error occurred: {e}")
208
  gr.Error(f"An error occurred: {e}")
209
+ highlighted_text_predicted_roberta_base = {"text": "", "entities": []}
210
+ highlighted_text_predicted_roberta_large_qa = {"text": "", "entities": []}
211
+ return (
212
+ json_example,
213
+ original_text,
214
+ highlighted_text,
215
+ highlighted_text_predicted_roberta_base,
216
+ highlighted_text_predicted_roberta_large_qa,
217
+ )
218
+
219
+
220
def predict_hallucinations_manual_input_roberta_base(text: str):
    """Highlight predicted hallucinations in ``text`` with the RoBERTa Base model.

    The text is tokenized on its own (no question), run through
    ``roberta_base_predictor`` without gradient tracking, and the logits of the
    real (non-special, attended) tokens are handed to ``mark_hallucinations``
    together with their character offsets.

    Args:
        text: Free-form text typed by the user.

    Returns:
        Whatever ``mark_hallucinations`` returns for ``text``.
    """
    encoding = roberta_base_tokenizer(
        text=text,
        add_special_tokens=True,
        return_tensors="pt",
        return_offsets_mapping=True,
        return_special_tokens_mask=True,
    )

    with torch.no_grad():
        model_output = roberta_base_predictor(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
        )

    # Positions that are attended to and are not special tokens.
    is_content_token = (encoding.special_tokens_mask == 0) & (
        encoding.attention_mask == 1
    )
    content_logits = model_output.logits[..., is_content_token, :]

    return mark_hallucinations(
        hallucinated_text=text,
        logits=content_logits,
        # Drop the first offset so offsets line up with the filtered logits,
        # whose leading special token has been masked out.
        offsets=encoding["offset_mapping"][0][1:],
    )
245
+
246
+
247
def predict_hallucinations_manual_input_roberta_qa_large(text: str, question: str = ""):
    """Highlight predicted hallucinations in ``text`` with the RoBERTa Large QA model.

    The question and the answer are encoded together as one sequence pair, the
    pair is run through ``roberta_large_qa_predictor`` without gradient
    tracking, and only the logits and character offsets belonging to the answer
    tokens are handed to ``mark_hallucinations``.

    Args:
        text: Answer text to inspect.
        question: Question the answer responds to.

    Returns:
        Whatever ``mark_hallucinations`` returns for ``text``, or an empty
        highlight (``{"text": "", "entities": []}``) when ``text`` or
        ``question`` is empty.
    """
    # This function is registered directly as the question textbox's change
    # handler, so it has to cope with blank inputs itself: the QA model needs
    # both a question and an answer to produce a meaningful prediction.
    if not text or not question:
        return {"text": "", "entities": []}

    q_a_tokens = roberta_base_tokenizer(
        # Wrap the pair in a single-example batch; otherwise the tokenizer would
        # treat the second element as a second example instead of as the second
        # half of the pair.
        text=[(question, text)],
        return_offsets_mapping=True,
        add_special_tokens=True,
        return_tensors="pt",
        return_special_tokens_mask=True,
    )
    with torch.no_grad():
        outputs_roberta_large_qa = roberta_large_qa_predictor(
            input_ids=q_a_tokens.input_ids,
            attention_mask=q_a_tokens.attention_mask,
        )

    # Running count of special tokens seen so far, per position.
    special_tokens_seen = q_a_tokens.special_tokens_mask.cumsum(dim=-1)
    # Number of positions that come before the second special token.
    number_of_offsets_to_remove = (special_tokens_seen <= 1).sum()
    # Answer tokens: past the first special token, attended, and not special.
    answer_token_mask = (
        (special_tokens_seen > 1)
        & (q_a_tokens.attention_mask == 1)
        & (q_a_tokens.special_tokens_mask == 0)
    )
    logits_roberta_large_qa = outputs_roberta_large_qa.logits[
        ..., answer_token_mask, :
    ]

    return mark_hallucinations(
        hallucinated_text=text,
        logits=logits_roberta_large_qa,
        # The extra 2 skips the separator tokens between question and answer.
        offsets=q_a_tokens["offset_mapping"][0][number_of_offsets_to_remove + 2 :],
    )
276
+
277
+
278
def predict_hallucinations_manual_input(text: str, question: str = ""):
    """Run both hallucination predictors on manually entered input.

    Args:
        text: Answer text typed by the user.
        question: Optional question; only the RoBERTa Large QA model uses it.

    Returns:
        A ``(roberta_base_prediction, roberta_large_qa_prediction)`` pair.
        Either element is the empty highlight ``{"text": "", "entities": []}``
        when its model cannot be run: both when ``text`` is empty, and the
        second one when ``question`` is empty.
    """
    no_prediction = {"text": "", "entities": []}

    if text:
        # The base model only needs the text itself.
        base_result = predict_hallucinations_manual_input_roberta_base(text)
        # The QA model additionally requires a question.
        if question:
            qa_result = predict_hallucinations_manual_input_roberta_qa_large(
                text, question
            )
        else:
            qa_result = no_prediction
        return base_result, qa_result

    # Nothing to analyse without text.
    return no_prediction, no_prediction
293
 
294
 
295
  with gr.Blocks(title="Hallucinations Explorer") as demo:
 
311
  color_map={"+": "red", "-": "blue", "hal": "red"},
312
  combine_adjacent=True,
313
  )
314
+ highlighted_text_predicted_roberta_base = gr.HighlightedText(
315
+ label="Predicted Hallucinations (RoBERTa Base)",
316
+ color_map={"+": "red", "-": "blue", "hal": "red"},
317
+ combine_adjacent=True,
318
+ )
319
+ highlighted_text_predicted_roberta_large_qa = gr.HighlightedText(
320
+ label="Predicted Hallucinations (RoBERTa Large QA)",
321
  color_map={"+": "red", "-": "blue", "hal": "red"},
322
  combine_adjacent=True,
323
  )
 
330
  json_example,
331
  original_text,
332
  highlighted_text,
333
+ highlighted_text_predicted_roberta_base,
334
+ highlighted_text_predicted_roberta_large_qa,
335
  ],
336
  )
337
 
 
344
  type="text",
345
  )
346
 
347
+ model_question_input = gr.Textbox(
348
+ value="",
349
+ label="Question (only for RoBERTa Large QA)",
350
+ placeholder="Type the question here",
351
+ type="text",
352
+ )
353
+
354
+ manual_input_highlighted_text_roberta_base = gr.HighlightedText(
355
+ label="Predicted Hallucinations (RoBERTa Base)",
356
+ color_map={"+": "red", "-": "blue", "hal": "red"},
357
+ combine_adjacent=True,
358
+ )
359
+
360
+ manual_input_highlighted_text_roberta_large_qa = gr.HighlightedText(
361
+ label="Predicted Hallucinations (RoBERTa Large QA)",
362
  color_map={"+": "red", "-": "blue", "hal": "red"},
363
  combine_adjacent=True,
364
  )
365
+
366
  model_manual_input.change(
367
+ predict_hallucinations_manual_input,
368
+ inputs=[model_manual_input, model_question_input],
369
+ outputs=[
370
+ manual_input_highlighted_text_roberta_base,
371
+ manual_input_highlighted_text_roberta_large_qa,
372
+ ],
373
+ )
374
+
375
+ model_question_input.change(
376
+ predict_hallucinations_manual_input_roberta_qa_large,
377
+ inputs=[model_manual_input, model_question_input],
378
+ outputs=[
379
+ manual_input_highlighted_text_roberta_large_qa,
380
+ ],
381
  )
382
 
383
  demo.launch(show_error=True)