Spaces:

shroom-semeval25
/

cogumelo-visualizer

Sleeping

App Files Files Community

acmc committed on Nov 13, 2024

Commit

65d4803

verified ·

1 Parent(s): 6a4d4fc

Upload visualize.py

Browse files

Files changed (1) hide show

visualize.py +208 -0

visualize.py ADDED Viewed

	@@ -0,0 +1,208 @@

+import gradio as gr
+import datasets
+import difflib
+import transformers
+import torch
+import logging
+tokenizer = transformers.AutoTokenizer.from_pretrained("google/flan-t5-base")
+dataset = (
+    datasets.load_dataset(
+        "shroom-semeval25/hallucinated_answer_generated_dataset",
+        split="test",
+    )
+    .take(10000)
+    .to_pandas()
+    .sort_values("question")
+)
+# Show columns in this order: question, correct_answer_generated, hallucinated_answer_generated, everything else
+dataset = dataset[
+    ["question", "correct_answer_generated", "hallucinated_answer_generated"]
+    + [
+        col
+        for col in dataset.columns
+        if col
+        not in ["question", "correct_answer_generated", "hallucinated_answer_generated"]
+    ]
+]
+def show_hallucinations(evt: gr.SelectData):
+    selected_row = evt.index[0]
+    element = dataset.iloc[selected_row]
+    original_text = element["correct_answer_generated"]
+    hallucinated_text = element["hallucinated_answer_generated"]
+    # tokenize both texts
+    original_tokens = tokenizer(
+        original_text, return_offsets_mapping=True, add_special_tokens=False
+    )
+    hallucinated_tokens = tokenizer(
+        hallucinated_text, return_offsets_mapping=True, add_special_tokens=False
+    )
+    # Find the tokens that are different. We have two lists of numbers, we need to find the differences (mind the order)
+    diff = difflib.SequenceMatcher(
+        None,
+        original_tokens["input_ids"],
+        hallucinated_tokens["input_ids"],
+    ).get_opcodes()
+    entities = []
+    # Follows this structure:
+    # {
+    #     "entity": "+" or "-",
+    #     "start": 0,
+    #     "end": 0,
+    # }
+    for tag, i1, i2, j1, j2 in diff:
+        try:
+            if tag == "equal":
+                continue
+            # Anything that is not equal is a hallucination
+            start_char = hallucinated_tokens["offset_mapping"][j1][0]
+            end_char = hallucinated_tokens["offset_mapping"][j2 - 1][1] + 1
+            entity = {
+                "entity": "hal",
+                "start": start_char,
+                "end": end_char,
+            }
+            # entity_2 = {
+            #     "entity": "-",
+            #     "start": start,
+            #     "end": end,
+            # }
+            entities.append(entity)
+            # entities.append(entity_2)
+        except IndexError as e:
+            gr.Error(f"There was an error in the tokenization process: {e}")
+    return [
+        {
+            "calculated_diffs": diff,
+            "tokenized_original": original_tokens,
+            "tokenized_hallucinated": hallucinated_tokens,
+            **element.to_dict(),
+        },
+        element["correct_answer_generated"],
+        {
+            "text": hallucinated_text,
+            "entities": entities,
+        },
+    ]
+prediction_model = transformers.AutoModelForTokenClassification.from_pretrained(
+    "shroom-semeval25/cogumelo-hallucinations-detector-roberta-base"
+)
+prediction_tokenizer = transformers.AutoTokenizer.from_pretrained(
+    "shroom-semeval25/cogumelo-hallucinations-detector-roberta-base"
+)
+def predict_hallucinations(evt: gr.SelectData):
+    """The model will return 0 if it's not a hallucination, 1 if it is the beginning of a hallucination, and 2 if it's the continuation of a hallucination"""
+    selected_row = evt.index[0]
+    element = dataset.iloc[selected_row]
+    hallucinated_text = element["hallucinated_answer_generated"]
+    hallucinated_tokens = prediction_tokenizer(
+        hallucinated_text,
+        return_offsets_mapping=True,
+        add_special_tokens=True,
+        return_tensors="pt",
+    )
+    inputs = {
+        "input_ids": hallucinated_tokens["input_ids"],
+        "attention_mask": hallucinated_tokens["attention_mask"],
+    }
+    with torch.no_grad():
+        outputs = prediction_model(**inputs)
+    # Get the highest value for each token
+    predictions = outputs.logits.argmax(dim=-1).squeeze(0).tolist()
+    entities = []
+    current_entity = None
+    for i, prediction in enumerate(predictions):
+        if prediction == 0:
+            if current_entity is not None:
+                entities.append(current_entity)
+                current_entity = None
+            continue
+        if prediction == 1:
+            if current_entity is not None:
+                entities.append(current_entity)
+            current_entity = {
+                "entity": "hal",
+                "start": hallucinated_tokens["offset_mapping"][0][i][0],
+                "end": hallucinated_tokens["offset_mapping"][0][i][1] + 1,
+            }
+        if prediction == 2:
+            if current_entity is None:
+                current_entity = {
+                    "entity": "hal",
+                    "start": hallucinated_tokens["offset_mapping"][0][i][0],
+                    "end": hallucinated_tokens["offset_mapping"][0][i][1] + 1,
+                }
+            else:
+                current_entity["end"] = (
+                    hallucinated_tokens["offset_mapping"][0][i][1] + 1
+                )
+    if current_entity is not None:
+        entities.append(current_entity)
+    return {
+        "text": hallucinated_text,
+        "entities": entities,
+    }
+def update_selection(evt: gr.SelectData):
+    # Run the two functions
+    json_example, original_text, highlighted_text = show_hallucinations(evt)
+    try:
+        highlighted_text_predicted = predict_hallucinations(evt)
+    except Exception as e:
+        logging.exception(f"An error occurred: {e}")
+        gr.Error(f"An error occurred: {e}")
+        highlighted_text_predicted = {"text": "", "entities": []}
+    return json_example, original_text, highlighted_text, highlighted_text_predicted
+with gr.Blocks(title="Hallucinations Explorer") as demo:
+    # A selectable dataframe with the dataset
+    # print(dataset)
+    gr.Markdown(
+        """# Cogumelo
+_SHROOM '25: Detection of Hallucinated Content_
+⚠️ These rows are part of the **test set** of the dataset, not the entire dataset (the model has therefore not seen them)"""
+    )
+    df = gr.Dataframe(dataset)
+    original_text = gr.Textbox(label="Original Text")
+    highlighted_text = gr.HighlightedText(
+        label="Real Hallucinations (ground truth)",
+        color_map={"+": "red", "-": "blue", "hal": "red"},
+        combine_adjacent=True,
+    )
+    highlighted_text_predicted = gr.HighlightedText(
+        label="Predicted Hallucinations",
+        color_map={"+": "red", "-": "blue", "hal": "red"},
+        combine_adjacent=True,
+    )
+    json_example = gr.JSON()
+    df.select(
+        update_selection,
+        inputs=[],
+        outputs=[
+            json_example,
+            original_text,
+            highlighted_text,
+            highlighted_text_predicted,
+        ],
+    )
+demo.launch(show_error=True)