Commit 6fe7180 · 0 parent(s)
Super-squash branch 'main' using huggingface_hub

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete changeset.
- .gitattributes +36 -0
- .gitignore +2 -0
- .gitmodules +3 -0
- README.md +13 -0
- glimpse-ui/.gitignore +362 -0
- glimpse-ui/LICENSE +21 -0
- glimpse-ui/alternative_polarity/deberta/deberta_v3_base_polarity.py +95 -0
- glimpse-ui/alternative_polarity/deberta/deberta_v3_base_polarity_train.py +98 -0
- glimpse-ui/alternative_polarity/manual_polarity_tester.py +65 -0
- glimpse-ui/alternative_polarity/scideberta/scideberta_full_polarity.py +79 -0
- glimpse-ui/alternative_polarity/scideberta/scideberta_full_polarity_train.py +108 -0
- glimpse-ui/alternative_topic/debetra/deberta_topic.py +92 -0
- glimpse-ui/alternative_topic/debetra/deberta_topic_train.py +80 -0
- glimpse-ui/alternative_topic/scideberta/scideberta_topic.py +92 -0
- glimpse-ui/alternative_topic/scideberta/scideberta_topic_train.py +80 -0
- glimpse-ui/data/ExtractDISAPEREData.py +106 -0
- glimpse-ui/glimpse/.gitignore +203 -0
- glimpse-ui/glimpse/Readme.md +69 -0
- glimpse-ui/glimpse/examples/RSA Sum tests.ipynb +189 -0
- glimpse-ui/glimpse/examples/reviews/reviews_app.py +274 -0
- glimpse-ui/glimpse/examples/reviews/reviews_latex_generation.py +272 -0
- glimpse-ui/glimpse/glimpse/baselines/generate_llm_summaries.py +112 -0
- glimpse-ui/glimpse/glimpse/baselines/sumy_baselines.py +129 -0
- glimpse-ui/glimpse/glimpse/data_loading/Glimpse_tokenizer.py +74 -0
- glimpse-ui/glimpse/glimpse/data_loading/data_processing.py +15 -0
- glimpse-ui/glimpse/glimpse/data_loading/generate_abstractive_candidates.py +230 -0
- glimpse-ui/glimpse/glimpse/data_loading/generate_extractive_candidates.py +129 -0
- glimpse-ui/glimpse/glimpse/evaluate/Evaluate informativeness.ipynb +258 -0
- glimpse-ui/glimpse/glimpse/evaluate/evaluate_bartbert_metrics.py +110 -0
- glimpse-ui/glimpse/glimpse/evaluate/evaluate_common_metrics_samples.py +122 -0
- glimpse-ui/glimpse/glimpse/evaluate/evaluate_seahorse_metrics_samples.py +150 -0
- glimpse-ui/glimpse/glimpse/src/beam_rsa_decoding.py +207 -0
- glimpse-ui/glimpse/glimpse/src/compute_rsa.py +137 -0
- glimpse-ui/glimpse/glimpse/src/rsa_merge_into_single.py +52 -0
- glimpse-ui/glimpse/glimpse/src/rsa_reranking.py +127 -0
- glimpse-ui/glimpse/mds/Single summaries expes.ipynb +587 -0
- glimpse-ui/glimpse/mds/Template summaries.ipynb +531 -0
- glimpse-ui/glimpse/mds/discriminative_classification.py +113 -0
- glimpse-ui/glimpse/pyproject.toml +21 -0
- glimpse-ui/glimpse/requirements +10 -0
- glimpse-ui/glimpse/rsasumm/__init__.py +0 -0
- glimpse-ui/glimpse/rsasumm/beam_search.py +430 -0
- glimpse-ui/glimpse/rsasumm/rsa_reranker.py +280 -0
- glimpse-ui/glimpse/scripts/abstractive.sh +37 -0
- glimpse-ui/glimpse/scripts/extractive.sh +31 -0
- glimpse-ui/glimpse_pk_csv_converter.py +92 -0
- glimpse-ui/interface/Demo.py +800 -0
- glimpse-ui/scibert/scibert_polarity/final_model/config.json +35 -0
- glimpse-ui/scibert/scibert_polarity/final_model/model.safetensors +3 -0
- glimpse-ui/scibert/scibert_polarity/final_model/special_tokens_map.json +7 -0
.gitattributes
ADDED
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+glimpse-ui/data/preprocessed_scored_reviews.csv filter=lfs diff=lfs merge=lfs -text
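Note: each pattern above routes matching files through the Git LFS filter, so large binaries (model weights, archives, the tracked CSV) are stored as LFS pointers rather than raw blobs. A minimal sketch of how one might confirm a path is covered, using `git check-attr` from Python; the file name is a made-up example and this must run inside the repository:

import subprocess

# Ask git which clean/smudge filter applies to a path under the rules above.
out = subprocess.run(
    ["git", "check-attr", "filter", "--", "model.safetensors"],
    capture_output=True, text=True, check=True,
)
print(out.stdout.strip())  # expected: model.safetensors: filter: lfs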
.gitignore
ADDED
@@ -0,0 +1,2 @@
+./scibert/*
+./alternative_*/
.gitmodules
ADDED
@@ -0,0 +1,3 @@
+[submodule "glimpse-ui"]
+	path = glimpse-ui
+	url = https://github.com/Sina1138/glimpse-ui.git
README.md
ADDED
@@ -0,0 +1,13 @@
+---
+title: GlimpSys
+emoji: 📊
+colorFrom: yellow
+colorTo: red
+sdk: gradio
+sdk_version: 5.35.0
+app_file: glimpse-ui/interface/Demo.py
+pinned: false
+license: mit
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
glimpse-ui/.gitignore
ADDED
@@ -0,0 +1,362 @@
+# project ignores
+glimpse/
+data/DISAPERE-main/
+*checkpoints/
+.gradio/
+test.py
+data/*
+final_model/
+alternative_polarity/llama/
+!data/ExtractDISAPEREData.py
+!data/preprocessed_scored_reviews.csv
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+data/DISAPERE_test.py
glimpse-ui/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Sina Salmannia
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
glimpse-ui/alternative_polarity/deberta/deberta_v3_base_polarity.py
ADDED
@@ -0,0 +1,95 @@
+import pandas as pd
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from pathlib import Path
+import nltk
+from tqdm import tqdm
+import sys, os.path
+from torch.nn import functional as F
+
+nltk.download('punkt')
+
+BASE_DIR = Path(__file__).resolve().parent.parent.parent
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
+
+from glimpse.glimpse.data_loading.Glimpse_tokenizer import glimpse_tokenizer
+
+# === CONFIGURATION ===
+
+MODEL_DIR = BASE_DIR / "alternative_polarity" / "deberta" / "deberta_v3_base_polarity_final_model"
+DATA_DIR = BASE_DIR / "glimpse" / "data" / "processed"
+OUTPUT_DIR = BASE_DIR / "data" / "polarity_scored"
+OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+# === Load model and tokenizer ===
+tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
+model.eval()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+# === Tokenize like GLIMPSE ===
+# def tokenize_sentences(text: str) -> list:
+#     # same tokenization as in the original glimpse code
+#     text = text.replace('-----', '\n')
+#     sentences = nltk.sent_tokenize(text)
+#     sentences = [sentence for sentence in sentences if sentence != ""]
+#     return sentences
+
+
+# def predict_polarity(sentences):
+#     inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
+#     with torch.no_grad():
+#         outputs = model(**inputs)
+#     logits = outputs.logits
+#     temperature = 2.7  # Adjust temperature for scaling logits
+#     probs = F.softmax(logits / temperature, dim=-1)
+#     # Get probability of positive class
+#     polarity_scores = probs[:, 1]
+#     # Rescale: 0 → -1 (very negative), 1 → +1 (very positive)
+#     polarity_scores = (polarity_scores * 2) - 1
+#     return polarity_scores.cpu().tolist()
+
+def predict_polarity(sentences):
+    inputs = tokenizer(
+        sentences,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=512
+    ).to(device)
+
+    with torch.no_grad():
+        logits = model(**inputs).logits  # (batch, 2)
+    logit_diff = logits[:, 1] - logits[:, 0]
+    alpha = 2.1  # tweak
+    scores = torch.tanh(alpha * logit_diff)  # in [-1, 1]
+    return scores.cpu().tolist()
+
+
+def find_polarity(start_year=2017, end_year=2021):
+    for year in range(start_year, end_year + 1):
+        print(f"Processing {year}...")
+        input_path = DATA_DIR / f"all_reviews_{year}.csv"
+        output_path = OUTPUT_DIR / f"polarity_scored_reviews_{year}.csv"
+
+        df = pd.read_csv(input_path)
+
+        all_rows = []
+        for _, row in tqdm(df.iterrows(), total=len(df)):
+            review_id = row["id"]
+            text = row["text"]
+            sentences = glimpse_tokenizer(text)
+            if not sentences:
+                continue
+            labels = predict_polarity(sentences)
+            for sentence, polarity in zip(sentences, labels):
+                all_rows.append({"id": review_id, "sentence": sentence, "polarity": polarity})
+
+        output_df = pd.DataFrame(all_rows)
+        output_df.to_csv(output_path, index=False)
+        print(f"Saved polarity-scored data to {output_path}")
+
+
+if __name__ == "__main__":
+    find_polarity()
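Note: the active `predict_polarity` above replaces the commented-out temperature-scaled softmax with a scaled tanh of the two-class logit gap; both map a sentence to a polarity score in [-1, 1]. A self-contained sketch of the tanh variant, with logit values made up purely for illustration:

import torch

# Hypothetical [negative, positive] logits for three sentences.
logits = torch.tensor([[2.0, -1.5],    # confidently negative
                       [0.1, 0.2],     # near-neutral
                       [-2.0, 3.0]])   # confidently positive
alpha = 2.1  # same scaling constant as in the diff above
scores = torch.tanh(alpha * (logits[:, 1] - logits[:, 0]))
print(scores)  # ≈ tensor([-1.0000, 0.2070, 1.0000]): saturates for confident cases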
glimpse-ui/alternative_polarity/deberta/deberta_v3_base_polarity_train.py
ADDED
@@ -0,0 +1,98 @@
+import pandas as pd
+from datasets import Dataset
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support
+import numpy as np
+import torch
+from torch.nn import functional as F
+
+from transformers import Trainer
+
+
+# Load data
+train_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_train.csv")
+dev_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_dev.csv")
+test_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_test.csv")
+
+# Convert to HuggingFace Datasets
+train_ds = Dataset.from_pandas(train_df)
+dev_ds = Dataset.from_pandas(dev_df)
+test_ds = Dataset.from_pandas(test_df)
+
+# Tokenize
+model_name = "microsoft/deberta-v3-base"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+def tokenize(batch):
+    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=512)
+
+train_ds = train_ds.map(tokenize, batched=True)
+dev_ds = dev_ds.map(tokenize, batched=True)
+test_ds = test_ds.map(tokenize, batched=True)
+
+# Set format for PyTorch
+train_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+dev_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+
+# Load model
+model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
+
+# Compute class weights
+label_counts = train_df['label'].value_counts()
+total_samples = len(train_df)
+class_weights = torch.tensor([total_samples / (len(label_counts) * count) for count in label_counts.sort_index().values])
+class_weights = class_weights.to(dtype=torch.float32)
+print("Class weights:", class_weights)
+
+class WeightedTrainer(Trainer):
+    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
+        labels = inputs.pop("labels")
+        outputs = model(**inputs)
+        logits = outputs.logits
+        weights = class_weights.to(logits.device)
+        loss = F.cross_entropy(logits, labels, weight=weights)
+        return (loss, outputs) if return_outputs else loss
+
+
+# Metrics
+def compute_metrics(eval_pred):
+    logits, labels = eval_pred
+    preds = np.argmax(logits, axis=1)
+    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="macro")
+    acc = accuracy_score(labels, preds)
+    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}
+
+# Training arguments
+args = TrainingArguments(
+    output_dir="./alternative_polarity/deberta/checkpoints",
+    eval_strategy="epoch",
+    save_strategy="epoch",
+    learning_rate=2e-5,
+    per_device_train_batch_size=4,
+    per_device_eval_batch_size=8,
+    num_train_epochs=4,
+    weight_decay=0.01,
+    load_best_model_at_end=True,
+    metric_for_best_model="f1"
+)
+
+# Trainer
+trainer = WeightedTrainer(
+    model=model,
+    args=args,
+    train_dataset=train_ds,
+    eval_dataset=dev_ds,
+    tokenizer=tokenizer,
+    compute_metrics=compute_metrics
+)
+
+# Train
+trainer.train()
+
+# Evaluate on test
+results = trainer.evaluate(test_ds)
+print("Test results:", results)
+
+# Save the model and tokenizer
+model.save_pretrained("./alternative_polarity/deberta/deberta_v3_base_polarity_final_model")
+tokenizer.save_pretrained("./alternative_polarity/deberta/deberta_v3_base_polarity_final_model")
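Note: the class-weight expression above is the standard "balanced" heuristic, weight_c = N / (K · n_c) for K classes and n_c training samples of class c. A worked example with made-up counts:

import torch

counts = {0: 500, 1: 300, 2: 200}  # hypothetical per-class counts (N = 1000, K = 3)
total = sum(counts.values())
weights = torch.tensor([total / (len(counts) * counts[c]) for c in sorted(counts)])
print(weights)  # tensor([0.6667, 1.1111, 1.6667]): rarer classes get larger weights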
glimpse-ui/alternative_polarity/manual_polarity_tester.py
ADDED
@@ -0,0 +1,65 @@
+import torch
+import torch.nn.functional as F
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from pathlib import Path
+import sys, os
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
+from glimpse.glimpse.data_loading.Glimpse_tokenizer import glimpse_tokenizer
+
+# === CONFIGURATION ===
+BASE_DIR = Path(__file__).resolve().parent.parent
+MODEL_DIR = BASE_DIR / "alternative_polarity" / "deberta" / "deberta_v3_large_polarity_final_model"
+# MODEL_DIR = BASE_DIR / "alternative_polarity" / "llama" / "final_model"
+# MODEL_DIR = BASE_DIR / "alternative_polarity" / "scideberta" / "scideberta_full_polarity_final_model"
+
+# --> Best so far: deberta_v3 (passes "pros" test)
+
+
+# === Load model and tokenizer ===
+tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
+model.eval()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+# === Prediction function with confidence ===
+def predict_polarity(sentences):
+    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
+    with torch.no_grad():
+        outputs = model(**inputs)
+        probs = F.softmax(outputs.logits, dim=1)
+        confidences, preds = torch.max(probs, dim=1)
+
+    results = []
+    for sentence, pred, conf, prob in zip(sentences, preds, confidences, probs):
+        results.append({
+            "sentence": sentence,
+            "label": "Positive" if pred.item() == 1 else "Negative",
+            "confidence": conf.item(),
+            "probs": prob.cpu().numpy().tolist()
+        })
+    return results
+
+# === Example: test a multi-sentence peer review ===
+if __name__ == "__main__":
+    # Replace this with your review
+    full_review = """
+    Pros:
+    Con: The experiments lack comparison with prior work.
+    The authors clearly explain their methodology, which is a strong point.
+    """
+
+    # Use glimpse tokenizer to split into sentences
+    sentences = glimpse_tokenizer(full_review)
+
+    # Run polarity prediction
+    results = predict_polarity(sentences)
+
+    # Display results
+    for res in results:
+        print(f"\nSentence: {res['sentence']}")
+        print(f"  → Prediction: {res['label']} (Confidence: {res['confidence']:.3f})")
+        print(f"    Probabilities: [Negative: {res['probs'][0]:.3f}, Positive: {res['probs'][1]:.3f}]")
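Note: the confidence the tester reports is simply the maximum softmax probability per sentence. That core step, isolated with made-up logits:

import torch
import torch.nn.functional as F

logits = torch.tensor([[1.2, -0.3],   # hypothetical [negative, positive] logits
                       [-2.0, 2.5]])
probs = F.softmax(logits, dim=1)
confidences, preds = torch.max(probs, dim=1)
print(preds)        # tensor([0, 1]) -> Negative, Positive
print(confidences)  # ≈ tensor([0.8176, 0.9890])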
glimpse-ui/alternative_polarity/scideberta/scideberta_full_polarity.py
ADDED
@@ -0,0 +1,79 @@
+import pandas as pd
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from pathlib import Path
+import nltk
+from tqdm import tqdm
+import sys, os.path
+from torch.nn import functional as F
+
+nltk.download('punkt')
+
+BASE_DIR = Path(__file__).resolve().parent.parent.parent
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
+
+from glimpse.glimpse.data_loading.Glimpse_tokenizer import glimpse_tokenizer
+
+# === CONFIGURATION ===
+
+MODEL_DIR = BASE_DIR / "alternative_polarity" / "scideberta" / "scideberta_full_polarity_final_model"
+DATA_DIR = BASE_DIR / "glimpse" / "data" / "processed"
+OUTPUT_DIR = BASE_DIR / "data" / "polarity_scored"
+OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+# === Load model and tokenizer ===
+tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
+model.eval()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+# === Tokenize like GLIMPSE ===
+# def tokenize_sentences(text: str) -> list:
+#     # same tokenization as in the original glimpse code
+#     text = text.replace('-----', '\n')
+#     sentences = nltk.sent_tokenize(text)
+#     sentences = [sentence for sentence in sentences if sentence != ""]
+#     return sentences
+
+
+def predict_polarity(sentences):
+    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    logits = outputs.logits
+    temperature = 2.7
+    probs = F.softmax(logits / temperature, dim=-1)
+    # Get probability of positive class
+    polarity_scores = probs[:, 1]
+    # Rescale: 0 → -1 (very negative), 1 → +1 (very positive)
+    polarity_scores = (polarity_scores * 2) - 1
+    return polarity_scores.cpu().tolist()
+
+
+def find_polarity(start_year=2017, end_year=2021):
+    for year in range(start_year, end_year + 1):
+        print(f"Processing {year}...")
+        input_path = DATA_DIR / f"all_reviews_{year}.csv"
+        output_path = OUTPUT_DIR / f"polarity_scored_reviews_{year}.csv"
+
+        df = pd.read_csv(input_path)
+
+        all_rows = []
+        for _, row in tqdm(df.iterrows(), total=len(df)):
+            review_id = row["id"]
+            text = row["text"]
+            sentences = glimpse_tokenizer(text)
+            if not sentences:
+                continue
+            labels = predict_polarity(sentences)
+            for sentence, polarity in zip(sentences, labels):
+                all_rows.append({"id": review_id, "sentence": sentence, "polarity": polarity})
+
+        output_df = pd.DataFrame(all_rows)
+        output_df.to_csv(output_path, index=False)
+        print(f"Saved polarity-scored data to {output_path}")
+
+
+if __name__ == "__main__":
+    find_polarity()
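Note: unlike the DeBERTa variant, this script keeps the temperature-scaled softmax: dividing logits by T = 2.7 flattens the distribution before the positive-class probability is rescaled from [0, 1] onto [-1, 1]. A minimal sketch with made-up logits:

import torch
import torch.nn.functional as F

logits = torch.tensor([[3.0, -3.0],   # hypothetical [negative, positive] logits
                       [-1.0, 1.0]])
temperature = 2.7  # same constant as in the diff above
probs = F.softmax(logits / temperature, dim=-1)
scores = probs[:, 1] * 2 - 1  # map positive-class probability onto [-1, 1]
print(scores)  # ≈ tensor([-0.8045, 0.3543]): softer than the unscaled softmax would give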
glimpse-ui/alternative_polarity/scideberta/scideberta_full_polarity_train.py
ADDED
@@ -0,0 +1,108 @@
+import pandas as pd
+from datasets import Dataset
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support
+import numpy as np
+import torch
+from torch.nn import functional as F
+
+from transformers import Trainer
+
+class WeightedTrainer(Trainer):
+    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
+        labels = inputs.pop("labels")
+        outputs = model(**inputs)
+        logits = outputs.logits
+        weights = class_weights.to(logits.device)
+        loss = F.cross_entropy(logits, labels, weight=weights)
+        return (loss, outputs) if return_outputs else loss
+
+
+
+# Load data
+train_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_train.csv")
+dev_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_dev.csv")
+test_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_test.csv")
+
+# Convert to HuggingFace Datasets
+train_ds = Dataset.from_pandas(train_df)
+dev_ds = Dataset.from_pandas(dev_df)
+test_ds = Dataset.from_pandas(test_df)
+
+model_name = "KISTI-AI/Scideberta-full"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+def tokenize(batch):
+    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=512)
+
+train_ds = train_ds.map(tokenize, batched=True)
+dev_ds = dev_ds.map(tokenize, batched=True)
+test_ds = test_ds.map(tokenize, batched=True)
+
+# Set format for PyTorch
+train_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+dev_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+
+# Load model
+model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
+
+# Compute class weights
+label_counts = train_df['label'].value_counts()
+total_samples = len(train_df)
+class_weights = torch.tensor([total_samples / (len(label_counts) * count) for count in label_counts.sort_index().values])
+class_weights = class_weights.to(dtype=torch.float32)
+print("Class weights:", class_weights)
+
+class WeightedTrainer(Trainer):
+    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
+        labels = inputs.pop("labels")
+        outputs = model(**inputs)
+        logits = outputs.logits
+        weights = class_weights.to(logits.device)
+        loss = F.cross_entropy(logits, labels, weight=weights)
+        return (loss, outputs) if return_outputs else loss
+
+
+# Metrics
+def compute_metrics(eval_pred):
+    logits, labels = eval_pred
+    preds = np.argmax(logits, axis=1)
+    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="macro")
+    acc = accuracy_score(labels, preds)
+    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}
+
+# Training arguments
+args = TrainingArguments(
+    output_dir="./alternative_polarity/scideberta/checkpoints",
+    eval_strategy="epoch",
+    save_strategy="epoch",
+    learning_rate=2e-5,
+    per_device_train_batch_size=4,
+    per_device_eval_batch_size=8,
+    num_train_epochs=4,
+    weight_decay=0.01,
+    load_best_model_at_end=True,
+    metric_for_best_model="f1"
+)
+
+# Trainer
+trainer = WeightedTrainer(
+    model=model,
+    args=args,
+    train_dataset=train_ds,
+    eval_dataset=dev_ds,
+    tokenizer=tokenizer,
+    compute_metrics=compute_metrics
+)
+
+# Train
+trainer.train()
+
+# Evaluate on test
+results = trainer.evaluate(test_ds)
+print("Test results:", results)
+
+# Save the model and tokenizer
+model.save_pretrained("./alternative_polarity/scideberta/scideberta_full_polarity_final_model")
+tokenizer.save_pretrained("./alternative_polarity/scideberta/scideberta_full_polarity_final_model")
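Note: `WeightedTrainer` is defined twice in this file; the second definition (after `class_weights` is computed) shadows the first. Its weighted cross-entropy makes errors on rare classes count more than errors on common ones. A worked toy example of that effect, with made-up logits and weights:

import torch
import torch.nn.functional as F

# Two samples with identical made-up logits; true classes 0 (common) and 2 (rare).
logits = torch.tensor([[2.0, 0.5, -1.0],
                       [2.0, 0.5, -1.0]])
labels = torch.tensor([0, 2])
weights = torch.tensor([0.67, 1.11, 1.67])  # per-class weights as computed above

plain = F.cross_entropy(logits, labels)                     # ≈ 1.7414
weighted = F.cross_entropy(logits, labels, weight=weights)  # ≈ 2.3825
print(plain.item(), weighted.item())  # the rare-class error now dominates the loss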
glimpse-ui/alternative_topic/debetra/deberta_topic.py
ADDED
@@ -0,0 +1,92 @@
+import pandas as pd
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from pathlib import Path
+import nltk
+from tqdm import tqdm
+import sys, os.path
+
+nltk.download('punkt')
+
+BASE_DIR = Path(__file__).resolve().parent.parent.parent
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
+
+from glimpse.glimpse.data_loading.Glimpse_tokenizer import glimpse_tokenizer
+
+# === CONFIGURATION ===
+
+MODEL_DIR = BASE_DIR / "alternative_topic" / "deberta" / "final_model"
+DATA_DIR = BASE_DIR / "glimpse" / "data" / "processed"
+OUTPUT_DIR = BASE_DIR / "data" / "topic_scored"
+OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+# === Load model and tokenizer ===
+tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
+model.eval()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+# === Tokenize like GLIMPSE ===
+# def tokenize_sentences(text: str) -> list:
+#     # same tokenization as in the original glimpse code
+#     text = text.replace('-----', '\n')
+#     sentences = nltk.sent_tokenize(text)
+#     sentences = [sentence for sentence in sentences if sentence != ""]
+#     return sentences
+
+
+# === Label map (optional: for human-readable output) ===
+id2label = {
+    # 0: "Evaluative",
+    # 1: "Structuring",
+    # 2: "Request",
+    # 3: "Fact",
+    # 4: "Social",
+    # 5: "Other",
+    0: "Substance",
+    1: "Clarity",
+    2: "Soundness/Correctness",
+    3: "Originality",
+    4: "Motivation/Impact",
+    5: "Meaningful Comparison",
+    6: "Replicability",
+    7: "NONE"  # This is used for sentences that do not match any specific topic
+}
+
+def predict_topic(sentences):
+    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    predictions = torch.argmax(outputs.logits, dim=1).cpu().tolist()
+    # Convert predictions to human-readable labels
+    predictions = [id2label[pred] for pred in predictions]
+    return predictions
+
+
+def find_topic(start_year=2017, end_year=2021):
+    for year in range(start_year, end_year + 1):
+        print(f"Processing {year}...")
+        input_path = DATA_DIR / f"all_reviews_{year}.csv"
+        output_path = OUTPUT_DIR / f"topic_scored_reviews_{year}.csv"
+
+        df = pd.read_csv(input_path)
+
+        all_rows = []
+        for _, row in tqdm(df.iterrows(), total=len(df)):
+            review_id = row["id"]
+            text = row["text"]
+            sentences = glimpse_tokenizer(text)
+            if not sentences:
+                continue
+            labels = predict_topic(sentences)
+            for sentence, topic in zip(sentences, labels):
+                all_rows.append({"id": review_id, "sentence": sentence, "topic": topic})
+
+        output_df = pd.DataFrame(all_rows)
+        output_df.to_csv(output_path, index=False)
+        print(f"Saved topic-scored data to {output_path}")
+
+
+if __name__ == "__main__":
+    find_topic()
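Note: `predict_topic` is a plain argmax over the 8 topic logits followed by the `id2label` lookup. The same two steps isolated with made-up logits:

import torch

id2label = {0: "Substance", 1: "Clarity", 2: "Soundness/Correctness",
            3: "Originality", 4: "Motivation/Impact",
            5: "Meaningful Comparison", 6: "Replicability", 7: "NONE"}

# Hypothetical logits for two sentences over the 8 topics.
logits = torch.tensor([[0.1, 2.3, 0.0, -1.0, 0.4, 0.2, -0.5, 1.1],
                       [0.0, 0.1, 0.2, 0.1, 0.0, 0.3, 0.1, 2.0]])
preds = torch.argmax(logits, dim=1).tolist()  # [1, 7]
print([id2label[p] for p in preds])           # ['Clarity', 'NONE']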
glimpse-ui/alternative_topic/debetra/deberta_topic_train.py
ADDED
@@ -0,0 +1,80 @@
+import pandas as pd
+from datasets import Dataset
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support
+import numpy as np
+
+# Load data
+dev_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_topic_dev.csv")
+train_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_topic_train.csv")
+test_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_topic_test.csv")
+
+# Convert to HuggingFace Datasets
+train_ds = Dataset.from_pandas(train_df)
+dev_ds = Dataset.from_pandas(dev_df)
+test_ds = Dataset.from_pandas(test_df)
+
+# Tokenize
+model_name = "microsoft/deberta-v3-base"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+def tokenize(batch):
+    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=256)
+
+train_ds = train_ds.map(tokenize, batched=True)
+dev_ds = dev_ds.map(tokenize, batched=True)
+test_ds = test_ds.map(tokenize, batched=True)
+
+# Set format for PyTorch
+train_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+dev_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+
+print(train_df['label'].value_counts().sort_index())
+
+
+# Load model
+model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=8)
+
+# Metrics
+def compute_metrics(eval_pred):
+    logits, labels = eval_pred
+    preds = np.argmax(logits, axis=1)
+    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="macro")
+    acc = accuracy_score(labels, preds)
+    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}
+
+# Training arguments
+args = TrainingArguments(
+    output_dir="./alternative_topic/deberta/checkpoints",
+    eval_strategy="epoch",
+    save_strategy="epoch",
+    learning_rate=2e-5,
+    per_device_train_batch_size=8,
+    per_device_eval_batch_size=16,
+    num_train_epochs=4,
+    weight_decay=0.01,
+    load_best_model_at_end=True,
+    metric_for_best_model="f1"
+)
+
+# Trainer
+trainer = Trainer(
+    model=model,
+    args=args,
+    train_dataset=train_ds,
+    eval_dataset=dev_ds,
+    tokenizer=tokenizer,
+    compute_metrics=compute_metrics
+)
+
+# Train
+trainer.train()
+
+# Evaluate on test
+results = trainer.evaluate(test_ds)
+print("Test results:", results)
+
+# Save the model and tokenizer
+model.save_pretrained("./alternative_topic/deberta/final_model")
+tokenizer.save_pretrained("./alternative_topic/deberta/final_model")
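Note: model selection here uses macro-F1 via `compute_metrics`, which averages per-class F1 so minority topics weigh as much as frequent ones. A worked toy example of the same calls (3 classes instead of the script's 8, values made up):

import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

logits = np.array([[2.0, 0.1, 0.0],
                   [0.0, 1.5, 0.2],
                   [0.3, 0.2, 0.1],
                   [0.0, 0.1, 2.2]])
labels = np.array([0, 1, 2, 2])
preds = np.argmax(logits, axis=1)  # [0, 1, 0, 2]
_, _, f1, _ = precision_recall_fscore_support(labels, preds, average="macro")
print(accuracy_score(labels, preds), f1)  # 0.75 and macro-F1 ≈ 0.778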
glimpse-ui/alternative_topic/scideberta/scideberta_topic.py
ADDED
@@ -0,0 +1,92 @@
+import pandas as pd
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from pathlib import Path
+import nltk
+from tqdm import tqdm
+import sys, os.path
+
+nltk.download('punkt')
+
+BASE_DIR = Path(__file__).resolve().parent.parent.parent
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
+
+from glimpse.glimpse.data_loading.Glimpse_tokenizer import glimpse_tokenizer
+
+# === CONFIGURATION ===
+
+MODEL_DIR = BASE_DIR / "alternative_topic" / "scideberta" / "final_model"
+DATA_DIR = BASE_DIR / "glimpse" / "data" / "processed"
+OUTPUT_DIR = BASE_DIR / "data" / "topic_scored"
+OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+# === Load model and tokenizer ===
+tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
+model.eval()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+# === Tokenize like GLIMPSE ===
+# def tokenize_sentences(text: str) -> list:
+#     # same tokenization as in the original glimpse code
+#     text = text.replace('-----', '\n')
+#     sentences = nltk.sent_tokenize(text)
+#     sentences = [sentence for sentence in sentences if sentence != ""]
+#     return sentences
+
+
+# === Label map (optional: for human-readable output) ===
+id2label = {
+    # 0: "Evaluative",
+    # 1: "Structuring",
+    # 2: "Request",
+    # 3: "Fact",
+    # 4: "Social",
+    # 5: "Other",
+    0: "Substance",
+    1: "Clarity",
+    2: "Soundness/Correctness",
+    3: "Originality",
+    4: "Motivation/Impact",
+    5: "Meaningful Comparison",
+    6: "Replicability",
+    7: "NONE"  # This is used for sentences that do not match any specific topic
+}
+
+def predict_topic(sentences):
+    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    predictions = torch.argmax(outputs.logits, dim=1).cpu().tolist()
+    # Convert predictions to human-readable labels
+    predictions = [id2label[pred] for pred in predictions]
+    return predictions
+
+
+def find_topic(start_year=2017, end_year=2021):
+    for year in range(start_year, end_year + 1):
+        print(f"Processing {year}...")
+        input_path = DATA_DIR / f"all_reviews_{year}.csv"
+        output_path = OUTPUT_DIR / f"topic_scored_reviews_{year}.csv"
+
+        df = pd.read_csv(input_path)
+
+        all_rows = []
+        for _, row in tqdm(df.iterrows(), total=len(df)):
+            review_id = row["id"]
+            text = row["text"]
+            sentences = glimpse_tokenizer(text)
+            if not sentences:
+                continue
+            labels = predict_topic(sentences)
+            for sentence, topic in zip(sentences, labels):
+                all_rows.append({"id": review_id, "sentence": sentence, "topic": topic})
+
+        output_df = pd.DataFrame(all_rows)
+        output_df.to_csv(output_path, index=False)
+        print(f"Saved topic-scored data to {output_path}")
+
+
+if __name__ == "__main__":
+    find_topic()
glimpse-ui/alternative_topic/scideberta/scideberta_topic_train.py
ADDED
@@ -0,0 +1,80 @@
+import pandas as pd
+from datasets import Dataset
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support
+import numpy as np
+
+# Load data
+dev_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_topic_dev.csv")
+train_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_topic_train.csv")
+test_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_topic_test.csv")
+
+# Convert to HuggingFace Datasets
+train_ds = Dataset.from_pandas(train_df)
+dev_ds = Dataset.from_pandas(dev_df)
+test_ds = Dataset.from_pandas(test_df)
+
+# Tokenize
+model_name = "KISTI-AI/Scideberta-full"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+def tokenize(batch):
+    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=256)
+
+train_ds = train_ds.map(tokenize, batched=True)
+dev_ds = dev_ds.map(tokenize, batched=True)
+test_ds = test_ds.map(tokenize, batched=True)
+
+# Set format for PyTorch
+train_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+dev_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+
+print(train_df['label'].value_counts().sort_index())
+
+
+# Load model
+model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=8)
+
+# Metrics
+def compute_metrics(eval_pred):
+    logits, labels = eval_pred
+    preds = np.argmax(logits, axis=1)
+    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="macro")
+    acc = accuracy_score(labels, preds)
+    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}
+
+# Training arguments
+args = TrainingArguments(
+    output_dir="./alternative_topic/scideberta/checkpoints",
+    eval_strategy="epoch",
+    save_strategy="epoch",
+    learning_rate=2e-5,
+    per_device_train_batch_size=8,
+    per_device_eval_batch_size=16,
+    num_train_epochs=4,
+    weight_decay=0.01,
+    load_best_model_at_end=True,
+    metric_for_best_model="f1"
+)
+
+# Trainer
+trainer = Trainer(
+    model=model,
+    args=args,
+    train_dataset=train_ds,
+    eval_dataset=dev_ds,
+    tokenizer=tokenizer,
+    compute_metrics=compute_metrics
+)
+
+# Train
+trainer.train()
+
+# Evaluate on test
+results = trainer.evaluate(test_ds)
+print("Test results:", results)
+
+# Save the model and tokenizer
+model.save_pretrained("./alternative_topic/scideberta/final_model")
+tokenizer.save_pretrained("./alternative_topic/scideberta/final_model")
glimpse-ui/data/ExtractDISAPEREData.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import json
import pandas as pd
from pathlib import Path

BASE_DIR = Path(__file__).resolve().parent.parent
base_path = BASE_DIR / "data" / "DISAPERE-main" / "DISAPERE" / "final_dataset"
output_path = BASE_DIR / "data" / "DISAPERE-main" / "SELFExtractedData"

###################################################################################
###################################################################################

# EXTRACTING POLARITY SENTENCES FROM DISAPERE DATASET

# def extract_polarity_sentences(json_dir):
#     data = []
#     for filename in os.listdir(json_dir):
#         if filename.endswith(".json"):
#             with open(os.path.join(json_dir, filename), "r") as f:
#                 thread = json.load(f)
#             for sentence in thread.get("review_sentences", []):
#                 text = sentence.get("text", "").strip()
#                 polarity = sentence.get("polarity")
#                 if text:
#                     if polarity == "pol_positive":
#                         label = 2
#                     elif polarity == "pol_negative":
#                         label = 0
#                     else:
#                         label = 1
#                     data.append({"text": text, "label": label})
#     return pd.DataFrame(data)

# # Extract and save each split
# for split in ["train", "dev", "test"]:
#     df = extract_polarity_sentences(os.path.join(base_path, split))
#     out_file = os.path.join(output_path, f"disapere_polarity_{split}.csv")
#     df.to_csv(out_file, index=False)
#     print(f"{split.capitalize()} saved to {out_file}: {len(df)} samples")


###################################################################################
###################################################################################

# 2. EXTRACTING TOPIC SENTENCES FROM DISAPERE DATASET
#
# === Topic Label Mapping ===
# 1: "Structuring"
# 0: "Evaluative"
# 2: "Request"
# 3: "Fact"
# 4: "Social"
# 5: "Other"
# 6: "Substance"
# 7: "Clarity"
# 8: "Soundness/Correctness"
# 9: "Originality"
# 10: "Motivation/Impact"
# 11: "Meaningful Comparison"
# 12: "Replicability"

# Final topic classes
topic_classes = [
    "asp_substance",
    "asp_clarity",
    "asp_soundness-correctness",
    "asp_originality",
    "asp_impact",
    "asp_comparison",
    "asp_replicability",
    "None",  # This is used for sentences that do not match any specific topic
    # "arg-structuring_summary"
]

label_map = {label: idx for idx, label in enumerate(topic_classes)}

def extract_topic_sentences(json_dir):
    data = []
    for filename in os.listdir(json_dir):
        if filename.endswith(".json"):
            with open(os.path.join(json_dir, filename), "r") as f:
                thread = json.load(f)
            for sentence in thread.get("review_sentences", []):
                text = sentence.get("text", "").strip()
                aspect = sentence.get("aspect", "")
                # fine_action = sentence.get("fine_review_action", "")

                # Decide label source
                topic = aspect if aspect in label_map else "None"

                if text and topic in label_map:
                    label = label_map[topic]
                    data.append({"text": text, "label": label})
    return pd.DataFrame(data)

# Extract and save each split
for split in ["train", "dev", "test"]:
    df = extract_topic_sentences(os.path.join(base_path, split))
    out_file = os.path.join(output_path, f"disapere_topic_{split}.csv")
    df.to_csv(out_file, index=False)
    print(f"{split.capitalize()} saved to {out_file}: {len(df)} samples")

###################################################################################
###################################################################################
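For reference, `label_map` is just the enumeration of `topic_classes`, so the labels written to the CSVs are:

``` python
# label_map == {
#     "asp_substance": 0, "asp_clarity": 1, "asp_soundness-correctness": 2,
#     "asp_originality": 3, "asp_impact": 4, "asp_comparison": 5,
#     "asp_replicability": 6, "None": 7,
# }
```

Note that this differs from the 13-way mapping sketched in the comment block above, which appears to describe the full DISAPERE annotation scheme rather than the final 8 classes actually used here.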
glimpse-ui/glimpse/.gitignore
ADDED
@@ -0,0 +1,203 @@
# Created by .ignore support plugin (hsz.mobi)
### Python template

# GLIMPSE
# Ignore all the data except original files
data/*
!data/
summaries/
output/
slurm*
!scripts/
.gradio/
.test/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject
### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json

### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# AWS User-specific
.idea/**/aws.xml

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# SonarLint plugin
.idea/sonarlint/

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

# idea folder, uncomment if you don't need it
# .idea
share/man/man1/isympy.1
share/man/man1/ttx.1

# IDEs
.idea/
.vscode/
glimpse-ui/glimpse/Readme.md
ADDED
@@ -0,0 +1,69 @@

This is the repository of GLIMPSE: Pragmatically Informative Multi-Document Summarization for Scholarly Reviews
[Paper](https://arxiv.org/abs/2406.07359) | [Code](https://github.com/icannos/glimpse-mds)


### Installation

- We use Python 3.10 and CUDA 12.1
``` bash
module load miniconda/3
module load cuda12
```
- First, create a virtual environment:
``` bash
conda create -n glimpse python=3.10
```
- Second, activate the environment and install PyTorch:
``` bash
conda activate glimpse
conda install pytorch==2.1.1 pytorch-cuda=12.1 -c pytorch -c nvidia
```

- Finally, all remaining required packages can be installed from the requirements file:

``` bash
pip install -r requirements
```
### Data Loading

Step 1: Start by processing the input files from `data/`.

``` bash
python glimpse/data_loading/data_processing.py
```

### Generating Summaries and Computing RSA Scores
Step 2: Now, we generate candidate summaries and compute RSA scores for each candidate.
- For extractive candidates, use the following command:
``` bash
sbatch scripts/extractive.sh Path_of_Your_Processed_Dataset_Step1.csv
```
- For abstractive candidates, use either of the following commands:
- In case the last batch is incomplete, you can add padding with the `--add-padding` argument to complete it:
``` bash
sbatch scripts/abstractive.sh Path_of_Your_Processed_Dataset_Step1.csv --add-padding
```
- If you want to drop the last incomplete batch instead, run the script without the argument:
``` bash
sbatch scripts/abstractive.sh Path_of_Your_Processed_Dataset_Step1.csv
```

`rsasumm/` provides a Python package with an implementation of RSA incremental decoding and RSA reranking of candidates; a usage sketch follows below.
`mds/` provides the experiment scripts and analysis for the Multi-Document Summarization task.

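As a quick orientation, here is a minimal sketch of how the reranker can be driven from Python, mirroring `examples/reviews/reviews_app.py` (the reviews and candidate sentences below are illustrative placeholders):

``` python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from rsasumm.rsa_reranker import RSAReranking

model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")

reviews = ["First review text ...", "Second review text ...", "Third review text ..."]
candidates = ["A candidate summary sentence.", "Another candidate sentence."]  # e.g. all review sentences

reranker = RSAReranking(
    model,
    tokenizer,
    candidates=candidates,
    source_texts=reviews,
    device="cpu",
    rationality=1.0,
)
# rerank(t=...) runs t RSA iterations and returns speaker/listener
# distributions plus a consensuality score for every candidate.
(best_rsa, best_base, speaker_df, listener_df, initial_listener,
 language_model_proba_df, initial_consensuality_scores,
 consensuality_scores) = reranker.rerank(t=2)
```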

## Citation

If you use this code, please cite the following paper:

```
@misc{darrin2024glimpsepragmaticallyinformativemultidocument,
      title={GLIMPSE: Pragmatically Informative Multi-Document Summarization for Scholarly Reviews},
      author={Maxime Darrin and Ines Arous and Pablo Piantanida and Jackie CK Cheung},
      year={2024},
      eprint={2406.07359},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2406.07359},
}
```
glimpse-ui/glimpse/examples/RSA Sum tests.ipynb
ADDED
@@ -0,0 +1,189 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-01-12T16:31:17.690349522Z",
     "start_time": "2024-01-12T16:31:15.472874479Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-12T16:31:17.717430741Z",
     "start_time": "2024-01-12T16:31:17.695066680Z"
    }
   },
   "id": "ecefdad828c7daa3"
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of BartForConditionalGeneration were not initialized from the model checkpoint at facebook/bart-large-cnn and are newly initialized: ['model.shared.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "\n",
    "from transformers import AutoTokenizer, BartForConditionalGeneration\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"facebook/bart-large-cnn\")\n",
    "model = BartForConditionalGeneration.from_pretrained(\"facebook/bart-large-cnn\")\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-12T16:31:26.058437142Z",
     "start_time": "2024-01-12T16:31:17.720106168Z"
    }
   },
   "id": "8c32b182fbcac2b6"
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [],
   "source": [
    "from rsasumm.beam_search import RSAContextualDecoding"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-12T16:31:26.097766981Z",
     "start_time": "2024-01-12T16:31:26.056626187Z"
    }
   },
   "id": "cb33d902fe736c25"
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "texts = ['The paper gives really interesting insights on the topic of transfer learning. It is well presented and the experiment are extensive. I believe the authors missed Jane and al 2021. In addition, I think, there is a mistake in the math.',\n",
    " 'The paper gives really interesting insights on the topic of transfer learning. It is well presented and the experiment are extensive. However, some parts remain really unclear and I would like to see a more detailed explanation of the proposed method.',\n",
    " 'The paper gives really interesting insights on the topic of transfer learning. It is not well presented and lack experiments. In addition, some parts remain really unclear and I would like to see a more detailed explanation of the proposed method.'\n",
    " ]\n",
    "\n",
    "# texts = [texts[2], texts[1], texts[0]]\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-12T16:31:26.127922110Z",
     "start_time": "2024-01-12T16:31:26.098805312Z"
    }
   },
   "id": "436ef1482c361159"
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [],
   "source": [
    "source_texts = tokenizer(texts, return_tensors=\"pt\", padding=True)\n",
    "\n",
    "rsa = RSAContextualDecoding(model, tokenizer, 'cpu')\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-12T16:31:26.169520864Z",
     "start_time": "2024-01-12T16:31:26.125283164Z"
    }
   },
   "id": "84b9943cac6cd7b2"
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [],
   "source": [
    "output = rsa.generate(target_id=1, source_texts_ids=source_texts.input_ids, source_text_attention_mask=source_texts.attention_mask, max_length=50, top_p=0.95, do_sample=True, rationality=8.0, temperature=1.0, process_logits_before_rsa=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-12T16:32:14.857034731Z",
     "start_time": "2024-01-12T16:31:26.164578792Z"
    }
   },
   "id": "620e54a63dd2099c"
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "data": {
      "text/plain": "['Some parts of the paper remain unclear. I would like to see a more detailed explanation of the proposed method.',\n 'Some parts of the paper remain unclear. I would like to see a more detailed explanation of the proposed method.',\n 'Some parts of the paper remain unclear. I would like to see a more detailed explanation of the proposed method.',\n 'Some parts of the paper remain unclear. I would like to see a more detailed explanation of the proposed method.',\n 'Some parts of the paper remain unclear. I would like to see a more detailed explanation of the proposed method.',\n 'Some parts of the paper remain unclear. I would like to see a more detailed explanation of the proposed method.',\n 'Some parts of the paper remain unclear. I would like to see a more detailed explanation of the proposed method.',\n 'Some parts of the paper remain unclear. I would like to see a more detailed explanation of the proposed method.']"
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "tokenizer.batch_decode(output[0], skip_special_tokens=True)\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-12T16:32:14.858531480Z",
     "start_time": "2024-01-12T16:32:14.856763396Z"
    }
   },
   "id": "fb3a5a9a8f9990ee"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
glimpse-ui/glimpse/examples/reviews/reviews_app.py
ADDED
@@ -0,0 +1,274 @@
import math
from typing import List, Tuple

import nltk
import numpy as np

import sys, os.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

from rsasumm.rsa_reranker import RSAReranking
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

MODEL = "facebook/bart-large-cnn"

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL)
tokenizer = AutoTokenizer.from_pretrained(MODEL)


latex_template = r"""
\begin{subfigure}[b]{0.48\textwidth}
\resizebox{\textwidth}{!}{
\begin{coloredbox}{darkgray}{Review 1}
[REVIEW 1]

\end{coloredbox}}
\end{subfigure}
\begin{subfigure}[b]{0.48\textwidth}
\resizebox{\textwidth}{!}{
\begin{coloredbox}{darkgray}{Review 2}
[REVIEW 2]
\end{coloredbox}}
\end{subfigure}
\begin{subfigure}[b]{0.48\textwidth}
\resizebox{\textwidth}{!}{
\begin{coloredbox}{darkgray}{Review 3}
[REVIEW 3]
\end{coloredbox}}
\end{subfigure}
"""

EXAMPLES = [
    "The paper gives really interesting insights on the topic of transfer learning. It is well presented and the experiment are extensive. I believe the authors missed Jane and al 2021. In addition, I think, there is a mistake in the math.",
    "The paper gives really interesting insights on the topic of transfer learning. It is well presented and the experiment are extensive. Some parts remain really unclear and I would like to see a more detailed explanation of the proposed method.",
    "The paper gives really interesting insights on the topic of transfer learning. It is not well presented and lack experiments. Some parts remain really unclear and I would like to see a more detailed explanation of the proposed method.",
]


def make_colored_text_to_latex(scored_texts: List[Tuple[str, float]]):
    """
    Make a LaTeX string from a list of scored texts.
    """
    # rescale scores between 0 and 1
    scores = np.array([score for _, score in scored_texts])
    scores = (scores - scores.min()) / (scores.max() - scores.min())

    # map each score to an RGB color
    cmap = sns.diverging_palette(250, 30, l=50, center="dark", as_cmap=True)
    rgb_colors = [cmap(score)[0:3] for score in scores]
    # format each color as an "r,g,b" string
    rgb_colors = [",".join([str(round(x, 2)) for x in color]) for color in rgb_colors]
    # build the LaTeX string
    latex_string = ""
    for (text, score), rgb_color in zip(scored_texts, rgb_colors):
        latex_string += "\\textcolor[rgb]{" + rgb_color + "}{" + text + "} "

    return latex_string


def summarize(text1, text2, text3, iterations, rationality=1.0):
    # split each text into sentences
    text1_sentences = nltk.sent_tokenize(text1)
    text2_sentences = nltk.sent_tokenize(text2)
    text3_sentences = nltk.sent_tokenize(text3)

    # remove empty sentences
    text1_sentences = [sentence for sentence in text1_sentences if sentence != ""]
    text2_sentences = [sentence for sentence in text2_sentences if sentence != ""]
    text3_sentences = [sentence for sentence in text3_sentences if sentence != ""]

    sentences = list(set(text1_sentences + text2_sentences + text3_sentences))

    rsa_reranker = RSAReranking(
        model,
        tokenizer,
        candidates=sentences,
        source_texts=[text1, text2, text3],
        device="cpu",
        rationality=rationality,
    )
    (
        best_rsa,
        best_base,
        speaker_df,
        listener_df,
        initial_listener,
        language_model_proba_df,
        initial_consensuality_scores,
        consensuality_scores,
    ) = rsa_reranker.rerank(t=iterations)

    # apply exp to the log-probabilities
    speaker_df = speaker_df.applymap(lambda x: math.exp(x))

    text_1_summaries = speaker_df.loc[text1][text1_sentences]
    text_1_summaries = text_1_summaries / text_1_summaries.sum()

    text_2_summaries = speaker_df.loc[text2][text2_sentences]
    text_2_summaries = text_2_summaries / text_2_summaries.sum()

    text_3_summaries = speaker_df.loc[text3][text3_sentences]
    text_3_summaries = text_3_summaries / text_3_summaries.sum()

    # make lists of (sentence, score) tuples
    text_1_summaries = [(sentence, text_1_summaries[sentence]) for sentence in text1_sentences]
    text_2_summaries = [(sentence, text_2_summaries[sentence]) for sentence in text2_sentences]
    text_3_summaries = [(sentence, text_3_summaries[sentence]) for sentence in text3_sentences]

    # normalize consensuality scores between -1 and 1
    consensuality_scores = (consensuality_scores - (consensuality_scores.max() - consensuality_scores.min()) / 2) / (consensuality_scores.max() - consensuality_scores.min()) / 2
    consensuality_scores_01 = (consensuality_scores - consensuality_scores.min()) / (consensuality_scores.max() - consensuality_scores.min())

    most_consensual = consensuality_scores.sort_values(ascending=True).head(3).index.tolist()
    least_consensual = consensuality_scores.sort_values(ascending=False).head(3).index.tolist()

    most_consensual = [(sentence, consensuality_scores[sentence]) for sentence in most_consensual]
    least_consensual = [(sentence, consensuality_scores[sentence]) for sentence in least_consensual]

    text_1_consensuality = consensuality_scores.loc[text1_sentences]
    text_2_consensuality = consensuality_scores.loc[text2_sentences]
    text_3_consensuality = consensuality_scores.loc[text3_sentences]

    # rescale between -1 and 1
    # text_1_consensuality = (text_1_consensuality - (text_1_consensuality.max() - text_1_consensuality.min()) / 2) / (text_1_consensuality.max() - text_1_consensuality.min()) / 2
    # text_2_consensuality = (text_2_consensuality - (text_2_consensuality.max() - text_2_consensuality.min()) / 2) / (text_2_consensuality.max() - text_2_consensuality.min()) / 2
    # text_3_consensuality = (text_3_consensuality - (text_3_consensuality.max() - text_3_consensuality.min()) / 2) / (text_3_consensuality.max() - text_3_consensuality.min()) / 2

    text_1_consensuality = [(sentence, text_1_consensuality[sentence]) for sentence in text1_sentences]
    text_2_consensuality = [(sentence, text_2_consensuality[sentence]) for sentence in text2_sentences]
    text_3_consensuality = [(sentence, text_3_consensuality[sentence]) for sentence in text3_sentences]

    fig1 = plt.figure(figsize=(20, 10))
    ax = fig1.add_subplot(111)
    sns.heatmap(
        listener_df,
        ax=ax,
        cmap="Blues",
        annot=True,
        fmt=".2f",
        cbar=False,
        annot_kws={"size": 10},
    )
    ax.set_title("Listener probabilities")
    ax.set_xlabel("Candidate sentences")
    ax.set_ylabel("Source texts")
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    fig1.tight_layout()

    fig2 = plt.figure(figsize=(20, 10))
    ax = fig2.add_subplot(111)
    sns.heatmap(
        speaker_df,
        ax=ax,
        cmap="Blues",
        annot=True,
        fmt=".2f",
        cbar=False,
        annot_kws={"size": 10},
    )
    ax.set_title("Speaker probabilities")
    ax.set_xlabel("Candidate sentences")
    ax.set_ylabel("Source texts")
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    fig2.tight_layout()

    latex_text_1 = make_colored_text_to_latex(text_1_summaries)
    latex_text_2 = make_colored_text_to_latex(text_2_summaries)
    latex_text_3 = make_colored_text_to_latex(text_3_summaries)

    text_1_consensuality_ = consensuality_scores_01.loc[text1_sentences]
    text_2_consensuality_ = consensuality_scores_01.loc[text2_sentences]
    text_3_consensuality_ = consensuality_scores_01.loc[text3_sentences]

    text_1_consensuality_ = [(sentence, text_1_consensuality_[sentence]) for sentence in text1_sentences]
    text_2_consensuality_ = [(sentence, text_2_consensuality_[sentence]) for sentence in text2_sentences]
    text_3_consensuality_ = [(sentence, text_3_consensuality_[sentence]) for sentence in text3_sentences]

    latex_text_1_consensuality = make_colored_text_to_latex(text_1_consensuality_)
    latex_text_2_consensuality = make_colored_text_to_latex(text_2_consensuality_)
    latex_text_3_consensuality = make_colored_text_to_latex(text_3_consensuality_)

    latex = latex_template.replace("[REVIEW 1]", latex_text_1)
    latex = latex.replace("[REVIEW 2]", latex_text_2)
    latex = latex.replace("[REVIEW 3]", latex_text_3)

    return text_1_summaries, text_2_summaries, text_3_summaries, text_1_consensuality, text_2_consensuality, text_3_consensuality, most_consensual, least_consensual, fig1, fig2, latex


# build the Gradio interface with HighlightedText components
iface = gr.Interface(
    fn=summarize,
    inputs=[
        gr.Textbox(lines=10, value=EXAMPLES[0]),
        gr.Textbox(lines=10, value=EXAMPLES[1]),
        gr.Textbox(lines=10, value=EXAMPLES[2]),
        gr.Number(value=1, label="Iterations"),
        gr.Slider(minimum=0.0, maximum=10.0, step=0.1, value=1.0, label="Rationality"),
    ],
    outputs=[
        gr.HighlightedText(show_legend=True, label="Uniqueness score for each sentence in text 1"),
        gr.HighlightedText(show_legend=True, label="Uniqueness score for each sentence in text 2"),
        gr.HighlightedText(show_legend=True, label="Uniqueness score for each sentence in text 3"),
        gr.HighlightedText(show_legend=True, label="Consensuality score for each sentence in text 1"),
        gr.HighlightedText(show_legend=True, label="Consensuality score for each sentence in text 2"),
        gr.HighlightedText(show_legend=True, label="Consensuality score for each sentence in text 3"),
        gr.HighlightedText(show_legend=True, label="Most consensual sentences"),
        gr.HighlightedText(show_legend=True, label="Least consensual sentences"),
        gr.Plot(label="Listener probabilities"),
        gr.Plot(label="Speaker probabilities"),
        gr.Textbox(lines=10, label="Latex Consensuality scores"),
    ],
    title="RSA Summarizer",
    description="Summarize 3 texts using RSA",
)

iface.launch(share=True)
glimpse-ui/glimpse/examples/reviews/reviews_latex_generation.py
ADDED
@@ -0,0 +1,272 @@
import math
from typing import List, Tuple

import nltk
import numpy as np

from rsasumm.rsa_reranker import RSAReranking
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

MODEL = "facebook/bart-large-cnn"

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL)
tokenizer = AutoTokenizer.from_pretrained(MODEL)


latex_template = r"""
\begin{subfigure}[b]{0.48\textwidth}
\resizebox{\textwidth}{!}{
\begin{coloredbox}{darkgray}{Review 1}
[REVIEW 1]

\end{coloredbox}}
\end{subfigure}
\begin{subfigure}[b]{0.48\textwidth}
\resizebox{\textwidth}{!}{
\begin{coloredbox}{darkgray}{Review 2}
[REVIEW 2]
\end{coloredbox}}
\end{subfigure}
\begin{subfigure}[b]{0.48\textwidth}
\resizebox{\textwidth}{!}{
\begin{coloredbox}{darkgray}{Review 3}
[REVIEW 3]
\end{coloredbox}}
\end{subfigure}
"""

EXAMPLES = [
    "The paper gives really interesting insights on the topic of transfer learning. It is well presented and the experiment are extensive. I believe the authors missed Jane and al 2021. In addition, I think, there is a mistake in the math.",
    "The paper gives really interesting insights on the topic of transfer learning. It is well presented and the experiment are extensive. Some parts remain really unclear and I would like to see a more detailed explanation of the proposed method.",
    "The paper gives really interesting insights on the topic of transfer learning. It is not well presented and lack experiments. Some parts remain really unclear and I would like to see a more detailed explanation of the proposed method.",
]


def make_colored_text_to_latex(scored_texts: List[Tuple[str, float]]):
    """
    Make a LaTeX string from a list of scored texts.
    """
    # rescale scores between 0 and 1
    scores = np.array([score for _, score in scored_texts])
    scores = (scores - scores.min()) / (scores.max() - scores.min())

    # map each score to an RGB color
    cmap = sns.diverging_palette(250, 30, l=50, center="dark", as_cmap=True)
    rgb_colors = [cmap(score)[0:3] for score in scores]
    # format each color as an "r,g,b" string
    rgb_colors = [",".join([str(round(x, 2)) for x in color]) for color in rgb_colors]
    # build the LaTeX string
    latex_string = ""
    for (text, score), rgb_color in zip(scored_texts, rgb_colors):
        # latex_string += "\\textcolor[rgb]{" + rgb_color + "}{" + text + "} "
        latex_string += "\\hlc{" + rgb_color + "}{" + text + "} "

    return latex_string


def summarize(text1, text2, text3, iterations, rationality=1.0):
    # split each text into sentences
    text1_sentences = nltk.sent_tokenize(text1)
    text2_sentences = nltk.sent_tokenize(text2)
    text3_sentences = nltk.sent_tokenize(text3)

    # remove empty sentences
    text1_sentences = [sentence for sentence in text1_sentences if sentence != ""]
    text2_sentences = [sentence for sentence in text2_sentences if sentence != ""]
    text3_sentences = [sentence for sentence in text3_sentences if sentence != ""]

    sentences = list(set(text1_sentences + text2_sentences + text3_sentences))

    rsa_reranker = RSAReranking(
        model,
        tokenizer,
        candidates=sentences,
        source_texts=[text1, text2, text3],
        device="cpu",
        rationality=rationality,
    )
    (
        best_rsa,
        best_base,
        speaker_df,
        listener_df,
        initial_listener,
        language_model_proba_df,
        initial_consensuality_scores,
        consensuality_scores,
    ) = rsa_reranker.rerank(t=iterations)

    # apply exp to the log-probabilities
    speaker_df = speaker_df.applymap(lambda x: math.exp(x))

    text_1_summaries = speaker_df.loc[text1][text1_sentences]
    text_1_summaries = text_1_summaries / text_1_summaries.sum()

    text_2_summaries = speaker_df.loc[text2][text2_sentences]
    text_2_summaries = text_2_summaries / text_2_summaries.sum()

    text_3_summaries = speaker_df.loc[text3][text3_sentences]
    text_3_summaries = text_3_summaries / text_3_summaries.sum()

    # make lists of (sentence, score) tuples
    text_1_summaries = [(sentence, text_1_summaries[sentence]) for sentence in text1_sentences]
    text_2_summaries = [(sentence, text_2_summaries[sentence]) for sentence in text2_sentences]
    text_3_summaries = [(sentence, text_3_summaries[sentence]) for sentence in text3_sentences]

    # normalize consensuality scores between -1 and 1
    consensuality_scores = (consensuality_scores - (consensuality_scores.max() - consensuality_scores.min()) / 2) / (consensuality_scores.max() - consensuality_scores.min()) / 2
    consensuality_scores_01 = (consensuality_scores - consensuality_scores.min()) / (consensuality_scores.max() - consensuality_scores.min())

    most_consensual = consensuality_scores.sort_values(ascending=True).head(3).index.tolist()
    least_consensual = consensuality_scores.sort_values(ascending=False).head(3).index.tolist()

    most_consensual = [(sentence, consensuality_scores[sentence]) for sentence in most_consensual]
    least_consensual = [(sentence, consensuality_scores[sentence]) for sentence in least_consensual]

    text_1_consensuality = consensuality_scores.loc[text1_sentences]
    text_2_consensuality = consensuality_scores.loc[text2_sentences]
    text_3_consensuality = consensuality_scores.loc[text3_sentences]

    # rescale between -1 and 1
    # text_1_consensuality = (text_1_consensuality - (text_1_consensuality.max() - text_1_consensuality.min()) / 2) / (text_1_consensuality.max() - text_1_consensuality.min()) / 2
    # text_2_consensuality = (text_2_consensuality - (text_2_consensuality.max() - text_2_consensuality.min()) / 2) / (text_2_consensuality.max() - text_2_consensuality.min()) / 2
    # text_3_consensuality = (text_3_consensuality - (text_3_consensuality.max() - text_3_consensuality.min()) / 2) / (text_3_consensuality.max() - text_3_consensuality.min()) / 2

    text_1_consensuality = [(sentence, text_1_consensuality[sentence]) for sentence in text1_sentences]
    text_2_consensuality = [(sentence, text_2_consensuality[sentence]) for sentence in text2_sentences]
    text_3_consensuality = [(sentence, text_3_consensuality[sentence]) for sentence in text3_sentences]

    fig1 = plt.figure(figsize=(20, 10))
    ax = fig1.add_subplot(111)
    sns.heatmap(
        listener_df,
        ax=ax,
        cmap="Blues",
        annot=True,
        fmt=".2f",
        cbar=False,
        annot_kws={"size": 10},
    )
    ax.set_title("Listener probabilities")
    ax.set_xlabel("Candidate sentences")
    ax.set_ylabel("Source texts")
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    fig1.tight_layout()

    fig2 = plt.figure(figsize=(20, 10))
    ax = fig2.add_subplot(111)
    sns.heatmap(
        speaker_df,
        ax=ax,
        cmap="Blues",
        annot=True,
        fmt=".2f",
        cbar=False,
        annot_kws={"size": 10},
    )
    ax.set_title("Speaker probabilities")
    ax.set_xlabel("Candidate sentences")
    ax.set_ylabel("Source texts")
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    fig2.tight_layout()

    latex_text_1 = make_colored_text_to_latex(text_1_summaries)
    latex_text_2 = make_colored_text_to_latex(text_2_summaries)
    latex_text_3 = make_colored_text_to_latex(text_3_summaries)

    text_1_consensuality_ = consensuality_scores_01.loc[text1_sentences]
    text_2_consensuality_ = consensuality_scores_01.loc[text2_sentences]
    text_3_consensuality_ = consensuality_scores_01.loc[text3_sentences]

    text_1_consensuality_ = [(sentence, text_1_consensuality_[sentence]) for sentence in text1_sentences]
    text_2_consensuality_ = [(sentence, text_2_consensuality_[sentence]) for sentence in text2_sentences]
    text_3_consensuality_ = [(sentence, text_3_consensuality_[sentence]) for sentence in text3_sentences]

    latex_text_1_consensuality = make_colored_text_to_latex(text_1_consensuality_)
    latex_text_2_consensuality = make_colored_text_to_latex(text_2_consensuality_)
    latex_text_3_consensuality = make_colored_text_to_latex(text_3_consensuality_)

    latex = latex_template.replace("[REVIEW 1]", latex_text_1)
    latex = latex.replace("[REVIEW 2]", latex_text_2)
    latex = latex.replace("[REVIEW 3]", latex_text_3)

    return text_1_summaries, text_2_summaries, text_3_summaries, text_1_consensuality, text_2_consensuality, text_3_consensuality, most_consensual, least_consensual, fig1, fig2, latex


# build the Gradio interface with HighlightedText components
iface = gr.Interface(
    fn=summarize,
    inputs=[
        gr.Textbox(lines=10, value=EXAMPLES[0]),
        gr.Textbox(lines=10, value=EXAMPLES[1]),
        gr.Textbox(lines=10, value=EXAMPLES[2]),
        gr.Number(value=1, label="Iterations"),
        gr.Slider(minimum=0.0, maximum=10.0, step=0.1, value=1.0, label="Rationality"),
    ],
    outputs=[
        gr.HighlightedText(show_legend=True, label="Uniqueness score for each sentence in text 1"),
        gr.HighlightedText(show_legend=True, label="Uniqueness score for each sentence in text 2"),
        gr.HighlightedText(show_legend=True, label="Uniqueness score for each sentence in text 3"),
        gr.HighlightedText(show_legend=True, label="Consensuality score for each sentence in text 1"),
        gr.HighlightedText(show_legend=True, label="Consensuality score for each sentence in text 2"),
        gr.HighlightedText(show_legend=True, label="Consensuality score for each sentence in text 3"),
        gr.HighlightedText(show_legend=True, label="Most consensual sentences"),
        gr.HighlightedText(show_legend=True, label="Least consensual sentences"),
        gr.Plot(label="Listener probabilities"),
        gr.Plot(label="Speaker probabilities"),
        gr.Textbox(lines=10, label="Latex Consensuality scores"),
    ],
    title="RSA Summarizer",
    description="Summarize 3 texts using RSA",
)

iface.launch()
glimpse-ui/glimpse/glimpse/baselines/generate_llm_summaries.py
ADDED
@@ -0,0 +1,112 @@
import pandas as pd
from pathlib import Path

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re
import argparse
from tqdm import tqdm


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default="")
    parser.add_argument("--batch_size", type=int, default=4)
    parser.add_argument("--device", type=str, default="cuda")
    parser.add_argument("--output", type=Path, default="")

    args = parser.parse_args()
    return args

def prepare_dataset(dataset_name, dataset_path="rsasumm/data/processed/"):
    dataset_path = Path(dataset_path)
    if dataset_name == "amazon":
        dataset = pd.read_csv(dataset_path / "amazon_test.csv")
    elif dataset_name == "space":
        dataset = pd.read_csv(dataset_path / "space.csv")
    elif dataset_name == "yelp":
        dataset = pd.read_csv(dataset_path / "yelp_test.csv")
    elif dataset_name == "reviews":
        dataset = pd.read_csv(dataset_path / "test_metareviews.csv")
    else:
        raise ValueError(f"Unknown dataset {dataset_name}")

    return dataset


# group text by sample id and concatenate text

def group_text_by_id(df: pd.DataFrame) -> pd.DataFrame:
    """
    Group the text by the sample id and concatenate the text.
    :param df: The dataframe
    :return: The dataframe with the text grouped by the sample id
    """
    texts = df.groupby("id")["text"].apply(lambda x: " ".join(x))

    # retrieve the first gold summary for each id
    gold = df.groupby("id")["gold"].first()

    # create a new dataframe
    df = pd.DataFrame({"text": texts, "gold": gold}, index=texts.index)

    return df


def generate_summaries(model, tokenizer, df, batch_size, device):
    # df columns: id, text, gold
    # wrap each text in an instruction prompt
    def make_instruction(text):
        return f"[INST]\n{text}\n Summarize the previous text:[/INST]\n\n"

    df["instruction"] = df["text"].apply(make_instruction)

    # iterate over the instructions one at a time
    dataset = df["instruction"].values.tolist()

    model = model.to(device).eval()

    summaries = []
    with torch.no_grad():
        for batch in tqdm(dataset):
            inputs = tokenizer(batch, padding=True, truncation=True, return_tensors="pt")
            inputs = {k: v.to(device) for k, v in inputs.items()}
            outputs = model.generate(**inputs, temperature=0.7, top_p=0.7, top_k=50, max_new_tokens=500)
            summaries.extend(tokenizer.batch_decode(outputs, skip_special_tokens=True))

    # remove the instruction from the summaries
    df["summary"] = [re.sub(r"\[INST\]\n.*\[/INST\]\n\n", "", summary, flags=re.DOTALL) for summary in summaries]

    return df

def main():
    args = parse_args()
    model_name = "togethercomputer/Llama-2-7B-32K-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16)

    df = prepare_dataset(args.dataset)

    df = group_text_by_id(df)

    df = generate_summaries(model, tokenizer, df, args.batch_size, args.device)
    df['metadata/Method'] = "LLM"
    df['metadata/Model'] = model_name

    name = f"{args.dataset}-_-{model_name.replace('/', '-')}-_-llm_summaries.csv"
    path = Path(args.output) / name

    Path(args.output).mkdir(exist_ok=True, parents=True)
    df.to_csv(path, index=True)


main()
glimpse-ui/glimpse/glimpse/baselines/sumy_baselines.py
ADDED
@@ -0,0 +1,129 @@
from sumy.parsers.plaintext import PlaintextParser
from sumy.parsers.html import HtmlParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words

import argparse

import pandas as pd
from pathlib import Path

import nltk


def summarize(method, language, sentence_count, input_type, input_):
    if method == 'LSA':
        from sumy.summarizers.lsa import LsaSummarizer as Summarizer
    if method == 'text-rank':
        from sumy.summarizers.text_rank import TextRankSummarizer as Summarizer
    if method == 'lex-rank':
        from sumy.summarizers.lex_rank import LexRankSummarizer as Summarizer
    if method == 'edmundson':
        from sumy.summarizers.edmundson import EdmundsonSummarizer as Summarizer
    if method == 'luhn':
        from sumy.summarizers.luhn import LuhnSummarizer as Summarizer
    if method == 'kl-sum':
        from sumy.summarizers.kl import KLSummarizer as Summarizer
    if method == 'random':
        from sumy.summarizers.random import RandomSummarizer as Summarizer
    if method == 'reduction':
        from sumy.summarizers.reduction import ReductionSummarizer as Summarizer

    if input_type == "URL":
        parser = HtmlParser.from_url(input_, Tokenizer(language))
    if input_type == "text":
        parser = PlaintextParser.from_string(input_, Tokenizer(language))

    stemmer = Stemmer(language)
    summarizer = Summarizer(stemmer)
    stop_words = get_stop_words(language)

    if method == 'edmundson':
        summarizer.null_words = stop_words
        summarizer.bonus_words = parser.significant_words
        summarizer.stigma_words = parser.stigma_words
    else:
        summarizer.stop_words = stop_words

    summary_sentences = summarizer(parser.document, sentence_count)
    summary = ' '.join([str(sentence) for sentence in summary_sentences])

    return summary


# methods = ['LSA', 'text-rank', 'lex-rank', 'edmundson', 'luhn', 'kl-sum', 'random', 'reduction']


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default="")
    # summarization method
    parser.add_argument("--method", type=str, choices=['LSA', 'text-rank', 'lex-rank', 'edmundson', 'luhn', 'kl-sum', 'random', 'reduction'], default="LSA")
    parser.add_argument("--batch_size", type=int, default=4)
    parser.add_argument("--device", type=str, default="cuda")
    parser.add_argument("--output", type=Path, default="")

    args = parser.parse_args()
    return args

def prepare_dataset(dataset_name, dataset_path="rsasumm/data/processed/"):
    dataset_path = Path(dataset_path)
    if dataset_name == "amazon":
        dataset = pd.read_csv(dataset_path / "amazon_test.csv")
    elif dataset_name == "space":
        dataset = pd.read_csv(dataset_path / "space.csv")
    elif dataset_name == "yelp":
        dataset = pd.read_csv(dataset_path / "yelp_test.csv")
    elif dataset_name == "reviews":
        dataset = pd.read_csv(dataset_path / "test_metareviews.csv")
    else:
        raise ValueError(f"Unknown dataset {dataset_name}")

    return dataset


# group text by sample id and concatenate text

def group_text_by_id(df: pd.DataFrame) -> pd.DataFrame:
    """
    Group the text by the sample id and concatenate the text.
    :param df: The dataframe
    :return: The dataframe with the text grouped by the sample id
    """
    texts = df.groupby("id")["text"].apply(lambda x: " ".join(x))

    # retrieve the first gold summary for each id
    gold = df.groupby("id")["gold"].first()

    # create a new dataframe
    df = pd.DataFrame({"text": texts, "gold": gold}, index=texts.index)

    return df


def main():
    args = parse_args()
    for N in [1]:
        dataset = prepare_dataset(args.dataset)
        # dataset = group_text_by_id(dataset)

        summaries = []
        for text in dataset.text:
            summary = summarize(args.method, "english", N, "text", text)
            summaries.append(summary)

        dataset['summary'] = summaries
        dataset['metadata/dataset'] = args.dataset
        dataset["metadata/method"] = args.method
        dataset["metadata/sentence_count"] = N

        name = f"{args.dataset}-_-{args.method}-_-sumy_{N}.csv"
        path = Path(args.output) / name

        Path(args.output).mkdir(exist_ok=True, parents=True)
        dataset.to_csv(path, index=True)


main()
glimpse-ui/glimpse/glimpse/data_loading/Glimpse_tokenizer.py
ADDED
@@ -0,0 +1,74 @@
1 |
+
import re
|
2 |
+
import spacy
|
3 |
+
import importlib
|
4 |
+
import nltk
|
5 |
+
|
6 |
+
############################################
|
7 |
+
### CHANGE THIS LINE TO CHOOSE TOKENIZER ###
|
8 |
+
ORIGINAL_TOKENIZER = False
|
9 |
+
############################################
|
10 |
+
|
11 |
+
try:
|
12 |
+
importlib.util.find_spec("en_core_web_sm")
|
13 |
+
nlp = spacy.load("en_core_web_sm")
|
14 |
+
except:
|
15 |
+
import spacy.cli
|
16 |
+
spacy.cli.download("en_core_web_sm")
|
17 |
+
nlp = spacy.load("en_core_web_sm")
|
18 |
+
|
19 |
+
def glimpse_tokenizer(text: str) -> list:
|
20 |
+
|
21 |
+
# If the original tokenizer is set to True, use the original tokenizer
|
22 |
+
if ORIGINAL_TOKENIZER:
|
23 |
+
return original_tokenizer(text)
|
24 |
+
|
25 |
+
# else, use the new tokenizer
|
26 |
+
else:
|
27 |
+
|
28 |
+
# More general-purpose tokenizer that handles both natural paragraph text and structured reviews.
|
29 |
+
|
30 |
+
# Normalize long dashes
|
31 |
+
text = re.sub(r"[-]{2,}", "\n", text)
|
32 |
+
|
33 |
+
# Keep line breaks meaningful (but fallback to sentence splitting)
|
34 |
+
chunks = re.split(r"\n+", text)
|
35 |
+
sentences = []
|
36 |
+
|
37 |
+
for chunk in chunks:
|
38 |
+
chunk = chunk.strip()
|
39 |
+
if not chunk:
|
40 |
+
continue
|
41 |
+
|
42 |
+
# Section headers and bullets become single “sentences”
|
43 |
+
if re.match(r"^(Summary|Strengths?|Weaknesses?|Minor)\s*:?", chunk, re.IGNORECASE):
|
44 |
+
sentences.append(chunk)
|
45 |
+
continue
|
46 |
+
|
47 |
+
if re.match(r"^(\d+(\.\d+)*\.|-)\s+.+", chunk):
|
48 |
+
sentences.append(chunk)
|
49 |
+
continue
|
50 |
+
|
51 |
+
# Otherwise, apply SpaCy sentence splitting
|
52 |
+
doc = nlp(chunk)
|
53 |
+
sentences.extend([sent.text.strip() for sent in doc.sents if sent.text.strip()])
|
54 |
+
|
55 |
+
return sentences
|
56 |
+
|
57 |
+
# reuse the original glimpse tokenizer
|
58 |
+
# def glimpse_tokenizer(text: str) -> list:
|
59 |
+
# return tokenize_sentences(text)
|
60 |
+
|
61 |
+
# Default glimpse tokenizer from the original code
|
62 |
+
def original_tokenizer(text: str) -> list:
|
63 |
+
"""
|
64 |
+
Tokenizes the input text into sentences.
|
65 |
+
|
66 |
+
@param text: The input text to be tokenized
|
67 |
+
@return: A list of tokenized sentences
|
68 |
+
"""
|
69 |
+
text = text.replace('-----', '\n')
|
70 |
+
sentences = nltk.sent_tokenize(text)
|
71 |
+
# remove empty sentences
|
72 |
+
sentences = [sentence for sentence in sentences if sentence != ""]
|
73 |
+
|
74 |
+
return sentences
|
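A quick sketch (not from the diff; the review text is invented) of what the new tokenizer does with a structured review, assuming the package root is on the import path:

from glimpse.data_loading.Glimpse_tokenizer import glimpse_tokenizer

review = (
    "Summary: The paper studies multi-document summarization.\n"
    "Strengths:\n"
    "1. Clear problem statement. The experiments are thorough.\n"
    "-----\n"
    "Weaknesses: The related work section is thin."
)

for sentence in glimpse_tokenizer(review):
    print(repr(sentence))
# Section headers like "Summary:" and numbered bullets are kept as single
# "sentences"; ordinary prose chunks are split with spaCy; the "-----"
# separator is normalized into a line break before chunking.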
glimpse-ui/glimpse/glimpse/data_loading/data_processing.py
ADDED
@@ -0,0 +1,15 @@
import pandas as pd
import os

data_glimpse = "data/processed/"
if not os.path.exists(data_glimpse):
    os.makedirs(data_glimpse)

for year in range(2017, 2021 + 1):
    dataset = pd.read_csv(f"data/all_reviews_{year}.csv")
    # .copy() avoids pandas' SettingWithCopyWarning on the in-place rename below
    sub_dataset = dataset[['id', 'review', 'metareview']].copy()
    sub_dataset.rename(columns={"review": "text", "metareview": "gold"}, inplace=True)

    sub_dataset.to_csv(f"{data_glimpse}all_reviews_{year}.csv", index=False)
glimpse-ui/glimpse/glimpse/data_loading/generate_abstractive_candidates.py
ADDED
@@ -0,0 +1,230 @@
import argparse
from pathlib import Path

import pandas as pd
from torch.utils.data import DataLoader
from datasets import Dataset
from tqdm import tqdm
import datetime
import torch

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

GENERATION_CONFIGS = {
    "top_p_sampling": {
        "max_new_tokens": 200,
        "do_sample": True,
        "top_p": 0.95,
        "temperature": 1.0,
        "num_return_sequences": 8,
        "num_beams": 1,
        # "num_beam_groups": 4,
    },
    **{
        f"sampling_topp_{str(topp).replace('.', '')}": {
            "max_new_tokens": 200,
            "do_sample": True,
            "num_return_sequences": 8,
            "top_p": 0.95,
        }
        for topp in [0.5, 0.8, 0.95, 0.99]
    },
}

# add the base config to all configs
for key, value in GENERATION_CONFIGS.items():
    GENERATION_CONFIGS[key] = {
        # "max_length": 2048,
        "min_length": 0,
        "early_stopping": True,
        **value,
    }


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="facebook/bart-large-cnn")
    parser.add_argument("--dataset_path", type=Path, default="data/processed/all_reviews_2017.csv")
    parser.add_argument("--decoding_config", type=str, default="top_p_sampling", choices=GENERATION_CONFIGS.keys())

    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--device", type=str, default="cuda")
    parser.add_argument("--trimming", action=argparse.BooleanOptionalAction, default=True)

    parser.add_argument("--output_dir", type=str, default="data/candidates")

    # if run in a scripted way, the output path will be printed
    parser.add_argument("--scripted-run", action=argparse.BooleanOptionalAction, default=False)

    # limit the number of samples to generate
    parser.add_argument("--limit", type=int, default=None)

    args = parser.parse_args()

    return args


def prepare_dataset(dataset_path) -> Dataset:
    try:
        dataset = pd.read_csv(dataset_path)
    except Exception:
        raise ValueError(f"Unknown dataset {dataset_path}")

    # make a dataset from the dataframe
    dataset = Dataset.from_pandas(dataset)

    return dataset


def evaluate_summarizer(
    model, tokenizer, dataset: Dataset, decoding_config, batch_size: int,
    device: str, trimming: bool
) -> Dataset:
    """
    @param model: The model used to generate the summaries
    @param tokenizer: The tokenizer used to tokenize the text and the summary
    @param dataset: A dataset with the text
    @param decoding_config: Dictionary with the decoding config
    @param batch_size: The batch size used to generate the summaries
    @return: The same dataset with the summaries added
    """
    # create a dataloader over the dataset
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=trimming)

    # generate summaries
    summaries = []
    print("Generating summaries...")

    for batch in tqdm(dataloader):
        text = batch["text"]

        inputs = tokenizer(
            text,
            max_length=1024,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )

        # move inputs to device
        inputs = {key: value.to(device) for key, value in inputs.items()}

        # generate summaries
        outputs = model.generate(
            **inputs,
            **decoding_config,
        )

        total_size = outputs.numel()  # total number of elements in the tensor
        target_size = batch_size * outputs.shape[-1]  # target size of the last dimension
        # padding required to make the total number of elements divisible by the target size
        pad_size = (target_size - (total_size % target_size)) % target_size

        # pad the tensor with zeros so the total number of elements divides evenly
        if not trimming and pad_size != 0:
            outputs = torch.nn.functional.pad(outputs, (0, 0, 0, pad_size // outputs.shape[-1]))

        # outputs: (batch_size * num_return_sequences, max_length)
        try:
            outputs = outputs.reshape(batch_size, -1, outputs.shape[-1])
        except Exception as e:
            print(f"Error reshaping outputs: {e}")
            raise ValueError(f"Cannot reshape tensor of size {outputs.numel()} into shape "
                             f"({batch_size}, -1, {outputs.shape[-1]}).")

        # decode summaries
        for b in range(batch_size):
            summaries.append(
                [
                    tokenizer.decode(
                        outputs[b, i],
                        skip_special_tokens=True,
                    )
                    for i in range(outputs.shape[1])
                ]
            )

    # if trimming dropped the last incomplete batch, remove those rows from the dataset too
    if trimming:
        dataset = dataset.select(range(len(summaries)))

    # add summaries to the huggingface dataset
    dataset = dataset.map(lambda example: {"summary": summaries.pop(0)})

    return dataset


def sanitize_model_name(model_name: str) -> str:
    """
    Sanitize the model name to be used as a folder name.
    @param model_name: The model name
    @return: The sanitized model name
    """
    return model_name.replace("/", "_")


def main():
    args = parse_args()

    # load the model
    model = AutoModelForSeq2SeqLM.from_pretrained(args.model_name)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)

    tokenizer.pad_token = tokenizer.unk_token
    tokenizer.pad_token_id = tokenizer.unk_token_id

    # move model to device
    model = model.to(args.device)

    # load the dataset
    print("Loading dataset...")
    dataset = prepare_dataset(args.dataset_path)

    # limit the number of samples
    if args.limit is not None:
        _lim = min(args.limit, len(dataset))
        dataset = dataset.select(range(_lim))

    # generate summaries
    dataset = evaluate_summarizer(
        model,
        tokenizer,
        dataset,
        GENERATION_CONFIGS[args.decoding_config],
        args.batch_size,
        args.device,
        args.trimming,
    )

    df_dataset = dataset.to_pandas()
    df_dataset = df_dataset.explode('summary')
    df_dataset = df_dataset.reset_index()
    # add an id for each candidate summary of an example
    df_dataset['id_candidate'] = df_dataset.groupby(['index']).cumcount()

    # save the dataset, with a unique date in the name
    now = datetime.datetime.now()
    date = now.strftime("%Y-%m-%d-%H-%M-%S")
    model_name = sanitize_model_name(args.model_name)
    padding_status = "trimmed" if args.trimming else "padded"
    output_path = (
        Path(args.output_dir)
        / f"{model_name}-_-{args.dataset_path.stem}-_-{args.decoding_config}-_-{padding_status}-_-{date}.csv"
    )

    # create the output dir if it doesn't exist
    if not output_path.parent.exists():
        output_path.parent.mkdir(parents=True, exist_ok=True)

    df_dataset.to_csv(output_path, index=False, encoding="utf-8")

    # in case of a scripted run, print the output path
    if args.scripted_run:
        print(output_path)


if __name__ == "__main__":
    main()
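A toy illustration (not from the diff) of the reshape step above: generate() returns a flat (batch_size * num_return_sequences, seq_len) tensor, and the script regroups the candidate sequences per input example; the sizes here are invented:

import torch

batch_size, num_return_sequences, seq_len = 4, 8, 10
# stand-in for the output of model.generate with num_return_sequences=8
outputs = torch.zeros(batch_size * num_return_sequences, seq_len)

regrouped = outputs.reshape(batch_size, -1, seq_len)
print(regrouped.shape)  # torch.Size([4, 8, 10]): one row of 8 candidates per example

# With --no-trimming, a short final batch leaves the first dimension smaller
# than batch_size, which is why the script zero-pads until it divides evenly.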
glimpse-ui/glimpse/glimpse/data_loading/generate_extractive_candidates.py
ADDED
@@ -0,0 +1,129 @@
import argparse
import datetime
from pathlib import Path

import pandas as pd
from datasets import Dataset
from tqdm import tqdm

import nltk

import sys, os.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))

from glimpse.data_loading.Glimpse_tokenizer import glimpse_tokenizer

# def tokenize_sentences(text: str) -> list:
#     """
#     Tokenizes the input text into sentences.
#
#     @param text: The input text to be tokenized
#     @return: A list of tokenized sentences
#     """
#     text = text.replace('-----', '\n')
#     sentences = nltk.sent_tokenize(text)
#     # remove empty sentences
#     sentences = [sentence for sentence in sentences if sentence != ""]
#
#     return sentences


def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument("--dataset_path", type=Path, default="glimpse/data/processed/all_reviews_2017.csv")
    parser.add_argument("--output_dir", type=str, default="glimpse/data/candidates")

    # if run in a scripted way, the output path will be printed
    parser.add_argument("--scripted-run", action=argparse.BooleanOptionalAction, default=False)

    # limit the number of samples to generate
    parser.add_argument("--limit", type=int, default=None)

    args = parser.parse_args()

    return args


def prepare_dataset(dataset_path) -> Dataset:

    try:
        dataset = pd.read_csv(dataset_path)
    except Exception:
        raise ValueError(f"Unknown dataset {dataset_path}")

    # make a dataset from the dataframe
    dataset = Dataset.from_pandas(dataset)

    return dataset


def evaluate_summarizer(dataset: Dataset) -> Dataset:
    """
    @param dataset: A dataset with the text
    @return: The same dataset with the summaries added
    """
    # split each text into sentences; each sentence becomes an extractive candidate
    summaries = []
    print("Generating summaries...")

    # (tqdm library for the progress bar)
    for sample in tqdm(dataset):
        text = sample["text"]

        sentences = glimpse_tokenizer(text)

        summaries.append(sentences)

    # add summaries to the huggingface dataset
    dataset = dataset.map(lambda example: {"summary": summaries.pop(0)})

    return dataset


def main():
    args = parse_args()
    # load the dataset
    print("Loading dataset...")
    dataset = prepare_dataset(args.dataset_path)

    # limit the number of samples
    if args.limit is not None:
        _lim = min(args.limit, len(dataset))
        dataset = dataset.select(range(_lim))

    # generate summaries
    dataset = evaluate_summarizer(
        dataset,
    )

    df_dataset = dataset.to_pandas()
    df_dataset = df_dataset.explode("summary")
    df_dataset = df_dataset.reset_index()
    # add an id for each candidate sentence of an example
    df_dataset["id_candidate"] = df_dataset.groupby(["index"]).cumcount()

    # save the dataset, with a unique date in the name
    now = datetime.datetime.now()
    date = now.strftime("%Y-%m-%d-%H-%M-%S")
    output_path = (
        Path(args.output_dir)
        / f"extractive_sentences-_-{args.dataset_path.stem}-_-none-_-{date}.csv"
    )

    # create the output dir if it doesn't exist
    if not output_path.parent.exists():
        output_path.parent.mkdir(parents=True, exist_ok=True)

    df_dataset.to_csv(output_path, index=False, encoding="utf-8")

    # in case of a scripted run, print the output path
    if args.scripted_run:
        print(output_path)


if __name__ == "__main__":
    main()
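A toy illustration (not from the diff; the data is invented) of the explode/cumcount step that turns one row per review into one row per candidate sentence:

import pandas as pd

df = pd.DataFrame({
    "id": ["r1", "r2"],
    "summary": [["Sent A.", "Sent B."], ["Sent C."]],
})

df = df.explode("summary").reset_index()
df["id_candidate"] = df.groupby(["index"]).cumcount()
print(df[["id", "summary", "id_candidate"]])
#    id  summary  id_candidate
# 0  r1  Sent A.             0
# 1  r1  Sent B.             1
# 2  r2  Sent C.             0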
glimpse-ui/glimpse/glimpse/evaluate/Evaluate informativeness.ipynb
ADDED
@@ -0,0 +1,258 @@
(Jupyter notebook; kernel "pytorch-gpu-2.0.0_py3.10.9", Python 3.10.9. The code cells are reproduced below; all cells were saved without outputs, and several trailing cells are empty.)

# Cell 1: imports and output directory
import pandas as pd
import numpy as np
from pathlib import Path
import pickle as pk
import nltk
import seaborn as sns
import matplotlib.pyplot as plt

export_summaries_path = Path('output/summaries/methods_per_text')
export_summaries_path.mkdir(parents=True, exist_ok=True)

# Cell 2: load the per-method summary CSVs and tag them with metadata
dfs = []
for file in export_summaries_path.glob('*.csv'):
    df = pd.read_csv(file)
    generation_method, dataset = file.stem.split('-_-')[:2]

    df['metadata/Generation'] = generation_method
    df['metadata/Dataset'] = dataset

    dfs.append(df)

df = pd.concat(dfs)

df = df.drop([c for c in df.columns if "Unnamed:" in c], axis=1)

del dfs

def replace_abstractive(x):
    if "abstractive" in x:
        return "extractive_sentences"
    else:
        return x

df['metadata/Generation'] = df['metadata/Generation'].apply(replace_abstractive)

# Cell 3: drop the Lead baselines
df = df[~(df['Method'].str.contains('Lead')).fillna(False)]

# Cell 4: (empty)

# Cell 5: mean probability of success and LM perplexity per method
ddf = df.copy()
ddf['proba_of_success'] = ddf['proba_of_success'].apply(np.exp)

discriminativity = (
    ddf.groupby(['metadata/Generation', 'metadata/reranking_model', 'Method'])[['proba_of_success', 'LM Perplexity']]
    .agg(['mean'])
    .droplevel(1, axis=1)
    .sort_values('proba_of_success', ascending=False)
    .reset_index()
)
discriminativity

# Cell 6: bar plot of the probability of success per method
ddf = ddf.sort_values('proba_of_success', ascending=False)
sns.catplot(data=ddf, y='proba_of_success', x='Method', hue="metadata/Generation", kind='bar', col='metadata/reranking_model')

# Cell 7: Pareto front of probability of success vs. LM perplexity
# (in the saved notebook, get_pareto_points was defined *after* its first use
# in the same cell, which raises a NameError on a fresh run; it is reordered here)
def get_pareto_points(data):
    # data: [N, 2]; keep the points that no other point dominates on both coordinates
    optima = []
    for p in data:
        if len([p2 for p2 in data if p2[0] > p[0] and p2[1] > p[1]]) == 0:
            optima.append(p)
    return np.array(optima)

fig, ax = plt.subplots(1, 1)

paretto = get_pareto_points(discriminativity[['proba_of_success', 'LM Perplexity']].values)

ax.plot(paretto[:, 1], paretto[:, 0], c='purple', linewidth=5, linestyle="--", label="Pareto front")
sns.scatterplot(data=discriminativity, y='proba_of_success', x='LM Perplexity', hue="Method", s=200, alpha=0.8, style='metadata/reranking_model')
plt.xlim(-70, 0)

# Cells 8-9: (empty)

# Cell 10: KDE of LM perplexity vs. probability of success
ddf = df.copy().drop('level_0', axis=1)
ddf['proba_of_success'] = -ddf['proba_of_success']
ddf['LM Perplexity'] = -ddf['LM Perplexity']
ddf = ddf.reset_index()

sns.displot(data=ddf, x='LM Perplexity', y='proba_of_success', hue="Method", kind='kde')
# plt.xscale('log')
# plt.yscale('log')

plt.xlim(0, 100)
plt.ylim(-2, 3)

# Cell 11: scatter plot of the raw (unaveraged) scores
sns.scatterplot(data=df, x='LM Perplexity', y='proba_of_success', hue="Method")

# Cells 12-15: (empty)
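A quick check (not from the notebook; the points are invented) of what get_pareto_points keeps when both axes are maximized:

import numpy as np

def get_pareto_points(data):
    optima = []
    for p in data:
        if len([p2 for p2 in data if p2[0] > p[0] and p2[1] > p[1]]) == 0:
            optima.append(p)
    return np.array(optima)

points = np.array([[0.9, -10.0], [0.5, -5.0], [0.3, -20.0]])
print(get_pareto_points(points))
# [[ 0.9 -10. ]
#  [ 0.5  -5. ]]  -> (0.3, -20.0) is dominated by both other points and dropped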
glimpse-ui/glimpse/glimpse/evaluate/evaluate_bartbert_metrics.py
ADDED
@@ -0,0 +1,110 @@
import argparse
from pathlib import Path

import pandas as pd

from bert_score import BERTScorer


def sanitize_model_name(model_name: str) -> str:
    """
    Sanitize the model name to be used as a folder name.
    @param model_name: The model name
    @return: The sanitized model name
    """
    return model_name.replace("/", "_")


# logging.basicConfig(stream=stdout, level=logging.)
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--summaries", type=Path, default="")

    # device
    parser.add_argument("--device", type=str, default="cuda")

    args = parser.parse_args()
    return args


def parse_summaries(path: Path):
    """
    :return: a pandas dataframe with at least the columns 'gold' and 'summary'
    """
    # read the csv file
    df = pd.read_csv(path).dropna()

    # check if the csv file has the correct columns
    if not all([col in df.columns for col in ["gold", "summary"]]):
        raise ValueError("The csv file must have the columns 'gold' and 'summary'.")

    return df


def evaluate_bartbert(df, device="cuda"):
    # score each summary against the gold metareview

    # texts = df.text.tolist()
    texts = df.gold.tolist()
    summaries = df.summary.tolist()

    scorer = BERTScorer(lang="en", rescale_with_baseline=True, device=device)

    metrics = {'BERTScore': []}
    for i in range(len(texts)):
        texts[i] = texts[i].replace("\n", " ")
        summaries[i] = summaries[i].replace("\n", " ")

        P, R, F1 = scorer.score([summaries[i]], [texts[i]])

        metrics['BERTScore'].append(F1.mean().item())

    # compute the mean of the metrics
    # metrics = {k: sum(v) / len(v) for k, v in metrics.items()}

    return metrics


def main():
    args = parse_args()

    path = args.summaries
    path.parent.mkdir(parents=True, exist_ok=True)

    # load the summaries
    df = parse_summaries(args.summaries)

    metrics = evaluate_bartbert(df)

    # make a dataframe with the metric
    df = pd.DataFrame(metrics)

    # add a prefix to the metric names
    df = df.add_prefix("common/")

    # save the dataframe;
    # if the file already exists, load it and add the new columns

    print(df)

    if path.exists():
        df_old = pd.read_csv(path, index_col=0)

        # create the columns if they do not exist
        for col in df.columns:
            if col not in df_old.columns:
                df_old[col] = float("nan")

        # add the new values to the dataframe
        for col in df.columns:
            df_old[col] = df[col]

        df = df_old

    df.to_csv(path)


if __name__ == "__main__":
    main()
glimpse-ui/glimpse/glimpse/evaluate/evaluate_common_metrics_samples.py
ADDED
@@ -0,0 +1,122 @@
import argparse
from pathlib import Path

import pandas as pd
from rouge_score import rouge_scorer


def sanitize_model_name(model_name: str) -> str:
    """
    Sanitize the model name to be used as a folder name.
    @param model_name: The model name
    @return: The sanitized model name
    """
    return model_name.replace("/", "_")


# logging.basicConfig(stream=stdout, level=logging.)
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--summaries", type=Path, default="")

    args = parser.parse_args()
    return args


def parse_summaries(path: Path):
    """
    :return: a pandas dataframe with at least the columns 'gold' and 'summary'
    """
    # read the csv file
    df = pd.read_csv(path).dropna()

    # check if the csv file has the correct columns
    if not all([col in df.columns for col in ["gold", "summary"]]):
        raise ValueError("The csv file must have the columns 'gold' and 'summary'.")

    return df


def evaluate_rouge(df):
    # score each summary against the gold metareview
    texts = df.gold.tolist()
    summaries = df.summary.tolist()

    # ROUGE F-measures
    metric_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
    metrics = {name: [] for name in metric_names}

    scorer = rouge_scorer.RougeScorer(metric_names, use_stemmer=True)

    # one scorer call per (summary, text) pair instead of one call per metric,
    # as in the original; the resulting scores are identical
    for summary, text in zip(summaries, texts):
        scores = scorer.score(summary, text)
        for name in metric_names:
            metrics[name].append(scores[name].fmeasure)

    # compute the mean of the metrics
    # metrics = {k: sum(v) / len(v) for k, v in metrics.items()}

    return metrics


def main():
    args = parse_args()

    # load the summaries
    df = parse_summaries(args.summaries)

    metrics = evaluate_rouge(df)

    # # add index to the metrics
    # metrics["index"] = [i for i in range(len(df))]

    df = pd.DataFrame.from_dict(metrics)
    df = df.add_prefix("common/")

    # merge the metrics with the summaries
    if args.summaries.exists():
        df_old = parse_summaries(args.summaries)

        # create the columns if they do not exist
        for col in df.columns:
            if col not in df_old.columns:
                df_old[col] = float("nan")

        # add the new values to the dataframe
        for col in df.columns:
            df_old[col] = df[col]

        df = df_old

    df.to_csv(args.summaries, index=False)


if __name__ == "__main__":
    main()
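A small sketch (not from the diff; the strings are invented) of what rouge_scorer returns for a single pair; note the google rouge-score API takes the target first and the prediction second:

from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL", "rougeLsum"], use_stemmer=True)
scores = scorer.score(
    "reviewers liked the proposed method",  # target (gold metareview)
    "the reviewers liked the method",       # prediction (candidate summary)
)
print(scores["rouge1"].fmeasure)  # unigram-overlap F1 in [0, 1]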
glimpse-ui/glimpse/glimpse/evaluate/evaluate_seahorse_metrics_samples.py
ADDED
@@ -0,0 +1,150 @@
import argparse
from pathlib import Path

import pandas as pd
import torch
import torch.nn.functional as F
import torch.utils.data
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

map_questionnumber_to_question = {
    "question1": "SHMetric/Comprehensible",
    "question2": "SHMetric/Repetition",
    "question3": "SHMetric/Grammar",
    "question4": "SHMetric/Attribution",
    "question5": "SHMetric/Main ideas",
    "question6": "SHMetric/Conciseness",
}


def sanitize_model_name(model_name: str) -> str:
    """
    Sanitize the model name to be used as a folder name.
    @param model_name: The model name
    @return: The sanitized model name
    """
    return model_name.replace("/", "_")


# logging.basicConfig(stream=stdout, level=logging.)
def parse_args():
    parser = argparse.ArgumentParser()
    # question number (1-6); note the default "repetition" is not a valid
    # number and must be overridden on the command line
    parser.add_argument(
        "--question",
        type=str,
        default="repetition",
    )
    parser.add_argument("--summaries", type=Path, default="")
    parser.add_argument("--select", type=str, default="*")
    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--device", type=str, default="cuda")

    args = parser.parse_args()
    return args


def parse_summaries(path: Path):
    """
    :return: a pandas dataframe with at least the columns 'text' and 'summary'
    """
    # read the csv file
    df = pd.read_csv(path).dropna()

    # check if the csv file has the correct columns
    if not all([col in df.columns for col in ["text", "summary"]]):
        raise ValueError("The csv file must have the columns 'text' and 'summary'.")

    return df


def evaluate_classification_task(model, tokenizer, question, df, batch_size):
    texts = df.text.tolist()
    summaries = df.summary.tolist()

    template = "premise: {premise} hypothesis: {hypothesis}"
    ds = [template.format(premise=text[:20 * 1024], hypothesis=summary) for text, summary in zip(texts, summaries)]

    eval_loader = torch.utils.data.DataLoader(ds, batch_size=batch_size)

    metrics = {f"{question}/proba_1": [], f"{question}/proba_0": [], f"{question}/guess": []}

    with torch.no_grad():
        for batch in tqdm(eval_loader):
            # tokenize the batch
            inputs = tokenizer(batch, padding=True, truncation=True, return_tensors="pt")
            # move the inputs to the device
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

            N_inputs = inputs["input_ids"].shape[0]
            # make the decoder input a single <pad> token
            decoder_input_ids = torch.full((N_inputs, 1), tokenizer.pad_token_id, dtype=torch.long, device=model.device)

            outputs = model(**inputs, decoder_input_ids=decoder_input_ids)
            logits = outputs.logits
            # retrieve the logits for the first decoded token, restricted to the
            # vocabulary ids of the "0" and "1" answer tokens (hard-coded as 497 and 333)
            logits = logits[:, -1, [497, 333]]

            # compute the probabilities
            probs = F.softmax(logits, dim=-1)

            # compute the guess
            guess = probs.argmax(dim=-1)

            # append the metrics
            metrics[f"{question}/proba_1"].extend(probs[:, 1].tolist())
            metrics[f"{question}/proba_0"].extend(probs[:, 0].tolist())
            metrics[f"{question}/guess"].extend(guess.tolist())

    # average the metrics
    # metrics = {k: sum(v) / len(v) for k, v in metrics.items()}

    return metrics


def main():
    args = parse_args()

    model_name = f"google/seahorse-large-q{args.question}"
    question = map_questionnumber_to_question[f"question{args.question}"]

    # load the model in float16 to save memory
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map='auto', torch_dtype=torch.float16)

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    df = parse_summaries(args.summaries)

    metrics = evaluate_classification_task(model, tokenizer, question, df, args.batch_size)

    # make a dataframe with the metrics
    df_metrics = pd.DataFrame(metrics)

    # merge the metrics with the summaries
    df = parse_summaries(args.summaries)
    df = pd.concat([df, df_metrics], axis=1)

    path = Path(args.summaries)

    if path.exists():
        df_old = pd.read_csv(path, index_col=0)

        # create the columns if they do not exist
        for col in df.columns:
            if col not in df_old.columns:
                df_old[col] = float("nan")

        # add the new values to the dataframe
        for col in df.columns:
            df_old[col] = df[col]

        df = df_old

    # save the dataframe
    df.to_csv(args.summaries)


if __name__ == "__main__":
    main()
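A hedged sketch (an assumption, not part of the diff) of how the hard-coded answer-token ids above could be looked up from the tokenizer instead of being written as literals; whether the printed ids equal 497 and 333 depends on the SEAHORSE vocabulary:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/seahorse-large-q2")  # any of the q1-q6 models

ids_zero = tokenizer("0", add_special_tokens=False).input_ids
ids_one = tokenizer("1", add_special_tokens=False).input_ids
print(ids_zero, ids_one)  # compare against the hard-coded [497, 333] the script indexes with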
glimpse-ui/glimpse/glimpse/src/beam_rsa_decoding.py
ADDED
@@ -0,0 +1,207 @@
import argparse
import datetime
from pathlib import Path

import pandas as pd
from datasets import Dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

from rsasumm.beam_search import RSAContextualDecoding
from tqdm import tqdm

GENERATION_CONFIGS = {
    "top_p_sampling": {
        "max_new_tokens": 200,
        "do_sample": True,
        "top_p": 0.95,
        "temperature": 1.0,
        "num_return_sequences": 8,
        "num_beams": 1,
        # "num_beam_groups": 4,
    },
    **{
        f"sampling_topp_{str(topp).replace('.', '')}": {
            "max_new_tokens": 200,
            "do_sample": True,
            "num_return_sequences": 8,
            "top_p": 0.95,
        }
        for topp in [0.5, 0.8, 0.95, 0.99]
    },
}

# add the base config to all configs
for key, value in GENERATION_CONFIGS.items():
    GENERATION_CONFIGS[key] = {
        # "max_length": 2048,
        "min_length": 0,
        "early_stopping": True,
        **value,
    }


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="facebook/bart-large-cnn")
    parser.add_argument("--dataset_name", type=str, default="amazon")
    parser.add_argument("--dataset_path", type=str, default=None)
    parser.add_argument(
        "--decoding_config",
        type=str,
        default="top_p_sampling",
        choices=GENERATION_CONFIGS.keys(),
    )

    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--device", type=str, default="cuda")

    parser.add_argument("--output_dir", type=str, default="output")

    # limit the number of samples to generate
    parser.add_argument("--limit", type=int, default=None)

    args = parser.parse_args()

    return args


def prepare_dataset(dataset_name, dataset_path=None) -> Dataset:
    dataset_path = Path(dataset_path)
    if dataset_name == "amazon":
        dataset = pd.read_csv(dataset_path / "amazon_test.csv")
    elif dataset_name == "space":
        dataset = pd.read_csv(dataset_path / "space.csv")
    elif dataset_name == "yelp":
        dataset = pd.read_csv(dataset_path / "yelp_test.csv")
    elif dataset_name == "reviews":
        dataset = pd.read_csv(dataset_path / "test_metareviews.csv")
    elif dataset_name == "multi_news":
        dataset = pd.read_csv(dataset_path / "multi_news.csv")
    else:
        raise ValueError(f"Unknown dataset {dataset_name}")

    # make a dataset from the dataframe
    dataset = Dataset.from_pandas(dataset)

    return dataset


def evaluate_summarizer(model, tokenizer, dataset: Dataset, decoding_config) -> Dataset:
    """
    @param model: The model used to generate the summaries
    @param tokenizer: The tokenizer used to tokenize the text and the summary
    @param dataset: A dataset with the text
    @param decoding_config: Dictionary with the decoding config
    @return: The same dataset with the summaries added
    """
    rsa = RSAContextualDecoding(model, tokenizer, device=model.device)

    # generate summaries
    summaries = []

    print("Generating summaries...")

    for id, batch in tqdm(dataset.to_pandas().groupby("id")):
        text = batch["text"].tolist()

        inputs = tokenizer(
            text,
            max_length=1024,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        batch_size = inputs["input_ids"].shape[0]

        # decode one RSA summary per source text of the group; note the sampling
        # parameters below are fixed here rather than taken from decoding_config
        for k in tqdm(range(len(text))):
            # move inputs to device
            inputs = {key: value.to("cuda") for key, value in inputs.items()}

            output = rsa.generate(
                target_id=k,
                source_texts_ids=inputs["input_ids"],
                source_text_attention_mask=inputs["attention_mask"],
                max_length=50,
                top_p=0.95,
                do_sample=True,
                rationality=8.0,
                temperature=1.0,
                process_logits_before_rsa=True,
            )
            # output: (batch_size * num_return_sequences, max_length)
            outputs = output[0]
            summaries.append(tokenizer.decode(outputs[0], skip_special_tokens=True))

    # add summaries to the huggingface dataset
    dataset = dataset.add_column("summary", summaries)

    return dataset


def sanitize_model_name(model_name: str) -> str:
    """
    Sanitize the model name to be used as a folder name.
    @param model_name: The model name
    @return: The sanitized model name
    """
    return model_name.replace("/", "_")


def main():
    args = parse_args()

    # load the model
    model = AutoModelForSeq2SeqLM.from_pretrained(args.model_name)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)

    tokenizer.pad_token = tokenizer.unk_token
    tokenizer.pad_token_id = tokenizer.unk_token_id

    # move model to device
    model = model.to(args.device)

    # load the dataset
    print("Loading dataset...")
    dataset = prepare_dataset(args.dataset_name, args.dataset_path)

    # limit the number of samples
    if args.limit is not None:
        _lim = min(args.limit, len(dataset))
        dataset = dataset.select(range(_lim))

    # generate summaries
    dataset = evaluate_summarizer(
        model,
        tokenizer,
        dataset,
        GENERATION_CONFIGS[args.decoding_config],
    )

    df_dataset = dataset.to_pandas()
    df_dataset = df_dataset.explode("summary")
    df_dataset = df_dataset.reset_index()
    # add an id for each candidate summary of an example
    # df_dataset["id_candidate"] = df_dataset.groupby(["index"]).cumcount()

    # save the dataset, with a unique date in the name
    now = datetime.datetime.now()
    date = now.strftime("%Y-%m-%d-%H-%M-%S")
    model_name = sanitize_model_name(args.model_name)
    output_path = (
        Path(args.output_dir)
        / f"{model_name}-_-{args.dataset_name}-_-{args.decoding_config}-_-{date}.csv"
    )

    # create the output dir if it doesn't exist
    if not output_path.parent.exists():
        output_path.parent.mkdir(parents=True, exist_ok=True)

    df_dataset.to_csv(output_path, index=False, encoding="utf-8")


if __name__ == "__main__":
    main()
glimpse-ui/glimpse/glimpse/src/compute_rsa.py
ADDED
@@ -0,0 +1,137 @@
from pathlib import Path

import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PegasusTokenizer
import argparse
from tqdm import tqdm

from pickle import dump

import sys, os.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

from rsasumm.rsa_reranker import RSAReranking


DESC = """
Compute the RSA matrices for all the sets of multi-document samples and dump them, along with additional information, in a pickle file.
"""


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="facebook/bart-large-cnn")
    parser.add_argument("--summaries", type=Path, default="glimpse/data/candidates/extractive_sentences-_-all_reviews_2017-_-none-_-2025-05-20-20-22-18.csv")
    parser.add_argument("--output_dir", type=str, default="glimpse/output")

    parser.add_argument("--filter", type=str, default=None)

    # if run in a scripted way, the output path will be printed
    parser.add_argument("--scripted-run", action=argparse.BooleanOptionalAction, default=False)

    parser.add_argument("--device", type=str, default="cuda")

    return parser.parse_args()


def parse_summaries(path: Path) -> pd.DataFrame:
    try:
        summaries = pd.read_csv(path)
    except Exception:
        raise ValueError(f"Unknown dataset {path}")

    # check if the dataframe has the right columns
    if not all(
        col in summaries.columns for col in ["index", "id", "text", "gold", "summary", "id_candidate"]
    ):
        raise ValueError(
            "The dataframe must have columns ['index', 'id', 'text', 'gold', 'summary', 'id_candidate']"
        )

    return summaries


def compute_rsa(summaries: pd.DataFrame, model, tokenizer, device):
    results = []
    for name, group in tqdm(summaries.groupby(["id"])):
        rsa_reranker = RSAReranking(
            model,
            tokenizer,
            device=device,
            candidates=group.summary.unique().tolist(),
            source_texts=group.text.unique().tolist(),
            rationality=1,
        )
        (
            best_rsa,
            best_base,
            speaker_df,
            listener_df,
            initial_listener,
            language_model_proba_df,
            initial_consensuality_scores,
            consensuality_scores,
        ) = rsa_reranker.rerank(t=1)

        gold = group['gold'].tolist()[0]

        results.append(
            {
                "id": name,
                "best_rsa": best_rsa,  # best speaker score
                "best_base": best_base,  # naive baseline
                "speaker_df": speaker_df,  # all speaker results
                "listener_df": listener_df,  # all listener results (chances of guessing correctly)
                "initial_listener": initial_listener,
                "language_model_proba_df": language_model_proba_df,
                "initial_consensuality_scores": initial_consensuality_scores,
                "consensuality_scores": consensuality_scores,  # consensuality scores
                "gold": gold,
                "rationality": 1,  # hyperparameter
                "text_candidates": group,
            }
        )

    return results


def main():
    args = parse_args()

    if args.filter is not None:
        if args.filter not in args.summaries.stem:
            return

    # load the model and the tokenizer
    model = AutoModelForSeq2SeqLM.from_pretrained(args.model_name)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)

    model = model.to(args.device)

    # load the summaries
    summaries = parse_summaries(args.summaries)

    # rerank the summaries
    results = compute_rsa(summaries, model, tokenizer, args.device)
    results = {"results": results}

    results["metadata/reranking_model"] = args.model_name
    results["metadata/rsa_iterations"] = 1

    # save the results; make the output directory if it does not exist
    Path(args.output_dir).mkdir(parents=True, exist_ok=True)
    output_path = Path(args.output_dir) / f"{args.summaries.stem}-_-r3-_-rsa_reranked-{args.model_name.replace('/', '-')}.pk"
    # (computed but currently unused)
    output_path_base = (
        Path(args.output_dir) / f"{args.summaries.stem}-_-base_reranked.pk"
    )

    with open(output_path, "wb") as f:
        dump(results, f)

    # in case of a scripted run, print the output path
    if args.scripted_run:
        print(output_path)


if __name__ == "__main__":
    main()
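A hedged sketch (not part of the diff; the file name is a placeholder) of reading back the pickle this script writes:

from pickle import load

with open("glimpse/output/some_run-_-r3-_-rsa_reranked-facebook-bart-large-cnn.pk", "rb") as f:
    payload = load(f)

print(payload["metadata/reranking_model"])
for entry in payload["results"]:
    # each entry holds the per-sample RSA matrices and scores listed above
    print(entry["id"], entry["best_rsa"])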
glimpse-ui/glimpse/glimpse/src/rsa_merge_into_single.py
ADDED
@@ -0,0 +1,52 @@
import argparse
import datetime
from pathlib import Path

import pandas as pd
from datasets import Dataset
from tqdm import tqdm


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--summaries", type=Path)

    parser.add_argument("--output_dir", type=str, default="output")

    # limit the number of samples to generate
    parser.add_argument("--limit", type=int, default=None)

    args = parser.parse_args()

    return args


def main():
    args = parse_args()

    path = Path(args.summaries)

    for file in path.glob("*.csv"):
        model_name, dataset, decoding_config, date, reranking_type = file.stem.split('-_-')
        df = pd.read_csv(file)
        df = df.drop(["Unnamed: 0.1", "Unnamed: 0"], axis=1)

        # df = df[['id', 'id_text', 'text', 'summary', 'gold']]

        # concatenate all the selected summaries of a sample into a single text
        merged_summaries = df.groupby("id").agg({"summary": " ".join}).reset_index()

        # add gold and text
        merged_summaries = merged_summaries.merge(df[["id", "gold", "text"]], on="id")

        # note: as committed, the merged dataframe is never written to
        # args.output_dir; the loop ends here without saving


if __name__ == "__main__":
    main()
glimpse-ui/glimpse/glimpse/src/rsa_reranking.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pathlib import Path

import argparse

import pandas as pd
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

from rsasumm.rsa_reranker import RSAReranking


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="facebook/bart-large-cnn")
    parser.add_argument("--summaries", type=Path, default="")
    parser.add_argument("--output_dir", type=str, default="output")

    parser.add_argument("--filter", type=str, default=None)

    parser.add_argument("--device", type=str, default="cuda")

    return parser.parse_args()


def parse_summaries(path: Path) -> pd.DataFrame:
    summaries = pd.read_csv(path)

    # check that the dataframe has the required columns
    if not all(col in summaries.columns for col in ["id", "text", "id_candidate", "summary"]):
        raise ValueError("The dataframe must have columns ['id', 'text', 'id_candidate', 'summary']")

    return summaries


def reranking_rsa(summaries: pd.DataFrame, model, tokenizer, device):
    best_summaries = []
    best_bases = []
    for name, group in tqdm(summaries.groupby("id")):
        rsa_reranker = RSAReranking(
            model,
            tokenizer,
            device,
            group.summary.unique().tolist(),
            group.text.unique().tolist(),
        )
        best_rsa, best_base, speaker_df, listener_df, initial_listener, language_model_proba_df = rsa_reranker.rerank(t=3)

        group = group.set_index("summary")
        group_lines = group.loc[best_rsa]
        group_lines["speaker_proba"] = 0
        group_lines["listener_proba"] = 0
        group_lines["language_model_proba"] = 0
        group_lines["initial_listener_proba"] = 0

        group_lines = group_lines.reset_index()

        for i, (idx, line) in enumerate(group_lines.iterrows()):
            summary = line["summary"]
            text = line["text"]

            # use .loc[row, col] rather than chained indexing so the
            # assignment actually writes into group_lines
            group_lines.loc[i, "speaker_proba"] = speaker_df.loc[text, summary]
            group_lines.loc[i, "listener_proba"] = listener_df.loc[text, summary]
            group_lines.loc[i, "language_model_proba"] = language_model_proba_df.loc[text, summary]
            group_lines.loc[i, "initial_listener_proba"] = initial_listener.loc[text, summary]

        group_lines["id"] = name
        best_summaries.append(group_lines)

        best_base_lines = group.loc[best_base]
        best_base_lines = best_base_lines.reset_index()

        best_base_lines["speaker_proba"] = 0
        best_base_lines["listener_proba"] = 0
        best_base_lines["language_model_proba"] = 0
        best_base_lines["initial_listener_proba"] = 0

        for i, (idx, line) in enumerate(best_base_lines.iterrows()):
            summary = line["summary"]
            text = line["text"]

            best_base_lines.loc[i, "speaker_proba"] = speaker_df.loc[text, summary]
            best_base_lines.loc[i, "listener_proba"] = listener_df.loc[text, summary]
            best_base_lines.loc[i, "language_model_proba"] = language_model_proba_df.loc[text, summary]
            best_base_lines.loc[i, "initial_listener_proba"] = initial_listener.loc[text, summary]

        best_base_lines["id"] = name
        best_bases.append(best_base_lines)

    best_summaries = pd.concat(best_summaries)
    best_bases = pd.concat(best_bases)

    return best_summaries, best_bases


def main():
    args = parse_args()

    if args.filter is not None:
        if args.filter not in args.summaries.stem:
            return

    # load the model and the tokenizer
    model = AutoModelForSeq2SeqLM.from_pretrained(args.model_name)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)

    model = model.to(args.device)

    # load the summaries
    summaries = parse_summaries(args.summaries)

    # rerank the summaries
    best_summaries, best_base = reranking_rsa(summaries, model, tokenizer, device=args.device)

    best_summaries["metadata/reranking_model"] = args.model_name
    best_summaries["metadata/rsa_iterations"] = 3

    best_base["metadata/reranking_model"] = args.model_name
    best_base["metadata/rsa_iterations"] = 3

    # save the summaries
    # make the output directory if it does not exist
    Path(args.output_dir).mkdir(parents=True, exist_ok=True)
    output_path = Path(args.output_dir) / f"{args.summaries.stem}-_-rsa_reranked.csv"
    output_path_base = Path(args.output_dir) / f"{args.summaries.stem}-_-base_reranked.csv"

    best_summaries.to_csv(output_path)
    best_base.to_csv(output_path_base)


if __name__ == "__main__":
    main()
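For reference, a minimal sketch of an input file that `parse_summaries` accepts (toy values; in practice the candidate files come from the generation scripts elsewhere in this commit):

import pandas as pd

pd.DataFrame({
    "id": ["doc1", "doc1"],
    "text": ["review one.", "review two."],
    "id_candidate": [0, 1],
    "summary": ["candidate summary A.", "candidate summary B."],
}).to_csv("toy_candidates.csv", index=False)

# then, hypothetically:
#   python rsa_reranking.py --summaries toy_candidates.csv --output_dir output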
glimpse-ui/glimpse/mds/Single summaries expes.ipynb
ADDED
@@ -0,0 +1,587 @@
# %%
import matplotlib as mpl
# Use the pgf backend (must be set before pyplot is imported)
mpl.use('pgf')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
from pathlib import Path

# %%
# use the seaborn paper style
plt.style.use('seaborn-paper')

# %%
sumy_individual_path = Path('output/summaries/sumy_individual/')
ours_individual_path = Path('output/summaries/methods_reviews_individual/')

TABLE_PATH = Path("../../../EMIRR/papers/rsa_multi_document/tables/")
FIGURE = Path("../../../EMIRR/papers/rsa_multi_document/figures/")

# make sure the folders exist
TABLE_PATH.mkdir(parents=True, exist_ok=True)
FIGURE.mkdir(parents=True, exist_ok=True)

# %%
dfs = []
for file in sumy_individual_path.glob('*.csv'):
    df = pd.read_csv(file)
    method = file.stem.split('-_-')[1]

    sumy = file.stem.split('-_-')[-1].split('_')
    if len(sumy) > 1:
        sentence_count = int(sumy[-1])
        df['metadata/sentence_count'] = sentence_count

    # df['Method'] = method
    dfs.append(df)


for file in ours_individual_path.glob('*.csv'):
    generation_method, dataset, generation_params, date, rsa_param, rsa_ranking_model, method = file.stem.split('-_-')

    method, n = "_".join(method.split('_')[:-1]), method.split('_')[-1]

    # read the file before annotating it, so the columns land on this
    # file's dataframe rather than on the previous iteration's
    df = pd.read_csv(file)

    if "metadata/method" not in df.columns:
        df['metadata/method'] = method

    # reranking_model = rsa_ranking_model[len("rsa_reranked-"):]
    # df['Ranking Model'] = reranking_model
    # df['Method'] = method
    # df['N'] = int(n) if n != "based" else 3
    df['Generation Method'] = generation_method

    dfs.append(df)

df = pd.concat(dfs)
del dfs

df = df.drop([c for c in df.columns if "Unnamed" in c], axis=1)

# %%
df['metadata/method'] = df['metadata/method'].fillna('N/A')
df = df[~(df["metadata/method"].str.contains('lead'))]
df = df[~(df["metadata/method"].str.contains('Lead'))]

# %%
def fix_generation(x):
    if x == "abstractive_sentences":
        return "extractive_sentences"
    else:
        return x


df['Generation Method'] = df["Generation Method"].apply(fix_generation)

# %%
df['N'] = (df['metadata/n_sentences'].fillna(0) + df['metadata/sentence_count'].fillna(0)).apply(int)

def fix_methods(x):
    if "consensus" in str(x):
        return "Agreement"
    elif "rsa" in str(x):
        return "Speaker+Agreement"
    else:
        return x

df['metadata/method'] = df['metadata/method'].apply(fix_methods)

# %%
df['metadata/sentence_count'].unique()

# %%
metric = 'SHMetric/Main ideas/proba_1'

SHMetric = df.columns[df.columns.str.contains('SHMetric') & df.columns.str.contains('proba_1')].tolist()

toplot = df.copy()
toplot['metadata/reranking_model'] = toplot['metadata/reranking_model'].fillna('N/A')
toplot['Generation Method'] = toplot['Generation Method'].fillna('N/A')

toplot = toplot.groupby(["metadata/method", "id", "Generation Method", "metadata/reranking_model"]).mean()
idx = toplot.groupby(["metadata/method", "id", "Generation Method"])[metric].idxmax()

toplot = toplot.loc[idx].reset_index()

avg = toplot.groupby(["metadata/method"]).agg(['mean', 'std'])
avg = avg[SHMetric]

display(avg)

# rename the columns: Conciseness, Main ideas, Repetition, ...
avg.columns = pd.MultiIndex.from_tuples([(f'{c[0].split("/")[1]}', c[1]) for c in avg.columns])

def map_ours(x):
    if "Agreement" in x:
        return "Ours"
    else:
        return "Bas."


avg = avg.groupby(["metadata/method"]).mean()

avg['Ours'] = avg.index.get_level_values(0).map(map_ours)

avg = avg.reset_index().rename(columns={'metadata/method': 'Method'})
avg = avg.set_index(['Ours', 'Method'])
avg = avg.sort_index()

# print avg columns, level 0
print(avg.columns.get_level_values(0))
# Index(['Comprehensible', 'Comprehensible', 'Repetition', 'Repetition',
#        'Grammar', 'Grammar', 'Attribution', 'Attribution', 'Main ideas',
#        'Main ideas', 'Conciseness', 'Conciseness'],
#       dtype='object')

# rename the columns with shorter names
avg.columns = pd.MultiIndex.from_tuples([
    ('Compr.', 'mean'), ('Compr.', 'std'),
    ('Repet.', 'mean'), ('Repet.', 'std'),
    ('Gram.', 'mean'), ('Gram.', 'std'),
    ('Attr.', 'mean'), ('Attr.', 'std'),
    ('M. i.', 'mean'), ('M. i.', 'std'),
    ('Conc.', 'mean'), ('Conc.', 'std')
])


style = avg.style
style = style.format("{:.2f}")

# make the std columns smaller and lighter in latex
idx = pd.IndexSlice
# style = style.set_properties(subset=idx[:, ['std']], **{'font-size': '10pt', 'font-weight': 'lighter'})

# bold the best value in each mean column
style = style.highlight_max(axis=0, subset=idx[:, idx[:, 'mean']], props="bfseries: ;")

# grey out the std columns and add a +/- sign
style = style.set_properties(**{'color': '[HTML]{A0A1A3}'}, subset=(idx[:], idx[:, 'std']))
style = style.format("±{:.2f}", subset=(idx[:], idx[:, 'std']))

# drop level 1 of the columns
style = style.hide_columns(level=1)

# to latex
latex = style.to_latex(clines="skip-last;data", hrules=True, multirow_align="l", environment="table*", caption="Estimated human judgment using the SEAHORSE metrics for all baselines and our templated summaries compared against each document independently. M. i. stands for Main ideas, Attr. for Attribution, Gram. for Grammar, Compr. for Comprehensible, Conc. for Conciseness, and Repet. for Repetition. The best value in each column is in bold.")
display(style)

# add a resize box
latex = latex.replace("\\begin{tabular}", "\\resizebox{\\textwidth}{!}{\\begin{tabular}")
latex = latex.replace("\\end{tabular}", "\\end{tabular}}")

# write to file
with open(TABLE_PATH / "seahorse.tex", "w") as f:
    f.write(latex)

# display(avg)
# avg.set_index('Method')

# %%
metric = 'SHMetric/Main ideas/proba_1'
# white grid
sns.set(style="whitegrid")
avg = df.groupby(["metadata/method", "id", "metadata/reranking_model", "Generation Method"]).mean().reset_index()
avg = avg.sort_values(metric)

# rename the columns with human-readable names
avg = avg.rename(columns={
    'metadata/method': 'Method',
    'metadata/reranking_model': 'Reranking Model',
    metric: 'Main Ideas'
})

g = sns.catplot(data=avg, y="Main Ideas", x="Method", hue="Reranking Model", col="Generation Method", kind="bar")

# get the legend labels and handles
handles, labels = g._legend_data.values(), g._legend_data.keys()

# set the legend
g._legend.remove()
g.fig.legend(handles, labels, loc='upper center', ncol=2, fontsize=25, title_fontsize=25, title="Reranking Model", bbox_to_anchor=(0.4, -0.3))

# set the title template
g.set_titles("{col_name}")

# add an hline at 0.215 for the baseline, on each axis
for ax in g.axes.flat:
    ax.axhline(0.215, ls='--', color='black', linewidth=5)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=30)

# make the labels bigger
for ax in g.axes.flat:
    ax.set_xlabel("")
    ax.set_ylabel(ax.get_ylabel(), fontsize=25, fontweight='bold')
    ax.set_xticklabels(ax.get_xticklabels(), fontsize=25, fontweight='bold')

# make the titles bigger
for ax in g.axes.flat:
    ax.set_title(ax.get_title(), fontsize=25, fontweight='bold')

plt.xticks(rotation=30)

# save the figure
g.savefig(FIGURE / "seahorse_main_ideas.pdf")

# %%
metric = 'SHMetric/Main ideas/proba_1'

toplot = df.copy()
toplot['metadata/reranking_model'] = toplot['metadata/reranking_model'].fillna('N/A')
toplot['Generation Method'] = toplot['Generation Method'].fillna('N/A')

toplot = toplot.groupby(["metadata/method", "id", "Generation Method", "metadata/reranking_model"]).mean()
idx = toplot.groupby(["metadata/method", "id", "Generation Method"])[metric].idxmax()
toplot = toplot.loc[idx].reset_index()
toplot = toplot[~toplot['metadata/method'].str.contains('Lead')]

toplot = toplot.sort_values(metric, ascending=True)
order = toplot.groupby("metadata/method").mean().sort_values(metric)

display(toplot.groupby("metadata/method").mean().sort_values(metric)[metric])

sns.barplot(data=toplot, y=metric, x="metadata/method", order=order.index)

plt.xticks(rotation=45)

# %%
metric = 'SHMetric/Conciseness/proba_1'

toplot = df.copy()
toplot['metadata/reranking_model'] = toplot['metadata/reranking_model'].fillna('N/A')
toplot['Generation Method'] = toplot['Generation Method'].fillna('N/A')

toplot = toplot.groupby(["metadata/method", "id", "Generation Method", "metadata/reranking_model"]).mean()
idx = toplot.groupby(["metadata/method", "id", "Generation Method"])[metric].idxmax()
toplot = toplot.loc[idx].reset_index()
toplot = toplot[~toplot['metadata/method'].str.contains('Lead')]

toplot = toplot.sort_values(metric, ascending=True)
order = toplot.groupby("metadata/method").mean().sort_values(metric)

sns.barplot(data=toplot, y=metric, x="metadata/method", order=order.index)

plt.xticks(rotation=45)

# %%
metric = 'SHMetric/Repetition/proba_1'

toplot = df.copy()
toplot['metadata/reranking_model'] = toplot['metadata/reranking_model'].fillna('N/A')
toplot['Generation Method'] = toplot['Generation Method'].fillna('N/A')

toplot = toplot.groupby(["metadata/method", "id", "Generation Method", "metadata/reranking_model"]).mean()
idx = toplot.groupby(["metadata/method", "id", "Generation Method"])[metric].idxmax()
toplot = toplot.loc[idx].reset_index()
toplot = toplot[~toplot['metadata/method'].str.contains('Lead')]

toplot = toplot.sort_values(metric, ascending=True)
order = toplot.groupby("metadata/method").mean().sort_values(metric)

sns.barplot(data=toplot, y=metric, x="metadata/method", order=order.index)

plt.xticks(rotation=45)

# %%
metric = 'SHMetric/Main ideas/proba_1'

avg = df.groupby(["metadata/method", "id", "N"]).mean().reset_index()
avg = avg.sort_values(metric)
sns.barplot(data=avg[~avg['metadata/method'].str.contains('Lead')], y=metric, x="metadata/method", hue='N')
plt.xticks(rotation=45)

# %%
metric = 'SHMetric/Main ideas/proba_1'

avg = df.groupby(["metadata/method", "id"]).mean().reset_index()
avg = avg[~avg['metadata/method'].str.contains('Lead')].sort_values(metric)
sns.barplot(data=avg, y=metric, x="metadata/method")
plt.xticks(rotation=45)

# %%
metric = 'rougeL'

avg = df.groupby(["metadata/method", "id"]).mean().reset_index()
avg = avg[~avg['metadata/method'].str.contains('Lead')].sort_values(metric)
sns.barplot(data=avg, y=metric, x="metadata/method")
plt.xticks(rotation=45)

# %%
df.columns
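The notebooks and scripts above all rely on a `-_-`-separated file-stem convention to recover run metadata. A minimal sketch of that convention (the stem below is a made-up example, not a real output file):

stem = "facebook-bart-large-cnn-_-reviews-_-top_p_sampling-_-2024-01-01-_-rsa_reranked"
model_name, dataset, decoding_config, date, reranking_type = stem.split("-_-")
print(dataset)         # reviews
print(reranking_type)  # rsa_reranked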
glimpse-ui/glimpse/mds/Template summaries.ipynb
ADDED
@@ -0,0 +1,531 @@
# %%
import pickle as pk
import pandas as pd
from pathlib import Path
import numpy as np
import seaborn as sns

from rouge_score import rouge_scorer

from lexrank import LexRank
from lexrank.mappings.stopwords import STOPWORDS
import nltk

# %%
path = Path("output/summaries/rsa_reranking/reviews_rsa_matrices/")
output_path = Path("output/summaries/methods_reviews/")

# %% [markdown]
# # Consensus score based summaries:

# %%
def consensus_scores_based_summaries(sample, n_consensus=3, n_dissensus=3):
    consensus_samples = sample['consensuality_scores'].sort_values(ascending=True).head(n_consensus).index.tolist()
    disensus_samples = sample['consensuality_scores'].sort_values(ascending=False).head(n_dissensus).index.tolist()

    consensus = ".".join(consensus_samples)
    disensus = ".".join(disensus_samples)

    return consensus + "\n\n" + disensus


def rsa_scores_based_summaries(sample, n_consensus=3, n_rsa_speaker=3):
    consensus_samples = sample['consensuality_scores'].sort_values(ascending=True).head(n_consensus).index.tolist()
    rsa = sample['best_rsa'].tolist()[:n_rsa_speaker]

    consensus = ".".join(consensus_samples)
    rsa = ".".join(rsa)

    return consensus + "\n\n" + rsa


def lead(sample, N=10):
    texts = sample['speaker_df'].index.tolist()

    summary = "\n".join([".".join(t.split('.')[:N]) for t in texts])

    return summary


scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)


def construct_templated_summaries(data, fn, dataset=None):
    records = []
    for sample in data['results']:
        summary = fn(sample)
        text = "\n\n".join(sample['speaker_df'].index.tolist())
        record = {
            'id': sample['id'],
            'summary': summary,
            'metadata/reranking_model': data['metadata/reranking_model'],
            'metadata/rsa_iterations': data['metadata/rsa_iterations'],
            "text": text,
        }
        if dataset is not None:
            record['gold'] = dataset.loc[sample["id"]]['gold'].tolist()[0]
            if record['gold'] is not None:
                rouges = scorer.score(summary, record['gold'])
                record |= {r: v.fmeasure for r, v in rouges.items()}

        records.append(record)

    return pd.DataFrame.from_records(records)

# %%
def prepare_dataset(dataset_name, dataset_path="data/processed/"):
    dataset_path = Path(dataset_path)
    if dataset_name == "amazon":
        dataset = pd.read_csv(dataset_path / "amazon_test.csv")
    elif dataset_name == "space":
        dataset = pd.read_csv(dataset_path / "space.csv")
    elif dataset_name == "yelp":
        dataset = pd.read_csv(dataset_path / "yelp_test.csv")
    elif dataset_name == "reviews":
        dataset = pd.read_csv(dataset_path / "test_metareviews.csv")
    else:
        raise ValueError(f"Unknown dataset {dataset_name}")

    return dataset

# %%
# df = prepare_dataset('reviews')
# for n, group in df.groupby('id'):
#     for idx, row in group.iterrows():
#         print(row['text'].replace('-----', "\n"))
#         print("===========")
#     break
rsa_scores_based_summaries

# %%
for n in [1, 2, 3, 4, 5, 6]:
    for file in path.glob("*.pk"):
        print(file)
        with file.open('rb') as fd:
            data = pk.load(fd)

        Path(output_path).mkdir(parents=True, exist_ok=True)
        model_name, dataset_name, decoding_config, date = str(file.stem).split('-_-')[:4]

        dataset = prepare_dataset(dataset_name, dataset_path="data/processed/")
        dataset = dataset.set_index('id')

        fn = lambda sample: consensus_scores_based_summaries(sample, n_consensus=n, n_dissensus=n)

        df = construct_templated_summaries(data, fn, dataset=dataset)

        df['metadata/method'] = "Agreement"
        df['metadata/n_sentences'] = 2 * n
        df['metadata/n_consensus'] = n
        df['metadata/n_dissensus'] = n

        name = file.stem + "-_-" + f"consensus_score_based_{n}.csv"

        if (output_path / name).exists():
            df_old = pd.read_csv(output_path / name)

            for col in df.columns:
                if col not in df_old.columns:
                    df_old[col] = float("nan")

            # add the new entries to the existing dataframe
            for col in df.columns:
                df_old[col] = df[col]

            df = df_old

        df.to_csv(output_path / name)

# %%
for n in [1, 2, 3, 4, 5, 6]:
    for file in path.glob("*.pk"):
        with file.open('rb') as fd:
            data = pk.load(fd)

        Path(output_path).mkdir(parents=True, exist_ok=True)
        model_name, dataset_name, decoding_config, date = str(file.stem).split('-_-')[:4]

        dataset = prepare_dataset(dataset_name, dataset_path="data/processed/")
        dataset = dataset.set_index('id')

        fn = lambda sample: rsa_scores_based_summaries(sample, n_consensus=n, n_rsa_speaker=n)
        df = construct_templated_summaries(data, fn, dataset=dataset)

        df['metadata/method'] = "Speaker+Agreement"
        df['metadata/n_sentences'] = 2 * n
        df['metadata/n_consensus'] = n
        df['metadata/n_dissensus'] = n

        name = file.stem + "-_-" + f"rsa_score_based_{n}.csv"

        if (output_path / name).exists():
            df_old = pd.read_csv(output_path / name)

            for col in df.columns:
                if col not in df_old.columns:
                    df_old[col] = float("nan")

            # add the new entries to the existing dataframe
            for col in df.columns:
                df_old[col] = df[col]

            df = df_old

        df.to_csv(output_path / name)

# %%
for n in [1, 2, 3, 4, 5, 6, 7, 8]:
    for file in path.glob("*.pk"):
        with file.open('rb') as fd:
            data = pk.load(fd)

        Path(output_path).mkdir(parents=True, exist_ok=True)
        model_name, dataset_name, decoding_config, date = str(file.stem).split('-_-')[:4]

        dataset = prepare_dataset(dataset_name, dataset_path="data/processed/")
        dataset = dataset.set_index('id')

        fn = lambda sample: lead(sample, N=2 * n)

        df = construct_templated_summaries(data, fn, dataset=dataset)

        df['metadata/method'] = "Lead"
        df['metadata/n_sentences'] = 2 * n

        name = file.stem + "-_-" + f"lead_{2*n}.csv"

        if (output_path / name).exists():
            df_old = pd.read_csv(output_path / name)

            for col in df.columns:
                if col not in df_old.columns:
                    df_old[col] = float("nan")

            # add the new entries to the existing dataframe
            for col in df.columns:
                df_old[col] = df[col]

            df = df_old

        df.to_csv(output_path / name)

# %%
import seaborn as sns
import matplotlib.pyplot as plt

# %%
output_path = Path("output/summaries/methods_reviews/")

# %%
import subprocess


for file in output_path.glob("*.csv"):
    print(file)
    cmd = ["python", "mds/evaluate_bartbert_metrics.py", "--summaries", file]
    subprocess.run(cmd)

# %%
dfs = []
for file in output_path.glob("*.csv"):
    model_name, dataset_name, decoding_config, date = str(file.stem).split('-_-')[:4]
    method = str(file.stem).split('-_-')[-1]

    df = pd.read_csv(file)
    df['metadata/Model'] = model_name
    df['metadata/Dataset'] = dataset_name
    df['metadata/method'] = method

    df["Method"] = f"{model_name}/{method}"

    dfs.append(df)

df = pd.concat(dfs)

df

# %%
sns.catplot(data=df, hue='Method', y='rougeL', x='metadata/Dataset', kind='bar')

# %%
sns.catplot(data=df, hue='metadata/method', y='rouge1', x='metadata/reranking_model', kind='bar', row="metadata/Model")
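To illustrate what `consensus_scores_based_summaries` produces, a minimal sketch on a hand-built sample (the keys mirror how the notebook reads the pickled RSA results; the sentences and scores are made up):

import pandas as pd

sample = {
    "consensuality_scores": pd.Series({
        "the paper is clear": 0.1,        # low score: reviewers agree
        "results are strong": 0.2,
        "section 3 is confusing": 0.9,    # high score: reviewers diverge
    })
}

print(consensus_scores_based_summaries(sample, n_consensus=1, n_dissensus=1))
# the paper is clear
#
# section 3 is confusing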
glimpse-ui/glimpse/mds/discriminative_classification.py
ADDED
@@ -0,0 +1,113 @@
from typing import Tuple

import numpy as np
import pandas as pd
import argparse
from pathlib import Path

import torch
from sentence_transformers import SentenceTransformer


def xlogx(x):
    if x == 0:
        return 0
    else:
        return x * torch.log(x)


def parse_summaries(path: Path):
    # Load the data
    df = pd.read_csv(path)

    if 'id' not in df.columns:
        raise ValueError('id column not found in the summaries file')
    if 'text' not in df.columns:
        raise ValueError('text column not found in the summaries file')
    if 'summary' not in df.columns:
        raise ValueError('summary column not found in the summaries file')

    return df


def embed_text_and_summaries(df: pd.DataFrame, model: SentenceTransformer) -> Tuple[torch.Tensor, torch.Tensor]:
    text_embeddings = model.encode(df.text.tolist(), convert_to_tensor=True)
    summary_embeddings = model.encode(df.summary.tolist(), convert_to_tensor=True)

    return text_embeddings, summary_embeddings


def compute_dot_products(df: pd.DataFrame, text_embeddings: torch.Tensor, summary_embeddings: torch.Tensor):
    df = df.reset_index()
    df['index'] = df.index

    # group by id
    grouped = df.groupby('id')

    # for each id, gather the row indices of its texts and summaries
    # (select the 'index' column explicitly)
    ids_per_sample = grouped['index'].apply(list).tolist()

    # compute the dot products between the texts and the summaries
    metrics = {'proba_of_success': []}
    for text_ids in ids_per_sample:
        # shape (num_text, embedding_dim)
        text_embedding = text_embeddings[text_ids]
        summary_embedding = summary_embeddings[text_ids]

        # shape (num_text, num_text=num_summary)
        dot_product = torch.matmul(text_embedding, summary_embedding.T)

        # apply log softmax over the texts, for each summary
        log_softmax = torch.nn.functional.log_softmax(dot_product, dim=0)

        # num_text: log-probability that each summary is matched to its own text
        log_proba_of_success = torch.diag(log_softmax).squeeze()
        entropy = torch.xlogy(log_proba_of_success, log_proba_of_success).sum(0).squeeze()

        metrics['proba_of_success'].extend(log_proba_of_success.tolist())
        # metrics['entropy'].extend(entropy.tolist())

    df['proba_of_success'] = metrics['proba_of_success']
    # df['entropy'] = metrics['entropy']

    return df


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--summaries', type=Path, required=True)
    parser.add_argument('--model', type=str, default='paraphrase-MiniLM-L6-v2')
    parser.add_argument('--output', type=Path, required=True)
    parser.add_argument('--device', type=str, default='cuda')

    args = parser.parse_args()
    return args


def main():
    args = parse_args()

    # load the model
    model = SentenceTransformer(args.model, device=args.device)

    # load the summaries
    df = parse_summaries(args.summaries)

    # embed the texts and the summaries
    text_embeddings, summary_embeddings = embed_text_and_summaries(df, model)

    # compute the dot products between the texts and the summaries
    df = compute_dot_products(df, text_embeddings, summary_embeddings)

    # create the output directory
    args.output.mkdir(parents=True, exist_ok=True)

    path = args.output / f"{args.summaries.stem}.csv"

    # save the results
    df.to_csv(path, index=False)


if __name__ == '__main__':
    main()
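A minimal sketch of the "probability of success" computed above, on a made-up 2x2 similarity matrix: each column is log-softmaxed over the texts, and the diagonal reads off how strongly each summary points back at its own text.

import torch

# dot_product[i, j]: similarity between text i and summary j (toy values)
dot_product = torch.tensor([[4.0, 1.0],
                            [0.5, 3.0]])

log_softmax = torch.nn.functional.log_softmax(dot_product, dim=0)
log_proba_of_success = torch.diag(log_softmax)
print(log_proba_of_success.exp())
# tensor([0.9707, 0.8808]) — both summaries identify their own text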
glimpse-ui/glimpse/pyproject.toml
ADDED
@@ -0,0 +1,21 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "rsasumm"
version = "0.0.1"
authors = [
]
description = ""
readme = "Readme.md"
requires-python = ">=3.10"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]

[project.urls]
"Homepage" = ""
"Bug Tracker" = ""
glimpse-ui/glimpse/requirements
ADDED
@@ -0,0 +1,10 @@
transformers
numpy==1.25.2
seaborn
matplotlib
gradio
pandas
datasets
nltk
SentencePiece
spacy
glimpse-ui/glimpse/rsasumm/__init__.py
ADDED
File without changes
glimpse-ui/glimpse/rsasumm/beam_search.py
ADDED
@@ -0,0 +1,430 @@
from typing import Tuple, Optional

import torch
from transformers.generation.logits_process import TopKLogitsWarper, TopPLogitsWarper


def compute_rsa_probas(
    logits: torch.Tensor, prior: torch.Tensor, rationality: float = 1.0
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    :param logits: (world_size, num_beam, vocab_size)
    :param prior: (world_size, num_beam) for each beam, the prior over the objects
    :param rationality: rationality parameter; the higher, the more rational, i.e. the more the speaker will try
    to adapt to the listener
    :return: S1, L1: (world_size, num_beam, vocab_size).
    S1[o, b, w] is the (log)probability of the word w given the object o and the current partial summary for the beam b
    L1[o, b, w] is the (log)probability of the object o given the word w and the current partial summary for the beam b
    """

    prod = logits + prior[..., None]

    # L0: literal listener, normalized over the objects (dim 0)
    L0 = torch.nan_to_num(torch.log_softmax(prod, dim=0), nan=-float("inf"))

    prod_s = logits + L0 * rationality

    # S1: pragmatic speaker, normalized over the vocabulary (last dim)
    S1 = torch.log_softmax(prod_s, dim=-1)
    S1 = torch.nan_to_num(S1, nan=-float("inf"))

    prod_l = logits + L0
    # L1: pragmatic listener, normalized over the objects (dim 0)
    L1 = torch.log_softmax(prod_l, dim=0)
    L1 = torch.nan_to_num(L1, nan=-float("inf"))

    return S1, L1


def sample_from_probs(
    logits: torch.Tensor, num_beams: int, do_sample: bool, K: int = 10
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    :param logits: (num_beams, vocab_size) log proba for the next token, only for the wanted object
    :param num_beams: number of beams to sample. (Can be different from the shape of logits since some beams might
    have finished earlier)
    :param do_sample: sample, or use argmax
    :param K: number of samples to draw per beam to create the new population
    :return: idx_beam, idx_token, tokens_scores: the indices of the sampled tokens and their scores
    """

    vocab_size = logits.shape[-1]
    if do_sample:
        # sample from the probability distribution
        logits = logits.view(num_beams * vocab_size)
        probs = torch.softmax(logits, dim=-1)
        samples = torch.multinomial(probs, num_samples=K * num_beams)

        # get the indices of the sampled tokens
        idx_beam, idx_token = samples // vocab_size, samples % vocab_size

        logits = logits.view(num_beams * vocab_size)

        tokens_scores = logits.gather(dim=-1, index=samples).squeeze(-1)

        return idx_beam, idx_token, tokens_scores

    else:
        # get the indices of the most probable tokens
        num_beams = logits.shape[0]
        vocab_size = logits.shape[-1]

        logits = logits.view(num_beams * vocab_size)
        scores, samples = logits.topk(2 * num_beams, dim=-1)

        idx_beam, idx_token = samples // vocab_size, samples % vocab_size

        tokens_scores = scores.squeeze(-1)

        return idx_beam, idx_token, tokens_scores


# Beam search RSA decoding
class RSAContextualDecoding:
    def __init__(self, model, tokenizer, device):
        """
        :param model:
        :param tokenizer:
        :param device:
        """

        self.model = model.to(device)
        self.tokenizer = tokenizer
        self.device = device

    def fwd_pass(
        self,
        input_ids: torch.Tensor,
        decoder_input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
        decoder_attention_mask: torch.Tensor,
    ) -> torch.Tensor:
        """
        Make a forward pass through the model to get the logits for the next tokens
        :param input_ids: (world_size, num_beams, input_length)
        :param decoder_input_ids: (world_size, num_beams, partial_target_length)
        :param attention_mask: (world_size, num_beams, input_length)
:param attention_mask: (world_size, num_beams, input_length)
|
106 |
+
:param decoder_attention_mask: (world_size, num_beams, partial_target_length)
|
107 |
+
:return: logits: (world_size, num_beams, vocab_size)
|
108 |
+
"""
|
109 |
+
with torch.no_grad():
|
110 |
+
world_size, num_beams = input_ids.shape[0], decoder_input_ids.shape[1]
|
111 |
+
|
112 |
+
input_ids = input_ids.view(world_size * num_beams, input_ids.shape[2]).to(self.device)
|
113 |
+
attention_mask = attention_mask.view(
|
114 |
+
world_size * num_beams, attention_mask.shape[2]
|
115 |
+
).to(self.device)
|
116 |
+
|
117 |
+
decoder_input_ids = decoder_input_ids.view(
|
118 |
+
world_size * num_beams, decoder_input_ids.shape[2]
|
119 |
+
).to(self.device)
|
120 |
+
|
121 |
+
decoder_attention_mask = decoder_attention_mask.view(
|
122 |
+
world_size * num_beams, decoder_attention_mask.shape[2]
|
123 |
+
).to(self.device)
|
124 |
+
|
125 |
+
outputs = self.model(
|
126 |
+
input_ids=input_ids,
|
127 |
+
attention_mask=attention_mask,
|
128 |
+
decoder_input_ids=decoder_input_ids,
|
129 |
+
decoder_attention_mask=decoder_attention_mask,
|
130 |
+
)
|
131 |
+
logits = outputs.logits[..., -1, :]
|
132 |
+
|
133 |
+
logits = logits.view(self.world_size, num_beams, logits.shape[-1])
|
134 |
+
|
135 |
+
# return the probability of the next token when conditioned on the source text (world_size)
|
136 |
+
# and the partial target text (num_beam)
|
137 |
+
return logits
|
138 |
+
|
139 |
+
def duplicate_and_align_input_ids(
|
140 |
+
self,
|
141 |
+
input_ids: torch.Tensor,
|
142 |
+
input_ids_mask: torch.Tensor,
|
143 |
+
decoder_input_ids: torch.Tensor,
|
144 |
+
decoder_input_ids_mask: torch.Tensor,
|
145 |
+
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
|
146 |
+
"""
|
147 |
+
Duplicate the input_ids and decoder_input_ids to have all pairs of input_ids[i] and decoder_input_ids[j]
|
148 |
+
It uses torch.repeat and torch.repeat_interleave to do get something like:
|
149 |
+
a 1
|
150 |
+
a 2
|
151 |
+
a 3
|
152 |
+
b 1
|
153 |
+
b 2
|
154 |
+
b 3
|
155 |
+
...
|
156 |
+
:param input_ids: (world_size, input_length)
|
157 |
+
:param decoder_input_ids: (num_beam, partial_target_length)
|
158 |
+
:return: input_ids: (world_size, num_beam, input_length)
|
159 |
+
decoder_input_ids: (world_size, num_beam, partial_target_length)
|
160 |
+
aligned such that all pairs of input_ids[i] and decoder_input_ids[j] are present
|
161 |
+
"""
|
162 |
+
|
163 |
+
num_beams = decoder_input_ids.shape[0]
|
164 |
+
|
165 |
+
input_ids = input_ids.unsqueeze(1).repeat(1, num_beams, 1)
|
166 |
+
input_ids_mask = input_ids_mask.unsqueeze(1).repeat(1, num_beams, 1)
|
167 |
+
|
168 |
+
# repeat interleave
|
169 |
+
decoder_input_ids = decoder_input_ids.repeat_interleave(self.world_size, dim=0)
|
170 |
+
decoder_input_ids_mask = decoder_input_ids_mask.repeat_interleave(
|
171 |
+
self.world_size, dim=0
|
172 |
+
)
|
173 |
+
|
174 |
+
decoder_input_ids = decoder_input_ids.view(self.world_size, num_beams, -1)
|
175 |
+
decoder_input_ids_mask = decoder_input_ids_mask.view(
|
176 |
+
self.world_size, num_beams, -1
|
177 |
+
)
|
178 |
+
|
179 |
+
# print(self.tokenizer.batch_decode(input_ids[0]))
|
180 |
+
# print(self.tokenizer.batch_decode(decoder_input_ids[0]))
|
181 |
+
|
182 |
+
return input_ids, input_ids_mask, decoder_input_ids, decoder_input_ids_mask
|
183 |
+
|
184 |
+
def compute_rsa_probas(
|
185 |
+
self,
|
186 |
+
input_ids: torch.Tensor,
|
187 |
+
attention_mask: torch.Tensor,
|
188 |
+
decoder_input_ids: torch.Tensor,
|
189 |
+
decoder_attention_mask: torch.Tensor,
|
190 |
+
do_sample: bool = True,
|
191 |
+
top_p: Optional[float] = None,
|
192 |
+
top_k: Optional[int] = None,
|
193 |
+
temperature: float = 1.0,
|
194 |
+
rationality: float = 8.0, # seems to be a good value
|
195 |
+
process_logits_before_rsa: bool = True,
|
196 |
+
beam_scores: torch.Tensor = None,
|
197 |
+
) -> Tuple[torch.Tensor, torch.Tensor]:
|
198 |
+
"""
|
199 |
+
|
200 |
+
:param input_ids: input_ids to the encoder/decoder model = source texts
|
201 |
+
:param attention_mask: attention_mask to the encoder/decoder model
|
202 |
+
:param decoder_input_ids: decoder ids / partial summaries
|
203 |
+
:param decoder_attention_mask: attention mask for the decoder
|
204 |
+
:param do_sample: are we planning on sampling the tokens or using argmax (to apply or not the logits processor)
|
205 |
+
:param top_p: parameters for the logits processor top p
|
206 |
+
:param top_k: parameters for the logits processor top k
|
207 |
+
:param temperature: sampling temperature
|
208 |
+
:param rationality: how rational is the speaker (higher means more rational)
|
209 |
+
:param process_logits_before_rsa: should we apply the logits processor before or after the RSA computation
|
210 |
+
:param beam_scores: (world_size, num_beams) the scores of the beams to be added to the logits
|
211 |
+
:return: S1, L1: (world_size, num_beam, vocab_size).
|
212 |
+
"""
|
213 |
+
|
214 |
+
# some sanity checks
|
215 |
+
assert (top_p is None) or (
|
216 |
+
top_k is None
|
217 |
+
), "top_p and top_k cannot be used together"
|
218 |
+
assert ((top_p is not None) and (do_sample)) or (
|
219 |
+
top_p is None
|
220 |
+
), "top_p can only be used with sampling"
|
221 |
+
assert ((top_k is not None) and (do_sample)) or (
|
222 |
+
top_k is None
|
223 |
+
), "top_k can only be used with sampling"
|
224 |
+
|
225 |
+
# duplicate the input_ids and decoder_input_ids to have all pairs of input_ids[i] and decoder_input_ids[j]
|
226 |
+
(
|
227 |
+
input_ids,
|
228 |
+
attention_mask,
|
229 |
+
decoder_input_ids,
|
230 |
+
decoder_attention_mask,
|
231 |
+
) = self.duplicate_and_align_input_ids(
|
232 |
+
input_ids,
|
233 |
+
attention_mask,
|
234 |
+
decoder_input_ids,
|
235 |
+
decoder_attention_mask,
|
236 |
+
)
|
237 |
+
|
238 |
+
logits = (
|
239 |
+
self.fwd_pass(
|
240 |
+
input_ids, decoder_input_ids, attention_mask, decoder_attention_mask
|
241 |
+
)
|
242 |
+
/ temperature # apply the temperature
|
243 |
+
)
|
244 |
+
|
245 |
+
logits = torch.nn.functional.log_softmax(logits, dim=-1)
|
246 |
+
|
247 |
+
world_size = input_ids.shape[0]
|
248 |
+
num_beams = decoder_input_ids.shape[1]
|
249 |
+
|
250 |
+
logits = logits.view(world_size * num_beams, -1)
|
251 |
+
|
252 |
+
if do_sample and process_logits_before_rsa:
|
253 |
+
if top_p is not None:
|
254 |
+
logits = TopPLogitsWarper(top_p=top_p)(input_ids=None, scores=logits)
|
255 |
+
if top_k is not None:
|
256 |
+
logits = TopKLogitsWarper(top_k=top_k)(input_ids=None, scores=logits)
|
257 |
+
|
258 |
+
logits = logits.view(world_size, num_beams, -1)
|
259 |
+
|
260 |
+
if beam_scores is not None:
|
261 |
+
logits = logits + beam_scores[None, ..., None]
|
262 |
+
|
263 |
+
# compute the RSA probabilities
|
264 |
+
S1, L1 = compute_rsa_probas(logits, self.prior, rationality=rationality)
|
265 |
+
logits = S1
|
266 |
+
|
267 |
+
if do_sample and not process_logits_before_rsa:
|
268 |
+
logits = logits.view(world_size * num_beams, -1)
|
269 |
+
if top_p is not None:
|
270 |
+
logits = TopPLogitsWarper(top_p=top_p)(input_ids=None, scores=logits)
|
271 |
+
if top_k is not None:
|
272 |
+
logits = TopKLogitsWarper(top_k=top_k)(input_ids=None, scores=logits)
|
273 |
+
|
274 |
+
logits = logits.view(world_size, num_beams, -1)
|
275 |
+
|
276 |
+
return logits, L1
|
277 |
+
|
278 |
+
def generate(
|
279 |
+
self,
|
280 |
+
target_id: int,
|
281 |
+
source_texts_ids: torch.Tensor,
|
282 |
+
source_text_attention_mask: torch.Tensor,
|
283 |
+
max_length: int = 100,
|
284 |
+
num_beams: int = 8,
|
285 |
+
do_sample=True,
|
286 |
+
top_p: Optional[float] = None,
|
287 |
+
top_k: Optional[int] = None,
|
288 |
+
temperature: float = 1.0,
|
289 |
+
rationality: float = 1.0,
|
290 |
+
process_logits_before_rsa=True,
|
291 |
+
) -> Tuple[torch.Tensor, torch.Tensor]:
|
292 |
+
"""
|
293 |
+
|
294 |
+
:param target_id: the id of the target object
|
295 |
+
:param source_texts_ids: (world_size, input_length) the tokenized source texts
|
296 |
+
:param source_text_attention_mask: (world_size, input_length) the attention mask for the source texts
|
297 |
+
:param max_length: the maximum length to generate
|
298 |
+
:param do_sample: are we sampling or using argmax
|
299 |
+
:param top_p: parameters for the logits processor top p
|
300 |
+
:param top_k: parameters for the logits processor top k
|
301 |
+
:param temperature: sampling temperature
|
302 |
+
:param rationality: how rational is the speaker (higher means more rational)
|
303 |
+
:param process_logits_before_rsa: should we apply the logits processor before or after the RSA computation
|
304 |
+
:return: decoder_input_ids : (num_beams, max_length) decoded sequences, beam_scores: (num_beams) the scores
|
305 |
+
of the beams
|
306 |
+
"""
|
307 |
+
|
308 |
+
self.num_beam = num_beams
|
309 |
+
self.world_size = source_texts_ids.shape[0]
|
310 |
+
|
311 |
+
self.prior = torch.ones((self.world_size, self.num_beam)).to(self.device) / self.world_size
|
312 |
+
beam_scores = torch.zeros(self.num_beam).to(self.device)
|
313 |
+
|
314 |
+
# initialize the decoder input ids
|
315 |
+
decoder_input_ids = torch.full(
|
316 |
+
(self.num_beam, 2),
|
317 |
+
0,
|
318 |
+
dtype=torch.long,
|
319 |
+
device=self.device,
|
320 |
+
)
|
321 |
+
|
322 |
+
# initialize the decoder attention mask
|
323 |
+
decoder_attention_mask = torch.ones_like(decoder_input_ids).to(self.device)
|
324 |
+
|
325 |
+
new_beams = []
|
326 |
+
finished_beams = []
|
327 |
+
|
328 |
+
# run the beam search
|
329 |
+
for t in range(max_length):
|
330 |
+
# compute the RSA probabilities
|
331 |
+
num_beams = decoder_input_ids.shape[0]
|
332 |
+
|
333 |
+
S1, L1 = self.compute_rsa_probas(
|
334 |
+
source_texts_ids,
|
335 |
+
source_text_attention_mask,
|
336 |
+
decoder_input_ids,
|
337 |
+
decoder_attention_mask,
|
338 |
+
do_sample=do_sample,
|
339 |
+
top_p=top_p,
|
340 |
+
top_k=top_k,
|
341 |
+
temperature=temperature,
|
342 |
+
rationality=rationality,
|
343 |
+
beam_scores=beam_scores,
|
344 |
+
process_logits_before_rsa=process_logits_before_rsa,
|
345 |
+
)
|
346 |
+
|
347 |
+
# sample from the probabilities
|
348 |
+
idx_beam, idx_token, tokens_scores = sample_from_probs(
|
349 |
+
S1[target_id].squeeze(), num_beams, do_sample
|
350 |
+
)
|
351 |
+
|
352 |
+
# create all the new beams
|
353 |
+
|
354 |
+
new_beams = []
|
355 |
+
|
356 |
+
for idx_t, idx_b, token_score in zip(idx_token, idx_beam, tokens_scores):
|
357 |
+
new_beams.append(
|
358 |
+
(
|
359 |
+
decoder_input_ids[idx_b].tolist() + [idx_t.item()],
|
360 |
+
beam_scores[idx_b] + token_score.item(),
|
361 |
+
L1[:, idx_b, idx_t.item()],
|
362 |
+
)
|
363 |
+
)
|
364 |
+
|
365 |
+
# sort the beams
|
366 |
+
new_beams = sorted(new_beams, key=lambda x: x[1], reverse=True)
|
367 |
+
|
368 |
+
# keep only the best beams
|
369 |
+
new_beams = new_beams[: self.num_beam]
|
370 |
+
|
371 |
+
# check if the beams are finished
|
372 |
+
_new_beams = []
|
373 |
+
for beam in new_beams:
|
374 |
+
if beam[0][-1] == self.tokenizer.eos_token_id:
|
375 |
+
finished_beams.append(beam)
|
376 |
+
|
377 |
+
else:
|
378 |
+
_new_beams.append(beam)
|
379 |
+
|
380 |
+
new_beams = _new_beams
|
381 |
+
|
382 |
+
if len(new_beams) == 0:
|
383 |
+
break
|
384 |
+
|
385 |
+
# pad the beams
|
386 |
+
max_beam_len = max(len(x[0]) for x in new_beams)
|
387 |
+
new_beams = [
|
388 |
+
(
|
389 |
+
x[0] + [self.tokenizer.pad_token_id] * (max_beam_len - len(x[0])),
|
390 |
+
x[1],
|
391 |
+
x[2],
|
392 |
+
)
|
393 |
+
for x in new_beams
|
394 |
+
]
|
395 |
+
|
396 |
+
# update the beam scores
|
397 |
+
beam_scores = torch.tensor([x[1] for x in new_beams]).to(self.device)
|
398 |
+
|
399 |
+
# update the decoder input ids
|
400 |
+
decoder_input_ids: torch.Tensor = torch.tensor(
|
401 |
+
[x[0] for x in new_beams], device=self.device
|
402 |
+
)
|
403 |
+
|
404 |
+
# update the decoder attention mask based on pad tokens
|
405 |
+
decoder_attention_mask = (
|
406 |
+
decoder_input_ids != self.tokenizer.pad_token_id
|
407 |
+
).long()
|
408 |
+
|
409 |
+
self.prior = torch.stack([x[2] for x in new_beams], dim=1).to(self.device)
|
410 |
+
|
411 |
+
# self.prior = torch.ones((self.world_size, len(new_beams))) / self.world_size
|
412 |
+
|
413 |
+
results = []
|
414 |
+
|
415 |
+
# pad the beams
|
416 |
+
max_beam_len = max(len(x[0]) for x in finished_beams + new_beams)
|
417 |
+
for x in finished_beams + new_beams:
|
418 |
+
results.append(
|
419 |
+
(
|
420 |
+
x[0] + [self.tokenizer.pad_token_id] * (max_beam_len - len(x[0])),
|
421 |
+
x[1],
|
422 |
+
x[2],
|
423 |
+
)
|
424 |
+
)
|
425 |
+
|
426 |
+
decoder_input_ids = torch.tensor([x[0] for x in results], device=self.device)
|
427 |
+
|
428 |
+
beam_scores = torch.tensor([x[1] for x in results]).to(self.device)
|
429 |
+
|
430 |
+
return decoder_input_ids, beam_scores
|
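For orientation, a minimal sketch of driving RSAContextualDecoding end to end. The checkpoint name and the two example reviews are placeholders (the distilbart checkpoint is the one the demo in this repo uses); any seq2seq model exposing eos/pad token ids should fit the interface.

# Sketch: RSA contextual decoding over two candidate source reviews.
# The checkpoint and review texts are placeholders, not fixed by the code above.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from rsasumm.beam_search import RSAContextualDecoding

name = "sshleifer/distilbart-cnn-12-3"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSeq2SeqLM.from_pretrained(name)

reviews = [
    "The method is novel but the evaluation is limited.",   # object 0
    "The evaluation is thorough but the method is known.",  # object 1
]
batch = tokenizer(reviews, return_tensors="pt", padding=True, truncation=True)

decoder = RSAContextualDecoding(model, tokenizer, device="cpu")
# Ask for a summary that lets a listener identify review 0 among the two.
ids, scores = decoder.generate(
    target_id=0,
    source_texts_ids=batch["input_ids"],
    source_text_attention_mask=batch["attention_mask"],
    max_length=60,
    num_beams=4,
    do_sample=True,
    top_k=50,  # the sanity checks above allow top_k/top_p only with sampling
    rationality=8.0,
)
print(tokenizer.batch_decode(ids, skip_special_tokens=True))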
glimpse-ui/glimpse/rsasumm/rsa_reranker.py
ADDED
@@ -0,0 +1,280 @@
+from functools import cache
+from typing import List
+
+import numpy as np
+import torch
+import pandas as pd
+from tqdm import tqdm
+
+
+def kl_divergence(p, q):
+    """
+    Compute the KL divergence between two distributions
+    """
+    return torch.nan_to_num(p * (p / q).log(), nan=0.0).sum(-1)
+
+
+def jensen_shannon_divergence(p, q):
+    """
+    Compute the Jensen-Shannon divergence between two distributions
+    """
+    m = 0.5 * (p + q)
+    return 0.5 * (kl_divergence(p, m) + kl_divergence(q, m))
+
+
+class RSAReranking:
+    """
+    Rerank a list of candidates according to the RSA model.
+    """
+
+    def __init__(
+        self,
+        model,
+        tokenizer,
+        candidates: List[str],
+        source_texts: List[str],
+        batch_size: int = 32,
+        rationality: int = 1,
+        device="cuda",
+    ):
+        """
+        :param model: hf model used to compute the likelihoods (supposed to be a seq2seq model); it is S0 in the RSA model
+        :param tokenizer:
+        :param candidates: list of candidate summaries
+        :param source_texts: list of source texts
+        :param batch_size: batch size used to compute the likelihoods (can be high since we don't need gradients and
+        it's a single forward pass)
+        :param rationality: rationality parameter of the RSA model
+        :param device: device used to compute the likelihoods
+        """
+        self.model = model
+        self.device = device
+        self.model = model.to(self.device)
+        self.tokenizer = tokenizer
+
+        self.candidates = candidates
+        self.source_texts = source_texts
+
+        self.batch_size = batch_size
+        self.rationality = rationality
+
+    def compute_conditionned_likelihood(
+        self, x: List[str], y: List[str], mean: bool = True
+    ) -> torch.Tensor:
+        """
+        Compute the likelihood of y given x
+
+        :param x: list of source texts, len(x) = batch_size
+        :param y: list of candidate summaries, len(y) = batch_size
+        :param mean: average the likelihoods over the tokens of y, or take the sum
+        :return: tensor of shape (batch_size) containing the likelihoods of y given x
+        """
+
+        # Ensure x, y are pure Python lists of strings (not pandas.Series, np.ndarray, etc.)
+        x = [str(item) for item in list(x)]
+        y = [str(item) for item in list(y)]
+        assert len(x) == len(y), "x and y must have the same length"
+
+        loss_fn = torch.nn.CrossEntropyLoss(reduction="none")
+        batch_size = len(x)
+
+        x = self.tokenizer(
+            x,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=1024,
+        )
+        y = self.tokenizer(
+            y,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=1024,
+        )
+
+        # Move all tensors to the correct device
+        x = {k: v.to(self.device) for k, v in x.items()}
+        y = {k: v.to(self.device) for k, v in y.items()}
+
+        # Compute the likelihood of y given x
+        x_ids = x["input_ids"]
+        y_ids = y["input_ids"]
+
+        logits = self.model(
+            input_ids=x_ids,
+            decoder_input_ids=y_ids,
+            attention_mask=x["attention_mask"],
+            decoder_attention_mask=y["attention_mask"],
+        ).logits
+
+        shifted_logits = logits[..., :-1, :].contiguous()
+        shifted_ids = y_ids[..., 1:].contiguous()
+
+        likelihood = -loss_fn(
+            shifted_logits.view(-1, shifted_logits.size(-1)), shifted_ids.view(-1)
+        )
+
+        likelihood = likelihood.view(batch_size, -1).sum(-1)
+        if mean:
+            likelihood /= (y_ids != self.tokenizer.pad_token_id).float().sum(-1)
+
+        return likelihood
+
+    def score(self, x: List[str], y: List[str], **kwargs):
+        return self.compute_conditionned_likelihood(x, y, **kwargs)
+
+    def likelihood_matrix(self) -> torch.Tensor:
+        """
+        :return: likelihood matrix: (world_size, num_candidates); likelihood[i, j] is the likelihood of
+        candidate j being a summary for source text i.
+        """
+        likelihood_matrix = torch.zeros(
+            (len(self.source_texts), len(self.candidates))
+        ).to(self.device)
+
+        pairs = []
+        for i, source_text in enumerate(self.source_texts):
+            for j, candidate in enumerate(self.candidates):
+                pairs.append((i, j, source_text, candidate))
+
+        # split the pairs into batches
+        batches = [
+            pairs[i: i + self.batch_size]
+            for i in range(0, len(pairs), self.batch_size)
+        ]
+
+        for batch in tqdm(batches):
+            # get the source texts and candidates
+            source_texts = [pair[2] for pair in batch]
+            candidates = [pair[3] for pair in batch]
+
+            # compute the likelihoods
+            with torch.no_grad():
+                likelihoods = self.score(
+                    source_texts, candidates, mean=True
+                )
+
+            # fill the matrix
+            for k, (i, j, _, _) in enumerate(batch):
+                likelihood_matrix[i, j] = likelihoods[k].detach()
+
+        return likelihood_matrix
+
+    @cache
+    def S(self, t):
+        if t == 0:
+            return self.initial_speaker_probas
+        else:
+            listener = self.L(t - 1)
+            prod = listener * self.rationality  # + self.initial_speaker_probas.sum(0, keepdim=True)
+            return torch.log_softmax(prod, dim=-1)
+
+    @cache
+    def L(self, t):
+        speaker = self.S(t)
+        return torch.log_softmax(speaker, dim=-2)
+
+    def mk_listener_dataframe(self, t):
+        self.initial_speaker_probas = self.likelihood_matrix()
+
+        initial_listener_probas = self.L(0)
+
+        # compute consensus
+        uniform_distribution_over_source_texts = torch.ones_like(
+            initial_listener_probas
+        ) / len(self.source_texts)
+
+        initital_consensuality_score = (
+            torch.exp(initial_listener_probas)
+            * (
+                initial_listener_probas - torch.log(uniform_distribution_over_source_texts)
+            )
+        ).sum(0).cpu().numpy()
+
+        initital_consensuality_score = pd.Series(initital_consensuality_score, index=self.candidates)
+
+        initial_listener_probas = initial_listener_probas.cpu().numpy()
+
+        initial_listener_probas = pd.DataFrame(initial_listener_probas)
+        initial_listener_probas.index = self.source_texts
+        initial_listener_probas.columns = self.candidates
+
+        initial_speaker_probas = self.S(0).cpu().numpy()
+        initial_speaker_probas = pd.DataFrame(initial_speaker_probas)
+        initial_speaker_probas.index = self.source_texts
+        initial_speaker_probas.columns = self.candidates
+
+        listener_df = pd.DataFrame(self.L(t).cpu().numpy())
+
+        consensuality_scores = (
+            torch.exp(self.L(t))
+            * (self.L(t) - torch.log(uniform_distribution_over_source_texts))
+        ).sum(0).cpu().numpy()
+
+        consensuality_scores = pd.Series(consensuality_scores, index=self.candidates)
+
+        S = self.S(t).cpu().numpy()
+        speaker_df = pd.DataFrame(S)
+
+        # add the source texts and candidates as index
+        listener_df.index = self.source_texts
+        speaker_df.index = self.source_texts
+
+        listener_df.columns = self.candidates
+        speaker_df.columns = self.candidates
+
+        return listener_df, speaker_df, initial_listener_probas, initial_speaker_probas, initital_consensuality_score, consensuality_scores
+
+    def rerank(self, t=1):
+        """
+        Return the best summary (according to RSA) for each text.
+        """
+        (
+            listener_df,
+            speaker_df,
+            initial_listener_proba,
+            initial_speaker_proba,
+            initital_consensuality_score,
+            consensuality_scores,
+        ) = self.mk_listener_dataframe(t=t)
+        best_rsa = speaker_df.idxmax(axis=1).values
+        best_base = initial_listener_proba.idxmax(axis=1).values
+
+        return (
+            best_rsa,
+            best_base,
+            speaker_df,
+            listener_df,
+            initial_listener_proba,
+            initial_speaker_proba,
+            initital_consensuality_score,
+            consensuality_scores,
+        )
+
+
+class RSARerankingEmbedder(RSAReranking):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def compute_embeddings(self, x: List[str], y: List[str], **kwargs):
+        model_kwargs = kwargs.get("model_kwargs")
+
+        # shape: (batch_size, embedding_dim)
+        x_embeddings = self.model.encode(x, **model_kwargs)
+        y_embeddings = self.model.encode(y, **model_kwargs)
+
+        # dot product between the embeddings: shape (batch_size)
+        dot_products = (x_embeddings * y_embeddings).sum(-1)
+
+        return dot_products
+
+    def score(self, x: List[str], y: List[str], **kwargs):
+        return self.compute_embeddings(x, y, **kwargs)
+
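A minimal sketch of how the reranker above is driven. The import path assumes the rsasumm package is installed from glimpse/ (e.g. via pip install -e .); the checkpoint name and texts are placeholders, and the unpacking order matches what rerank() returns.

# Sketch: rerank candidate sentences against the source reviews they came from.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from rsasumm.rsa_reranker import RSAReranking

name = "sshleifer/distilbart-cnn-12-3"
model = AutoModelForSeq2SeqLM.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)

sources = ["First review text ...", "Second review text ..."]
candidates = ["The paper is clearly written.", "The experiments are too narrow."]

reranker = RSAReranking(
    model,
    tokenizer,
    candidates=candidates,
    source_texts=sources,
    batch_size=8,
    rationality=1,
    device="cpu",
)
(
    best_rsa, best_base,
    speaker_df, listener_df,
    initial_listener, initial_speaker,
    initial_consensuality, consensuality,
) = reranker.rerank(t=1)

# best_rsa[i] is the candidate the pragmatic speaker picks for source text i.
# Low consensuality = the candidate fits every source (a shared point);
# high consensuality = it pins down one specific review (a unique point).
print(best_rsa)
print(consensuality.sort_values().head(3))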
glimpse-ui/glimpse/scripts/abstractive.sh
ADDED
@@ -0,0 +1,37 @@
+#!/bin/bash
+#SBATCH --partition=main                      # Ask for an unkillable job
+#SBATCH --gres=gpu:1
+#SBATCH --mem=10G                             # Ask for 10 GB of RAM
+#SBATCH --time=2:00:00                        # The job will run for 2 hours
+#SBATCH --output=./logs/abstractive_out.txt
+#SBATCH --error=./logs/abstractive_error.txt
+#SBATCH -c 2
+
+
+# Load the required modules
+module --quiet load miniconda/3
+module --quiet load cuda/12.1.1
+conda activate "glimpse"
+
+# Check if an input file path is provided and valid
+if [ -z "$1" ] || [ ! -f "$1" ]; then
+    # if no path is provided, or the path is invalid, use the default test dataset
+    echo "Couldn't find a valid path. Using default path: data/processed/all_reviews_2017.csv"
+    dataset_path="data/processed/all_reviews_2017.csv"
+else
+    dataset_path="$1"
+fi
+
+
+# Generate abstractive summaries
+if [[ "$@" =~ "--add-padding" ]]; then  # check if the padding argument is present
+    # add the '--no-trimming' flag to the script
+    candidates=$(python glimpse/data_loading/generate_abstractive_candidates.py --dataset_path "$dataset_path" --scripted-run --no-trimming | tail -n 1)
+else
+    # no additional flags
+    candidates=$(python glimpse/data_loading/generate_abstractive_candidates.py --dataset_path "$dataset_path" --scripted-run | tail -n 1)
+fi
+
+
+# Compute the RSA scores based on the generated summaries
+rsa_scores=$(python glimpse/src/compute_rsa.py --summaries $candidates | tail -n 1)
glimpse-ui/glimpse/scripts/extractive.sh
ADDED
@@ -0,0 +1,31 @@
+#!/bin/bash
+#SBATCH --partition=main                      # Ask for an unkillable job
+#SBATCH --gres=gpu:1
+#SBATCH --mem=10G                             # Ask for 10 GB of RAM
+#SBATCH --time=2:00:00                        # The job will run for 2 hours
+#SBATCH --output=./logs/abstractive_out.txt
+#SBATCH --error=./logs/abstractive_error.txt
+#SBATCH -c 2
+
+
+# Load the required modules
+module --quiet load miniconda/3
+module --quiet load cuda/12.1.1
+conda activate "glimpse"
+
+
+# Check if an input file path is provided and valid
+if [ -z "$1" ] || [ ! -f "$1" ]; then
+    # if no path is provided, or the path is invalid, use the default test dataset
+    echo "Couldn't find a valid path. Using default path: data/processed/all_reviews_2017.csv"
+    dataset_path="data/processed/all_reviews_2017.csv"
+else
+    dataset_path="$1"
+fi
+
+# Generate extractive summaries
+candidates=$(python glimpse/data_loading/generate_extractive_candidates.py --dataset_path "$dataset_path" --scripted-run | tail -n 1)
+
+# Compute the RSA scores based on the generated summaries
+rsa_scores=$(python glimpse/src/compute_rsa.py --summaries $candidates | tail -n 1)
+
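Both scripts target a SLURM cluster: submit them with sbatch scripts/abstractive.sh path/to/reviews.csv (or scripts/extractive.sh). Since the #SBATCH lines are plain comments to a shell, the bodies also run under bash directly, provided the conda environment and modules exist. The tail -n 1 captures the last line each generation step prints, presumably the path of the file it wrote, and feeds it to compute_rsa.py.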
glimpse-ui/glimpse_pk_csv_converter.py
ADDED
@@ -0,0 +1,92 @@
+import pickle
+import pandas as pd
+from pathlib import Path
+import os
+import glob
+import re
+import json
+
+def process_pickle_results(pickle_path: Path, output_path: Path):
+    # === Load Pickle File ===
+    with open(pickle_path, 'rb') as f:
+        data = pickle.load(f)
+
+    # === Extract Metadata ===
+    reranking_model = data.get('metadata/reranking_model')
+    rsa_iterations = data.get('metadata/rsa_iterations')
+    results = data.get('results')
+
+    # print(f"Reranking model: {reranking_model}, RSA iterations: {rsa_iterations}")
+
+    # === Validate Results ===
+    if not isinstance(results, list):
+        raise ValueError("The 'results' key is not a list. Please check the pickle file structure.")
+
+    # === Process and Flatten Results ===
+    csv_data = []
+    for index, result in enumerate(results):
+        # row = {
+        #     'index': index,
+        #     'id': str(result.get('id')[0]),
+        #     'consensuality_scores': result.get('consensuality_scores').to_dict()
+        #     if isinstance(result.get('consensuality_scores'), pd.Series) else None,
+
+        #     # Optional fields — uncomment as needed
+        #     # 'best_base': result.get('best_base').tolist() if isinstance(result.get('best_base'), np.ndarray) else None,
+        #     # 'best_rsa': result.get('best_rsa').tolist() if isinstance(result.get('best_rsa'), np.ndarray) else None,
+        #     # 'speaker_df': result.get('speaker_df').to_json() if isinstance(result.get('speaker_df'), pd.DataFrame) else None,
+        #     # 'listener_df': result.get('listener_df').to_json() if isinstance(result.get('listener_df'), pd.DataFrame) else None,
+        #     # 'initial_listener': result.get('initial_listener').to_json() if isinstance(result.get('initial_listener'), pd.DataFrame) else None,
+        #     # 'language_model_proba_df': result.get('language_model_proba_df').to_json() if isinstance(result.get('language_model_proba_df'), pd.DataFrame) else None,
+        #     # 'initial_consensuality_scores': result.get('initial_consensuality_scores').to_dict() if isinstance(result.get('initial_consensuality_scores'), pd.Series) else None,
+        #     # 'gold': result.get('gold'),
+        #     # 'rationality': result.get('rationality'),
+        #     # 'text_candidates': result.get('text_candidates').to_json() if isinstance(result.get('text_candidates'), pd.DataFrame) else None,
+        # }
+
+        row = {
+            'index': index,
+            'id': str(result.get('id')[0]),
+            'consensuality_scores': json.dumps(result.get('consensuality_scores').to_dict())
+            if isinstance(result.get('consensuality_scores'), pd.Series) else None,
+        }
+
+        csv_data.append(row)
+
+    # === Save to CSV ===
+    df = pd.DataFrame(csv_data)
+    df.to_csv(output_path, index=False)
+    print(f"Results saved to '{output_path}'.")
+
+
+if __name__ == "__main__":
+
+    BASE_DIR = Path(__file__).resolve().parent
+
+    # Set the path to the pickle file and the output CSV file
+    # ==== Uncomment the appropriate line below to set the pickle file path ====
+    # pickle_file = BASE_DIR / "glimpse" / "output" / "extractive_sentences-_-all_reviews_2017-_-none-_-2025-05-20-20-22-18-_-r3-_-rsa_reranked-google-pegasus-arxiv.pk"
+
+    # ==== Find the latest file in the directory and use it instead ====
+    # This assumes the pickle files are stored in the 'glimpse/output' directory
+    # list_of_files = glob.glob('./glimpse/output/*.pk')
+    # pickle_file = max(list_of_files, key=os.path.getctime)
+    # print(f"Using pickle file: {pickle_file}")
+
+    # output_file = BASE_DIR / "data" / "GLIMPSE_results_from_pk.csv"
+
+    # process_pickle_results(pickle_file, output_file)
+
+    output_dir = BASE_DIR / "data"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    pickle_files = sorted(glob.glob('./glimpse/output/*.pk'), key=os.path.getctime)
+
+    for pickle_file in pickle_files:
+        year_match = re.search(r'(\d{4})', os.path.basename(pickle_file))
+        year_tag = year_match.group(1) if year_match else 'unknown_year'
+        output_file = output_dir / f"GLIMPSE_results_{year_tag}.csv"
+
+        print(f"Using pickle file: {pickle_file}, saving as {output_file}")
+        process_pickle_results(Path(pickle_file), output_file)
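The consensuality_scores column of the resulting CSV holds a JSON-encoded {sentence: score} mapping, so it has to be decoded when read back. A small sketch (the file name below is a placeholder):

# Sketch: read a converted CSV back and decode the per-sentence scores.
import json
import pandas as pd

df = pd.read_csv("data/GLIMPSE_results_2017.csv")
scores = json.loads(df.loc[0, "consensuality_scores"])
# Lowest scores = most consensual sentences for this submission.
for sentence, score in sorted(scores.items(), key=lambda kv: kv[1])[:3]:
    print(f"{score:.3f}  {sentence}")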
glimpse-ui/interface/Demo.py
ADDED
@@ -0,0 +1,800 @@
1 |
+
import math
|
2 |
+
|
3 |
+
import sys, os.path
|
4 |
+
|
5 |
+
import torch
|
6 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
|
7 |
+
|
8 |
+
from glimpse.rsasumm.rsa_reranker import RSAReranking
|
9 |
+
import gradio as gr
|
10 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
|
11 |
+
import pandas as pd
|
12 |
+
|
13 |
+
from scored_reviews_builder import load_scored_reviews
|
14 |
+
from glimpse.glimpse.data_loading.Glimpse_tokenizer import glimpse_tokenizer
|
15 |
+
# from scibert.scibert_polarity.scibert_polarity import predict_polarity
|
16 |
+
|
17 |
+
# Load scored reviews
|
18 |
+
years, all_scored_reviews_df = load_scored_reviews()
|
19 |
+
|
20 |
+
# -----------------------------------
|
21 |
+
# Pre-processed Tab
|
22 |
+
# -----------------------------------
|
23 |
+
|
24 |
+
def get_preprocessed_scores(year):
|
25 |
+
scored_reviews = all_scored_reviews_df[all_scored_reviews_df["year"] == year]["scored_dict"].iloc[0]
|
26 |
+
return scored_reviews
|
27 |
+
|
28 |
+
|
29 |
+
# -----------------------------------
|
30 |
+
# Interactive Tab
|
31 |
+
# -----------------------------------
|
32 |
+
|
33 |
+
# RSA_model = "facebook/bart-large-cnn"
|
34 |
+
RSA_model = "sshleifer/distilbart-cnn-12-3"
|
35 |
+
|
36 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(RSA_model)
|
37 |
+
tokenizer = AutoTokenizer.from_pretrained(RSA_model)
|
38 |
+
|
39 |
+
# Define the manual color map for topics
|
40 |
+
topic_color_map = {
|
41 |
+
"Substance": "#cce0ff", # lighter blue
|
42 |
+
"Clarity": "#e6ee9c", # lighter yellow-green
|
43 |
+
"Soundness/Correctness": "#ffcccc", # lighter red
|
44 |
+
"Originality": "#d1c4e9", # lighter purple
|
45 |
+
"Motivation/Impact": "#b2ebf2", # lighter teal
|
46 |
+
"Meaningful Comparison": "#fff9c4", # lighter yellow
|
47 |
+
"Replicability": "#c8e6c9", # lighter green
|
48 |
+
}
|
49 |
+
|
50 |
+
|
51 |
+
# GLIMPSE Home/Description Page
|
52 |
+
glimpse_description = """
|
53 |
+
# ReView: A Tool for Visualizing and Analyzing Scientific Reviews
|
54 |
+
|
55 |
+
## Overview
|
56 |
+
ReView is a visualization tool designed to assist **area chairs** and **researchers** in efficiently analyzing scholarly reviews. The interface offers two main ways to explore scholarly reviews:
|
57 |
+
- Pre-Processed Reviews: Explore real peer reviews from ICLR (2017–2021) with structured visualizations of sentiment, topics, and reviewer agreement.
|
58 |
+
- Interactive Tab: Enter your own reviews and view them analyzed in real time using the same NLP-powered highlighting options.
|
59 |
+
|
60 |
+
All reviews are shown in their original, unaltered form, with visual overlays to help identify key insights such as disagreements, sentiment and common themes—reducing cognitive load and scrolling effort.
|
61 |
+
|
62 |
+
---
|
63 |
+
## **Key Features**
|
64 |
+
- *Traceability and Transparency:* The tool preserves the original text of each review and overlays highlights for key aspects (e.g., sentiment, topic, agreement), allowing area chairs to trace back every insight to its source without modifying or summarizing the content.
|
65 |
+
- *Structured Overview*: All reviews are displayed in one interface and with radio buttons, one can navigate from one highlighting option to the other.
|
66 |
+
- *Interactive*: The tool allows users to input their own reviews and, within seconds, view them annotated with highlighted aspects
|
67 |
+
---
|
68 |
+
## **Highlighting Options**
|
69 |
+
- *Agreement:* Identifies both shared and conflicting points across reviews, helping to surface consensus and disagreement.
|
70 |
+
- *Polarity:* Highlights positive and negative sentiments within the reviews to reveal tone and stance.
|
71 |
+
- *Topic:* Organizes the review sentences by their discussed topics, ensuring coverage of diverse reviewer perspectives and improving clarity.
|
72 |
+
|
73 |
+
---
|
74 |
+
|
75 |
+
### How to Use ReView
|
76 |
+
|
77 |
+
ReView offers two main ways to explore peer reviews: using pre-processed reviews or by entering your own.
|
78 |
+
|
79 |
+
#### 🗂️ Pre-Processed Reviews Tab
|
80 |
+
|
81 |
+
Use this tab to explore reviews from ICLR (2017–2021):
|
82 |
+
|
83 |
+
1. **Select a conference year** from the dropdown menu on the right.
|
84 |
+
2. **Navigate between submissions** using the *Next* and *Previous* buttons on the left.
|
85 |
+
3. **Choose a highlighting view** using the radio buttons:
|
86 |
+
- **Original**: Displays unmodified review text.
|
87 |
+
- **Agreement**: Highlights consensus points in **red** and disagreements in **purple**.
|
88 |
+
- **Polarity**: Highlights **positive** sentiment in **green** and **negative** sentiment in **red**.
|
89 |
+
- **Topic**: Highlights comments by discussion topic using color-coded labels.
|
90 |
+
|
91 |
+
#### ✍️ Interactive Tab
|
92 |
+
|
93 |
+
Use this tab to analyze your own review text:
|
94 |
+
|
95 |
+
1. **Enter up to three reviews** in the input fields labeled *Review 1*, *Review 2*, and *Review 3*.
|
96 |
+
2. **Click "Process"** to analyze the input (average processing time: ~42 seconds).
|
97 |
+
3. **Explore the results** using the same highlighting options as above (Agreement, Polarity, Topic).
|
98 |
+
"""
|
99 |
+
|
100 |
+
|
101 |
+
EXAMPLES = [
|
102 |
+
"The paper gives really interesting insights on the topic of transfer learning. It is well presented and the experiment are extensive. I believe the authors missed Jane and al 2021. In addition, I think, there is a mistake in the math.",
|
103 |
+
"The paper gives really interesting insights on the topic of transfer learning. It is well presented and the experiment are extensive. Some parts remain really unclear and I would like to see a more detailed explanation of the proposed method.",
|
104 |
+
"The paper gives really interesting insights on the topic of transfer learning. It is not well presented and lack experiments. Some parts remain really unclear and I would like to see a more detailed explanation of the proposed method.",
|
105 |
+
]
|
106 |
+
|
107 |
+
# Function to summarize the input texts using the RSAReranking model in interactive mode
|
108 |
+
def summarize(text1, text2, text3, focus, mode, rationality=1.0, iterations=1):
|
109 |
+
|
110 |
+
# print(focus, mode, rationality, iterations)
|
111 |
+
|
112 |
+
# get sentences for each text
|
113 |
+
text2_sentences = glimpse_tokenizer(text2)
|
114 |
+
text1_sentences = glimpse_tokenizer(text1)
|
115 |
+
text3_sentences = glimpse_tokenizer(text3)
|
116 |
+
|
117 |
+
|
118 |
+
# remove empty sentences
|
119 |
+
text1_sentences = [sentence for sentence in text1_sentences if sentence != ""]
|
120 |
+
text2_sentences = [sentence for sentence in text2_sentences if sentence != ""]
|
121 |
+
text3_sentences = [sentence for sentence in text3_sentences if sentence != ""]
|
122 |
+
|
123 |
+
sentences = list(set(text1_sentences + text2_sentences + text3_sentences))
|
124 |
+
|
125 |
+
# Load polarity model and tokenizer (SciBERT)
|
126 |
+
polarity_model_path = "scibert/scibert_polarity/final_model"
|
127 |
+
polarity_tokenizer = AutoTokenizer.from_pretrained(polarity_model_path)
|
128 |
+
polarity_model = AutoModelForSequenceClassification.from_pretrained(polarity_model_path)
|
129 |
+
polarity_model.eval()
|
130 |
+
polarity_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
131 |
+
polarity_model.to(polarity_device)
|
132 |
+
|
133 |
+
def predict_polarity(sent_list):
|
134 |
+
inputs = polarity_tokenizer(
|
135 |
+
sent_list, return_tensors="pt", padding=True, truncation=True, max_length=512
|
136 |
+
).to(polarity_device)
|
137 |
+
with torch.no_grad():
|
138 |
+
logits = polarity_model(**inputs).logits
|
139 |
+
preds = torch.argmax(logits, dim=1).cpu().tolist()
|
140 |
+
emoji_map = {0: "➖", 1: None, 2: "➕"}
|
141 |
+
return dict(zip(sent_list, [emoji_map[p] for p in preds]))
|
142 |
+
|
143 |
+
|
144 |
+
# Run polarity prediction
|
145 |
+
polarity_map = predict_polarity(sentences)
|
146 |
+
|
147 |
+
|
148 |
+
# Load topic model and tokenizer (SciBERT)
|
149 |
+
topic_model_path = "scibert/scibert_topic/final_model"
|
150 |
+
topic_tokenizer = AutoTokenizer.from_pretrained(topic_model_path)
|
151 |
+
topic_model = AutoModelForSequenceClassification.from_pretrained(topic_model_path)
|
152 |
+
topic_model.eval()
|
153 |
+
topic_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
154 |
+
topic_model.to(topic_device)
|
155 |
+
|
156 |
+
def predict_topic(sent_list):
|
157 |
+
inputs = topic_tokenizer(
|
158 |
+
sent_list, return_tensors="pt", padding=True, truncation=True, max_length=512
|
159 |
+
).to(topic_device)
|
160 |
+
with torch.no_grad():
|
161 |
+
logits = topic_model(**inputs).logits
|
162 |
+
preds = torch.argmax(logits, dim=1).cpu().tolist()
|
163 |
+
|
164 |
+
# Topic ID to label and emoji
|
165 |
+
id2label = {
|
166 |
+
0: "Substance",
|
167 |
+
1: "Clarity",
|
168 |
+
2: "Correctness",
|
169 |
+
3: "Originality",
|
170 |
+
4: "Impact",
|
171 |
+
5: "Comparison",
|
172 |
+
6: "Replicability",
|
173 |
+
7: None # This is used for sentences that do not match any specific topic,
|
174 |
+
}
|
175 |
+
return dict(zip(sent_list, [id2label[p] for p in preds]))
|
176 |
+
|
177 |
+
# Run topic prediction
|
178 |
+
topic_map = predict_topic(sentences)
|
179 |
+
|
180 |
+
|
181 |
+
|
182 |
+
rsa_reranker = RSAReranking(
|
183 |
+
model,
|
184 |
+
tokenizer,
|
185 |
+
candidates=sentences,
|
186 |
+
source_texts=[text1, text2, text3],
|
187 |
+
device="cpu",
|
188 |
+
rationality=rationality,
|
189 |
+
)
|
190 |
+
(
|
191 |
+
best_rsa,
|
192 |
+
best_base,
|
193 |
+
speaker_df,
|
194 |
+
listener_df,
|
195 |
+
initial_listener,
|
196 |
+
language_model_proba_df,
|
197 |
+
initial_consensuality_scores,
|
198 |
+
consensuality_scores,
|
199 |
+
) = rsa_reranker.rerank(t=iterations)
|
200 |
+
|
201 |
+
# apply exp to the probabilities
|
202 |
+
speaker_df = speaker_df.applymap(lambda x: math.exp(x))
|
203 |
+
|
204 |
+
text_1_summaries = speaker_df.loc[text1][text1_sentences]
|
205 |
+
text_1_summaries = text_1_summaries / text_1_summaries.sum()
|
206 |
+
|
207 |
+
text_2_summaries = speaker_df.loc[text2][text2_sentences]
|
208 |
+
text_2_summaries = text_2_summaries / text_2_summaries.sum()
|
209 |
+
|
210 |
+
text_3_summaries = speaker_df.loc[text3][text3_sentences]
|
211 |
+
text_3_summaries = text_3_summaries / text_3_summaries.sum()
|
212 |
+
|
213 |
+
# make list of tuples
|
214 |
+
text_1_summaries = [(sentence, text_1_summaries[sentence]) for sentence in text1_sentences]
|
215 |
+
text_2_summaries = [(sentence, text_2_summaries[sentence]) for sentence in text2_sentences]
|
216 |
+
text_3_summaries = [(sentence, text_3_summaries[sentence]) for sentence in text3_sentences]
|
217 |
+
|
218 |
+
# normalize consensuality scores between -1 and 1
|
219 |
+
consensuality_scores = (consensuality_scores - (consensuality_scores.max() - consensuality_scores.min()) / 2) / (consensuality_scores.max() - consensuality_scores.min()) / 2
|
220 |
+
|
221 |
+
# get most and least consensual sentences
|
222 |
+
# most consensual --> most common; least consensual --> most unique
|
223 |
+
most_consensual = consensuality_scores.sort_values(ascending=True).head(3).index.tolist()
|
224 |
+
least_consensual = consensuality_scores.sort_values(ascending=False).head(3).index.tolist()
|
225 |
+
|
226 |
+
# Convert lists to strings
|
227 |
+
most_consensual = " ".join(most_consensual)
|
228 |
+
least_consensual = " ".join(least_consensual)
|
229 |
+
|
230 |
+
text_1_consensuality = consensuality_scores.loc[text1_sentences]
|
231 |
+
text_2_consensuality = consensuality_scores.loc[text2_sentences]
|
232 |
+
text_3_consensuality = consensuality_scores.loc[text3_sentences]
|
233 |
+
|
234 |
+
text_1_consensuality = [(sentence, text_1_consensuality[sentence]) for sentence in text1_sentences]
|
235 |
+
text_2_consensuality = [(sentence, text_2_consensuality[sentence]) for sentence in text2_sentences]
|
236 |
+
text_3_consensuality = [(sentence, text_3_consensuality[sentence]) for sentence in text3_sentences]
|
237 |
+
|
238 |
+
|
239 |
+
def highlight_reviews(text_sentences, consensuality_scores, threshold_common=0.0, threshold_unique=0.0):
|
240 |
+
highlighted = []
|
241 |
+
for sentence in text_sentences:
|
242 |
+
# print(f"Processing sentence: {sentence}", "score:", consensuality_scores.loc[sentence])
|
243 |
+
score = consensuality_scores.loc[sentence]
|
244 |
+
score = score*2 if score > 0 else score # amplify unique scores for better visibility
|
245 |
+
|
246 |
+
# common sentences --> positive consensuality scores
|
247 |
+
# unique sentences --> negative consensuality scores
|
248 |
+
|
249 |
+
score *= -1 # invert the score for highlighting
|
250 |
+
|
251 |
+
highlighted.append((sentence, score))
|
252 |
+
return highlighted
|
253 |
+
|
254 |
+
# Apply highlighting to each review
|
255 |
+
text_1_agreement = highlight_reviews(text1_sentences, consensuality_scores)
|
256 |
+
text_2_agreement = highlight_reviews(text2_sentences, consensuality_scores)
|
257 |
+
text_3_agreement = highlight_reviews(text3_sentences, consensuality_scores)
|
258 |
+
|
259 |
+
# Add polarity outputs
|
260 |
+
text_1_polarity = [(s, polarity_map[s]) for s in text1_sentences]
|
261 |
+
text_2_polarity = [(s, polarity_map[s]) for s in text2_sentences]
|
262 |
+
text_3_polarity = [(s, polarity_map[s]) for s in text3_sentences]
|
263 |
+
|
264 |
+
# Add topic outputs
|
265 |
+
text_1_topic = [(s, topic_map[s]) for s in text1_sentences]
|
266 |
+
text_2_topic = [(s, topic_map[s]) for s in text2_sentences]
|
267 |
+
text_3_topic = [(s, topic_map[s]) for s in text3_sentences]
|
268 |
+
|
269 |
+
# print(type(text_1_consensuality))
|
270 |
+
return (
|
271 |
+
# text_1_summaries, text_2_summaries, text_3_summaries,
|
272 |
+
# text_1_consensuality, text_2_consensuality, text_3_consensuality,
|
273 |
+
text_1_agreement, text_2_agreement, text_3_agreement,
|
274 |
+
most_consensual, least_consensual,
|
275 |
+
text_1_polarity, text_2_polarity, text_3_polarity,
|
276 |
+
text_1_topic, text_2_topic, text_3_topic,
|
277 |
+
)
|
278 |
+
|
279 |
+
|
280 |
+
|
281 |
+
|
282 |
+
with gr.Blocks(title="ReView") as demo:
|
283 |
+
# gr.Markdown("# ReView Interface")
|
284 |
+
|
285 |
+
with gr.Tab("Introduction"):
|
286 |
+
gr.Markdown(glimpse_description)
|
287 |
+
|
288 |
+
# -----------------------------------
|
289 |
+
# Pre-processed Tab
|
290 |
+
# -----------------------------------
|
291 |
+
with gr.Tab("Pre-processed Reviews"):
|
292 |
+
# Initialize state for this session.
|
293 |
+
initial_year = 2017
|
294 |
+
initial_scored_reviews = get_preprocessed_scores(initial_year)
|
295 |
+
initial_review_ids = list(initial_scored_reviews.keys())
|
296 |
+
initial_review = initial_scored_reviews[initial_review_ids[0]]
|
297 |
+
number_of_displayed_reviews = len(initial_scored_reviews[initial_review_ids[0]])
|
298 |
+
initial_state = {
|
299 |
+
"year_choice": initial_year,
|
300 |
+
"scored_reviews_for_year": initial_scored_reviews,
|
301 |
+
"review_ids": initial_review_ids,
|
302 |
+
"current_review_index": 0,
|
303 |
+
"current_review": initial_review,
|
304 |
+
"number_of_displayed_reviews": number_of_displayed_reviews,
|
305 |
+
}
|
306 |
+
state = gr.State(initial_state)
|
307 |
+
|
308 |
+
def update_review_display(state, score_type):
|
309 |
+
|
310 |
+
review_ids = state["review_ids"]
|
311 |
+
current_index = state["current_review_index"]
|
312 |
+
current_review = state["scored_reviews_for_year"][review_ids[current_index]]
|
313 |
+
|
314 |
+
show_polarity = score_type == "Polarity"
|
315 |
+
show_consensuality = score_type == "Agreement"
|
316 |
+
show_topic = score_type == "Topic"
|
317 |
+
|
318 |
+
|
319 |
+
if show_polarity:
|
320 |
+
color_map = {"➕": "#d4fcd6", "➖": "#fcd6d6"}
|
321 |
+
legend = False
|
322 |
+
elif show_topic:
|
323 |
+
color_map = topic_color_map # No color map for topics
|
324 |
+
legend = False
|
325 |
+
elif show_consensuality:
|
326 |
+
color_map = None # Continuous scale, no predefined colors
|
327 |
+
legend = True
|
328 |
+
else:
|
329 |
+
color_map = {} # Default to empty map
|
330 |
+
legend = False
|
331 |
+
|
332 |
+
new_review_id = (
|
333 |
+
f"### Submission Link:\n\n{review_ids[current_index]}<br>"
|
334 |
+
f"(Showing {current_index + 1} of {len(state['review_ids'])} reviews)"
|
335 |
+
)
|
336 |
+
|
337 |
+
number_of_displayed_reviews = len(current_review)
|
338 |
+
review_updates = []
|
339 |
+
consensuality_dict = {}
|
340 |
+
|
341 |
+
for i in range(8):
|
342 |
+
if i < number_of_displayed_reviews:
|
343 |
+
review_item = list(current_review[i].items())
|
344 |
+
|
345 |
+
if show_polarity:
|
346 |
+
highlighted = []
|
347 |
+
for sentence, metadata in review_item:
|
348 |
+
polarity = metadata.get("polarity", None)
|
349 |
+
if polarity >= 0.995:
|
350 |
+
label = "➕" # positive
|
351 |
+
elif polarity <= -0.99:
|
352 |
+
label = "➖" # negative
|
353 |
+
else:
|
354 |
+
label = None # ignore neutral (1)
|
355 |
+
highlighted.append((sentence, label))
|
356 |
+
elif show_consensuality:
|
357 |
+
highlighted = []
|
358 |
+
for sentence, metadata in review_item:
|
359 |
+
score = metadata.get("consensuality", 0.0)
|
360 |
+
score = score * 2 - 1 # Normalize to [-1, 1]
|
361 |
+
score = score/2.5 if score > 0 else score # Amplify unique scores for better visibility
|
362 |
+
score *= -1 # Invert the score for highlighting
|
363 |
+
|
364 |
+
consensuality_dict[sentence] = score
|
365 |
+
highlighted.append((sentence, score))
|
366 |
+
|
367 |
+
                    elif show_topic:
                        highlighted = []
                        for sentence, metadata in review_item:
                            topic = metadata.get("topic")
                            if topic != "NONE":
                                highlighted.append((sentence, topic))
                            else:
                                highlighted.append((sentence, None))
                    else:
                        highlighted = [(sentence, None) for sentence, metadata in review_item]

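                    # Worked example of the consensuality mapping above:
                    #   c = 1.0 (fully consensual): 2*1 - 1 = 1.0 -> 1.0/2.5 = 0.4 -> invert -> -0.4
                    #   c = 0.0 (fully unique):     2*0 - 1 = -1.0 -> unchanged   -> invert -> +1.0
                    # Unique sentences therefore end up with the strongest highlight.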
                    review_updates.append(
                        gr.update(
                            visible=True,
                            value=highlighted,
                            color_map=color_map,
                            show_legend=legend,
                            key=f"updated_{score_type}_{i}",
                        )
                    )
                else:
                    review_updates.append(
                        gr.update(
                            visible=False,
                            value=[],
                            show_legend=False,
                            color_map=color_map,
                            key=f"updated_{score_type}_{i}",
                        )
                    )

            # Surface the most consensual / most unique sentences.
            if show_consensuality and consensuality_dict:
                scores = pd.Series(consensuality_dict)
                most_unique = scores.sort_values(ascending=True).head(3).index.tolist()
                most_common = scores.sort_values(ascending=False).head(3).index.tolist()
                most_common_text = "\n".join(most_common)
                most_unique_text = "\n".join(most_unique)

                most_common_visibility = gr.update(visible=True, value=most_common_text)
                most_unique_visibility = gr.update(visible=True, value=most_unique_text)
            else:
                # print("Hiding most common and unique sentences")  # debug
                most_common_visibility = gr.update(visible=False, value="")
                most_unique_visibility = gr.update(visible=False, value="")

            # Update the topic color-map legend.
            if show_topic:
                topic_color_map_visibility = gr.update(
                    visible=True,
                    color_map=topic_color_map,
                    value=[
                        ("", "Substance"),
                        ("", "Clarity"),
                        ("", "Soundness/Correctness"),
                        ("", "Originality"),
                        ("", "Motivation/Impact"),
                        ("", "Meaningful Comparison"),
                        ("", "Replicability"),
                    ],
                )
            else:
                topic_color_map_visibility = gr.update(visible=False, value=[])

            return (
                new_review_id,
                *review_updates,
                most_common_visibility,
                most_unique_visibility,
                topic_color_map_visibility,
                state,
            )

        # Precompute the initial outputs so something is shown on load.
        init_display = update_review_display(initial_state, score_type="Original")
        # init_display holds 13 values: (review_id markdown, 8 review updates,
        # most_common, most_unique, topic legend, state).

        with gr.Row():
            with gr.Column(scale=1):
                review_id = gr.Markdown(value=init_display[0], container=True)
                with gr.Row():
                    previous_button = gr.Button("Previous", variant="secondary", interactive=True)
                    next_button = gr.Button("Next", variant="primary", interactive=True)

            with gr.Column(scale=1):
                # Input controls.
                year = gr.Dropdown(choices=years, label="Select Year", interactive=True, value=initial_year)
                score_type = gr.Radio(
                    choices=["Original", "Agreement", "Polarity", "Topic"],
                    label="Score Type to Display",
                    value="Original",
                    interactive=True,
                )

        # Output display.
        with gr.Row():
            most_common_sentences = gr.Textbox(
                lines=8,
                label="Most Common Opinions",
                visible=False,
                value="",
            )
            most_unique_sentences = gr.Textbox(
                lines=8,
                label="Most Divergent Opinions",
                visible=False,
                value="",
            )

        # Legend box for topic labels and their colors.
        topic_text_box = gr.HighlightedText(
            label="Topic Labels (Color-Coded)",
            visible=False,
            value=[],
            show_legend=True,
        )

        # Eight highlighted-text boxes, one per possible review; only the first
        # len(current_review) boxes are visible at any time.
        review_boxes = [
            gr.HighlightedText(
                show_legend=False,
                label=f"Review {i}",
                visible=number_of_displayed_reviews >= i,
                key=f"initial_review{i}",
                # color_map={"Positive": "#d4fcd6", "Negative": "#fcd6d6"}
            )
            for i in range(1, 9)
        ]
        review1, review2, review3, review4, review5, review6, review7, review8 = review_boxes

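        # The fixed cap of eight review boxes mirrors the `for i in range(8)`
        # loop in update_review_display; both would need to change together to
        # support submissions with more reviews.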
        # Callback functions that update state.
        def year_change(year, state, score_type):
            state["year_choice"] = year
            state["scored_reviews_for_year"] = get_preprocessed_scores(year)
            state["review_ids"] = list(state["scored_reviews_for_year"].keys())
            state["current_review_index"] = 0
            state["current_review"] = state["scored_reviews_for_year"][state["review_ids"][0]]
            return update_review_display(state, score_type)

        def next_review(state, score_type):
            state["current_review_index"] = (state["current_review_index"] + 1) % len(state["review_ids"])
            state["current_review"] = state["scored_reviews_for_year"][state["review_ids"][state["current_review_index"]]]
            return update_review_display(state, score_type)

        def previous_review(state, score_type):
            state["current_review_index"] = (state["current_review_index"] - 1) % len(state["review_ids"])
            state["current_review"] = state["scored_reviews_for_year"][state["review_ids"][state["current_review_index"]]]
            return update_review_display(state, score_type)

        # Hook up the callbacks with the session state. Every callback returns
        # the same 13-tuple, so they share one outputs list.
        display_outputs = [
            review_id, *review_boxes,
            most_common_sentences, most_unique_sentences, topic_text_box, state,
        ]
        year.change(fn=year_change, inputs=[year, state, score_type], outputs=display_outputs)
        score_type.change(fn=update_review_display, inputs=[state, score_type], outputs=display_outputs)
        next_button.click(fn=next_review, inputs=[state, score_type], outputs=display_outputs)
        previous_button.click(fn=previous_review, inputs=[state, score_type], outputs=display_outputs)

    # -----------------------------------
    # Interactive Tab
    # -----------------------------------
    with gr.Tab("Interactive", interactive=True):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Input Reviews")

                # review_count = gr.Slider(minimum=1, maximum=3, step=1, value=3, label="Number of Reviews", interactive=True)

                review1_textbox = gr.Textbox(lines=5, value=EXAMPLES[0], label="Review 1", interactive=True)
                review2_textbox = gr.Textbox(lines=5, value=EXAMPLES[1], label="Review 2", interactive=True)
                review3_textbox = gr.Textbox(lines=5, value=EXAMPLES[2], label="Review 3", interactive=True)

                with gr.Row():
                    submit_button = gr.Button("Process", variant="primary", interactive=True)
                    clear_button = gr.Button("Clear", variant="secondary", interactive=True)
gr.Markdown("**Note**: *Once your inputs are processed, you can see the different result by <ins>**only changing the parameters**</ins>, and without the need to re-process.*", container=True)
|
613 |
+
|
614 |
+
|
615 |
+
|
616 |
+
            with gr.Column():
                gr.Markdown("## Results")

                mode_radio = gr.Radio(
                    choices=[("In-line Highlighting", "highlight"), ("Generate Summaries", "summary")],
                    value="highlight",
                    label="Output Mode:",
                    interactive=False,
                    visible=False,  # Initially hidden; shown based on mode selection
                )
                focus_radio = gr.Radio(
                    choices=[("Agreement", "unique"), "Polarity", "Topic"],
                    value="unique",
                    label="Focus on:",
                    interactive=True,
                )
                generation_method_radio = gr.Radio(
                    choices=[("Extractive", "extractive")],  # TODO: add ("Abstractive", "abstractive") and abstractive generation
                    value="extractive",
                    label="Generation Method:",
                    interactive=True,
                    visible=False,
                )

                # Rationality (3.0) and iterations (2) are fixed to stay consistent with the compute_rsa.py script.
                # iterations_slider = gr.Slider(minimum=1, maximum=10, step=1, value=2, label="Iterations", interactive=False, visible=False)
                # rationality_slider = gr.Slider(minimum=0.0, maximum=10.0, step=0.1, value=2.0, label="Rationality", interactive=False, visible=False)

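                # gr.Radio choices may mix plain strings with (display, value)
                # tuples: the user sees "Agreement" while callbacks receive the
                # value "unique".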
                with gr.Row():
                    unique_sentences = gr.Textbox(
                        lines=6, label="Most Divergent Opinions", visible=True, value=None, container=True
                    )
                    common_sentences = gr.Textbox(
                        lines=6, label="Most Common Opinions", visible=True, value=None, container=True
                    )

                uniqueness_score_text1 = gr.HighlightedText(
                    show_legend=True, label="Agreement in Review 1", visible=True, value=None,
                )
                uniqueness_score_text2 = gr.HighlightedText(
                    show_legend=True, label="Agreement in Review 2", visible=True, value=None,
                )
                uniqueness_score_text3 = gr.HighlightedText(
                    show_legend=True, label="Agreement in Review 3", visible=True, value=None,
                )

                polarity_score_text1 = gr.HighlightedText(
                    show_legend=True, label="Polarity in Review 1", visible=False, value=None,
                    color_map={"➕": "#d4fcd6", "➖": "#fcd6d6"},
                )
                polarity_score_text2 = gr.HighlightedText(
                    show_legend=True, label="Polarity in Review 2", visible=False, value=None,
                    color_map={"➕": "#d4fcd6", "➖": "#fcd6d6"},
                )
                polarity_score_text3 = gr.HighlightedText(
                    show_legend=True, label="Polarity in Review 3", visible=False, value=None,
                    color_map={"➕": "#d4fcd6", "➖": "#fcd6d6"},
                )

                aspect_score_text1 = gr.HighlightedText(
                    show_legend=False, label="Topic in Review 1", visible=False, value=None,
                    color_map=topic_color_map,
                )
                aspect_score_text2 = gr.HighlightedText(
                    show_legend=False, label="Topic in Review 2", visible=False, value=None,
                    color_map=topic_color_map,
                )
                aspect_score_text3 = gr.HighlightedText(
                    show_legend=False, label="Topic in Review 3", visible=False, value=None,
                    color_map=topic_color_map,
                )

        # Connect the summarize function to the submit button.
        submit_button.click(
            fn=summarize,
            inputs=[
                review1_textbox, review2_textbox, review3_textbox,
                focus_radio, mode_radio,
            ],
            outputs=[
                uniqueness_score_text1, uniqueness_score_text2, uniqueness_score_text3,
                common_sentences, unique_sentences,
                polarity_score_text1, polarity_score_text2, polarity_score_text3,
                aspect_score_text1, aspect_score_text2, aspect_score_text3,
            ],
        )
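        # summarize is expected to return 11 values, one per component listed
        # in outputs above: 3 agreement highlights, 2 summary textboxes,
        # 3 polarity highlights, and 3 topic highlights.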
        # Define clear button behavior: blank out every input and output field.
        clear_button.click(
            fn=lambda: (None,) * 14,
            inputs=[],
            outputs=[
                review1_textbox, review2_textbox, review3_textbox,
                uniqueness_score_text1, uniqueness_score_text2, uniqueness_score_text3,
                common_sentences, unique_sentences,
                polarity_score_text1, polarity_score_text2, polarity_score_text3,
                aspect_score_text1, aspect_score_text2, aspect_score_text3,
            ],
        )

        # Update visibility of generation_method_radio based on mode_radio value
        # def toggle_generation_method(mode):
        #     if mode == "summary":
        #         return gr.update(visible=True), gr.update(visible=False)  # show generation method radio, hide focus radio
        #     else:
        #         return gr.update(visible=False), gr.update(visible=True)  # show focus radio, hide generation method radio

        # mode_radio.change(
        #     fn=toggle_generation_method,
        #     inputs=mode_radio,
        #     outputs=[generation_method_radio, focus_radio]
        # )

        # Update visibility of the output textboxes based on the selected focus.
        def toggle_output_textboxes(mode, focus):
            # mode_radio is currently fixed to "highlight", so the focus value
            # alone decides which highlight components are shown.
            if focus == "Polarity":
                return (
                    gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),  # in-line uniqueness highlights
                    gr.update(visible=False), gr.update(visible=False),  # summary boxes
                    gr.update(visible=True), gr.update(visible=True), gr.update(visible=True),  # polarity highlights
                    gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),  # aspect highlights
                )
            elif focus == "Topic":
                return (
                    gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),  # in-line uniqueness highlights
                    gr.update(visible=False), gr.update(visible=False),  # summary boxes
                    gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),  # polarity highlights
                    gr.update(visible=True), gr.update(visible=True), gr.update(visible=True),  # aspect highlights
                )
            else:  # "unique" (Agreement) and any other value fall back to the uniqueness view
                return (
                    gr.update(visible=True), gr.update(visible=True), gr.update(visible=True),  # in-line uniqueness highlights
                    gr.update(visible=True), gr.update(visible=True),  # summary boxes
                    gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),  # polarity highlights
                    gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),  # aspect highlights
                )

        focus_radio.change(
            fn=toggle_output_textboxes,
            inputs=[mode_radio, focus_radio],
            outputs=[
                uniqueness_score_text1, uniqueness_score_text2, uniqueness_score_text3,
                common_sentences, unique_sentences,
                polarity_score_text1, polarity_score_text2, polarity_score_text3,
                aspect_score_text1, aspect_score_text2, aspect_score_text3,
            ],
        )
        # mode_radio.change(
        #     fn=toggle_output_textboxes,
        #     inputs=[mode_radio, focus_radio],
        #     outputs=[
        #         uniqueness_score_text1, uniqueness_score_text2, uniqueness_score_text3,
        #         consensuality_score_text1, consensuality_score_text2, consensuality_score_text3,
        #         most_consensual_sentences, most_unique_sentences
        #     ]
        # )

        # TODO: Configure the slider for the number of review boxes

        # def toggle_reviews(number_of_displayed_reviews):
        #     number_of_displayed_reviews = int(number_of_displayed_reviews)
        #     updates = []
        #     # For review i, set visible True if its index is <= n, otherwise False.
        #     for i in range(1, 4):
        #         updates.append(gr.update(visible=(i <= number_of_displayed_reviews)))
        #     return tuple(updates)

        # review_count.change(
        #     fn=toggle_reviews,
        #     inputs=[review_count],
        #     outputs=[review1_textbox, review2_textbox, review3_textbox]
        # )

    demo.load(
        fn=update_review_display,
        inputs=[state, score_type],
        outputs=display_outputs,
    )

demo.launch(share=False)

glimpse-ui/scibert/scibert_polarity/final_model/config.json
ADDED
@@ -0,0 +1,35 @@
{
  "_name_or_path": "allenai/scibert_scivocab_uncased",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.46.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 31090
}
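The config above describes final_model as a plain BertForSequenceClassification checkpoint with three generic classes (LABEL_0..LABEL_2). A minimal loading sketch with the transformers Auto classes, assuming the directory is used as-is (the example sentence is illustrative and the label semantics are not defined by this commit):

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_dir = "glimpse-ui/scibert/scibert_polarity/final_model"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)

# Classify one review sentence; id2label comes from config.json (LABEL_0..LABEL_2).
inputs = tokenizer("The experiments are thorough and convincing.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[logits.argmax(-1).item()])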
glimpse-ui/scibert/scibert_polarity/final_model/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e259f6ae81187152e0aa80b9a478aec607c802eb270114eef2b64c8bac806d43
size 439706620
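The weights file itself is stored with Git LFS, so the diff records only the pointer above: the SHA-256 object id and the blob size (439,706,620 bytes, about 440 MB).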
glimpse-ui/scibert/scibert_polarity/final_model/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
{
  "cls_token": "[CLS]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": "[UNK]"
}