{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "12404068-3244-43d6-8556-41e11489bb48", "metadata": { "tags": [] }, "outputs": [], "source": [ "import pickle as pk\n", "import pandas as pd\n", "from pathlib import Path\n", "import numpy as np\n", "import seaborn as sns\n", "\n", "from rouge_score import rouge_scorer\n", "\n", "\n", "from lexrank import LexRank\n", "from lexrank.mappings.stopwords import STOPWORDS\n", "import nltk \n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "044c82d6-23c8-4c3b-a4b5-12acbbc1cc1a", "metadata": { "tags": [] }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "1f5cdf3f-5485-4b9f-a6fc-b2b8bd8aca7f", "metadata": { "tags": [] }, "outputs": [], "source": [ "\n", "\n", "\n", "path = Path(\"output/summaries/rsa_reranking/reviews_rsa_matrices/\")\n", "output_path = Path(\"output/summaries/methods_reviews/\")\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "ec5e8ff3-6bef-42bc-8430-df93c1a4e79a", "metadata": { "tags": [] }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "d45cd444-0a81-4670-bbae-213e322ea281", "metadata": { "tags": [] }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "9e6c2863-9e5a-4e5a-bdb6-02e03c5f6105", "metadata": { "tags": [] }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "2ae23148-38ae-4385-99fb-db20da54334d", "metadata": { "tags": [] }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "d15ce19b-a3c7-4554-878f-41acd3204878", "metadata": { "tags": [] }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "1652ff75-b6c2-483a-a9af-ff3ca8616756", "metadata": { "tags": [] }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "983cd24c-b996-4224-8f87-ea79842c41a0", "metadata": { "tags": [] }, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "0db0e8f5-4b4a-4d55-8596-fe095aa4135f", "metadata": {}, "source": [ "# Consensus score based summaries:" ] }, { "cell_type": "code", "execution_count": null, "id": "560c7f8d-6b8e-4b5b-ba36-0dedc509791f", "metadata": { "tags": [] }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "662a5f1b-1e4d-458e-a437-b1d9c8db4552", "metadata": { "tags": [] }, "outputs": [], "source": [ "def consensus_scores_based_summaries(sample, n_consensus=3, n_dissensus=3):\n", " consensus_samples = sample['consensuality_scores'].sort_values(ascending=True).head(n_consensus).index.tolist()\n", " disensus_samples = sample['consensuality_scores'].sort_values(ascending=False).head(n_dissensus).index.tolist()\n", " \n", " consensus = \".\".join(consensus_samples)\n", " disensus = \".\".join(disensus_samples)\n", " \n", " return consensus + \"\\n\\n\" + disensus\n", " \n", " \n", "def rsa_scores_based_summaries(sample, n_consensus=3, n_rsa_speaker=3):\n", " consensus_samples = sample['consensuality_scores'].sort_values(ascending=True).head(n_consensus).index.tolist()\n", " rsa = sample['best_rsa'].tolist()[:n_rsa_speaker]\n", " \n", " consensus = \".\".join(consensus_samples)\n", " rsa = \".\".join(rsa)\n", " \n", " return consensus + \"\\n\\n\" + rsa\n", "\n", "scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)\n", "\n", "def lead(sample, N=10):\n", " texts = sample['speaker_df'].index.tolist()\n", " \n", " summary = \"\\n\".join([\".\".join(t.split('.')[:N]) for t in texts])\n", " \n", " return summary\n", "\n", " \n", " \n", "\n", "scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)\n", "\n", "\n", "def construct_templated_summaries(data, fn, dataset=None): \n", " records = []\n", " for sample in data['results']:\n", " summary = fn(sample)\n", " text = \"\\n\\n\".join(sample['speaker_df'].index.tolist())\n", " record = {'id' : sample['id'], 'summary': summary, 'metadata/reranking_model' : data['metadata/reranking_model'], 'metadata/rsa_iterations' : data['metadata/reranking_model'], \"text\": text}\n", " if dataset is not None:\n", " record['gold'] = dataset.loc[sample[\"id\"]]['gold'].tolist()[0]\n", " if record['gold'] is not None:\n", " rouges = scorer.score(summary, record['gold'])\n", " record |= {r : v.fmeasure for r, v in rouges.items()}\n", " \n", " \n", " \n", " records.append(record)\n", " \n", " return pd.DataFrame.from_records(records)\n", " \n", "\n", " \n", " \n" ] }, { "cell_type": "code", "execution_count": null, "id": "39c45180-a354-429c-8cde-3a7c78013cc6", "metadata": { "tags": [] }, "outputs": [], "source": [ "def prepare_dataset(dataset_name, dataset_path=\"data/processed/\"):\n", " dataset_path = Path(dataset_path)\n", " if dataset_name == \"amazon\":\n", " dataset = pd.read_csv(dataset_path / \"amazon_test.csv\")\n", " elif dataset_name == \"space\":\n", " dataset = pd.read_csv(dataset_path / \"space.csv\")\n", " elif dataset_name == \"yelp\":\n", " dataset = pd.read_csv(dataset_path / \"yelp_test.csv\")\n", " elif dataset_name == \"reviews\":\n", " dataset = pd.read_csv(dataset_path / \"test_metareviews.csv\")\n", " else:\n", " raise ValueError(f\"Unknown dataset {dataset_name}\")\n", "\n", "\n", " return dataset\n" ] }, { "cell_type": "code", "execution_count": null, "id": "addeda2b-71fc-4c9a-8e91-12cf70e52b1e", "metadata": {}, "outputs": [], "source": [ "# df = prepare_dataset('reviews')\n", "\n", "# for n, group in df.groupby('id'):\n", "# for idx, row in group.iterrows():\n", "# print(row['text'].replace('-----', \"\\n\"))\n", "# print(\"===========\")\n", "# break\n", "rsa_scores_based_summaries" ] }, { "cell_type": "code", "execution_count": null, "id": "7fe212dd-63d2-44b1-b06e-7792b9d504ac", "metadata": { "tags": [] }, "outputs": [], "source": [ "\n", "\n", "for n in [1, 2, 3, 4, 5, 6]:\n", " for file in path.glob(\"*.pk\"):\n", " print(file)\n", " with file.open('rb') as fd:\n", " data = pk.load(fd)\n", "\n", " Path(output_path).mkdir(parents=True, exist_ok=True)\n", " model_name, dataset_name, decoding_config, date = str(file.stem).split('-_-')[:4]\n", "\n", " dataset = prepare_dataset(dataset_name, dataset_path=\"data/processed/\")\n", " dataset = dataset.set_index('id')\n", " \n", " fn = lambda sample: consensus_scores_based_summaries(sample, n_consensus=n, n_dissensus=n)\n", "\n", " df = construct_templated_summaries(data, fn, dataset=dataset)\n", " \n", " df['metadata/method'] = \"Agreement\"\n", " df['metadata/n_sentences'] = 2*n\n", " df['metadata/n_consensus'] = n\n", " df['metadata/n_dissensus'] = n\n", "\n", " name = file.stem + \"-_-\" + f\"consensus_score_based_{n}.csv\"\n", "\n", " if (output_path / name).exists():\n", " df_old = pd.read_csv(output_path / name)\n", "\n", " for col in df.columns:\n", " if col not in df_old.columns:\n", " df_old[col] = float(\"nan\")\n", "\n", " # add entry to the dataframe\n", " for col in df.columns:\n", " df_old[col] = df[col]\n", "\n", " df = df_old\n", "\n", " df.to_csv(output_path / name)\n", " \n", " \n", " \n", " " ] }, { "cell_type": "code", "execution_count": null, "id": "ab45111b-9c9f-44ee-8cc1-613bfa32a007", "metadata": { "tags": [] }, "outputs": [], "source": [ "\n", "for n in [1, 2, 3, 4, 5, 6]:\n", " for file in path.glob(\"*.pk\"):\n", " with file.open('rb') as fd:\n", " data = pk.load(fd)\n", "\n", " Path(output_path).mkdir(parents=True, exist_ok=True)\n", " model_name, dataset_name, decoding_config, date = str(file.stem).split('-_-')[:4]\n", "\n", " dataset = prepare_dataset(dataset_name, dataset_path=\"data/processed/\")\n", " dataset = dataset.set_index('id')\n", "\n", " fn = lambda sample: rsa_scores_based_summaries(sample, n_consensus=n, n_rsa_speaker=n)\n", " df = construct_templated_summaries(data, fn, dataset=dataset)\n", "\n", " df['metadata/method'] = \"Speaker+Agreement\"\n", " df['metadata/n_sentences'] = 2*n\n", " df['metadata/n_consensus'] = n\n", " df['metadata/n_dissensus'] = n\n", "\n", " name = file.stem + \"-_-\" + f\"rsa_score_based_{n}.csv\"\n", "\n", " if (output_path / name).exists():\n", " df_old = pd.read_csv(output_path / name)\n", "\n", " for col in df.columns:\n", " if col not in df_old.columns:\n", " df_old[col] = float(\"nan\")\n", "\n", " # add entry to the dataframe\n", " for col in df.columns:\n", " df_old[col] = df[col]\n", "\n", " df = df_old\n", "\n", " df.to_csv(output_path / name)" ] }, { "cell_type": "code", "execution_count": null, "id": "8b57c318-5fc8-49fc-8746-128e1112e46a", "metadata": { "tags": [] }, "outputs": [], "source": [ "\n", "for n in [1, 2, 3, 4, 5, 6, 7, 8]:\n", " for file in path.glob(\"*.pk\"):\n", " with file.open('rb') as fd:\n", " data = pk.load(fd)\n", "\n", " Path(output_path).mkdir(parents=True, exist_ok=True)\n", " model_name, dataset_name, decoding_config, date = str(file.stem).split('-_-')[:4]\n", "\n", " dataset = prepare_dataset(dataset_name, dataset_path=\"data/processed/\")\n", " dataset = dataset.set_index('id')\n", "\n", " fn = lambda sample: lead(sample, N=2*n)\n", "\n", "\n", " df = construct_templated_summaries(data, fn, dataset=dataset)\n", "\n", " df['metadata/method'] = \"Lead\"\n", " df['metadata/n_sentences'] = 2*n\n", "\n", " name = file.stem + \"-_-\" + f\"lead_{2*n}.csv\"\n", "\n", " if (output_path / name).exists():\n", " df_old = pd.read_csv(output_path / name)\n", "\n", " for col in df.columns:\n", " if col not in df_old.columns:\n", " df_old[col] = float(\"nan\")\n", "\n", " # add entry to the dataframe\n", " for col in df.columns:\n", " df_old[col] = df[col]\n", "\n", " df = df_old\n", "\n", " df.to_csv(output_path / name)" ] }, { "cell_type": "code", "execution_count": null, "id": "af574823-667d-4722-90bc-2bb095ad3a01", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "c32d1d88-c6f5-4ada-aec6-219d90cade16", "metadata": { "tags": [] }, "outputs": [], "source": [ "import seaborn as sns\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, "id": "868ac37e-6187-46f5-935c-111ca532b1b0", "metadata": { "tags": [] }, "outputs": [], "source": [ "output_path = Path(\"output/summaries/methods_reviews/\")" ] }, { "cell_type": "code", "execution_count": null, "id": "7263e9d0-6a43-4698-bf6f-82fff0839316", "metadata": { "tags": [] }, "outputs": [], "source": [ "import subprocess\n", "\n", "\n", "for file in output_path.glob(\"*.csv\"):\n", " print(file)\n", " cmd = [\"python\", \"mds/evaluate_bartbert_metrics.py\", \"--summaries\", file]\n", " subprocess.run(cmd)" ] }, { "cell_type": "code", "execution_count": null, "id": "18a63537-c44e-421c-beff-50c6518115bf", "metadata": { "tags": [] }, "outputs": [], "source": [ "dfs = []\n", "for file in output_path.glob(\"*.csv\"):\n", " model_name, dataset_name, decoding_config, date = str(file.stem).split('-_-')[:4]\n", " method = str(file.stem).split('-_-')[-1]\n", " \n", " df = pd.read_csv(file)\n", " df['metadata/Model'] = model_name\n", " df['metadata/Dataset'] = dataset_name\n", " df['metadata/method'] = method\n", " \n", " df[\"Method\"] = f\"{model_name}/{method}\"\n", " \n", " dfs.append(df)\n", " \n", "df = pd.concat(dfs)\n", " \n", " \n", "df" ] }, { "cell_type": "code", "execution_count": null, "id": "eef1ec67-62f9-458f-9380-debe40bac46a", "metadata": { "tags": [] }, "outputs": [], "source": [ "sns.catplot(data=df, hue='Method', y='rougeL', x='metadata/Dataset', kind='bar')" ] }, { "cell_type": "code", "execution_count": null, "id": "aed3e026-fd89-416f-a333-c841eaf566e4", "metadata": {}, "outputs": [], "source": [ "sns.catplot(data=df, hue='metadata/method', y='rouge1', x='metadata/reranking_model', kind='bar', row=\"metadata/model\")" ] }, { "cell_type": "code", "execution_count": null, "id": "2e433925-6be6-4322-af07-45b4c07ff5ff", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "pytorch-gpu-2.0.0_py3.10.9", "language": "python", "name": "module-conda-env-pytorch-gpu-2.0.0_py3.10.9" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 5 }