Yiqiao Jin committed · 53709ed · 1 parent: bdafe83

Update demo
Files changed:
- README.md +13 -0
- agentreview/agent.py +2 -0
- arguments.py → agentreview/arguments.py +4 -4
- agentreview/backends/openai.py +11 -13
- const.py → agentreview/const.py +2 -0
- agentreview/dataset/download_openreview_paper.py +4 -5
- agentreview/dataset/process_submissions.py +2 -3
- agentreview/environments/paper_review.py +1 -2
- agentreview/paper_processor.py +1 -1
- agentreview/paper_review_arena.py +1 -4
- agentreview/paper_review_player.py +3 -1
- agentreview/paper_review_settings.py +5 -2
- agentreview/role_descriptions.py +1 -1
- agentreview/ui/cli.py +11 -11
- agentreview/utility/__init__.py +0 -0
- {utility → agentreview/utility}/authentication_utils.py +10 -0
- {utility → agentreview/utility}/data_utils.py +0 -0
- agentreview/utility/experiment_utils.py +84 -0
- {utility → agentreview/utility}/general_utils.py +0 -0
- {utility → agentreview/utility}/metrics_utils.py +0 -0
- {utility → agentreview/utility}/text_utils.py +0 -0
- {utility → agentreview/utility}/utils.py +106 -50
- data +1 -0
- demo.py +217 -0
- notebooks/demo.ipynb +0 -0
- requirements.txt +1 -1
- run_paper_decision_cli.py +48 -53
- run_paper_review_cli.py +75 -73
README.md
CHANGED
@@ -1,3 +1,16 @@
+---
+title: AgentReview
+emoji: π
+colorFrom: indigo
+colorTo: pink
+sdk: gradio
+sdk_version: 5.4.0
+app_file: app.py
+pinned: false
+license: apache-2.0
+short_description: EMNLP 2024
+---
+
 # AgentReview
 
 Official implementation for the [EMNLP 2024](https://2024.emnlp.org/) (main) paper: [AgentReview: Exploring Peer Review Dynamics with LLM Agents](https://arxiv.org/abs/2406.12708)
agentreview/agent.py
CHANGED
@@ -69,8 +69,10 @@ class Player(Agent):
         self.data_dir = kwargs.pop("data_dir", None)
         self.args = args
 
+
         if isinstance(backend, BackendConfig):
             backend_config = backend
+            backend_config['openai_client_type'] = args.openai_client_type
             backend = load_backend(backend_config)
         elif isinstance(backend, IntelligenceBackend):
             backend_config = backend.to_config()
arguments.py → agentreview/arguments.py
RENAMED
@@ -26,7 +26,8 @@ def parse_args():
 
 
     parser.add_argument(
-        "--api_version", type=str, default="2023-…
+        "--api_version", type=str, default="2023-05-15", help="API version to be used for making requests. Required "
+                                                              "for Azure OpenAI clients."
     )
 
     # Experiment configuration
@@ -54,11 +55,10 @@ def parse_args():
     )
 
     parser.add_argument(
-        "--…
+        "--overwrite", action="store_true", help="If set, existing results or output files will be overwritten without prompting."
     )
-
     parser.add_argument(
-        "--…
+        "--skip_logging", action="store_true", help="If set, we do not log the messages in the console."
     )
 
     parser.add_argument(
agentreview/backends/openai.py
CHANGED
@@ -3,17 +3,11 @@ from typing import List
 
 from tenacity import retry, stop_after_attempt, wait_random_exponential
 
-from arguments import parse_args
-from utility.authentication_utils import get_openai_client
+from agentreview.arguments import parse_args
+from agentreview.utility.authentication_utils import get_openai_client
 from .base import IntelligenceBackend
 from ..message import SYSTEM_NAME, Message
 
-args = parse_args()
-
-client = get_openai_client(client_type=args.openai_client_type)
-
-OPENAI_CLIENT_TYPE = args.openai_client_type
-
 # Default config follows the OpenAI playground
 DEFAULT_TEMPERATURE = 1.0
 DEFAULT_MAX_TOKENS = 4096
@@ -57,19 +51,22 @@ class OpenAIChat(IntelligenceBackend):
             merge_other_agents_as_one_user=merge_other_agents_as_one_user,
             **kwargs,
         )
-
+        self.client_type = kwargs.get("openai_client_type", None)
+        self.client = get_openai_client(self.client_type)
         self.temperature = temperature
         self.max_tokens = max_tokens
         self.model = model
         self.merge_other_agent_as_user = merge_other_agents_as_one_user
 
+
+
     @retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=60))
     def _get_response(self, messages):
         # Refer to https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/switching-endpoints for how to
         # make API calls
 
-        if OPENAI_CLIENT_TYPE == "openai":
-            completion = client.chat.completions.create(
+        if self.client_type == "openai":
+            completion = self.client.chat.completions.create(
                 model=self.model,
                 messages=messages,
                 temperature=self.temperature,
@@ -77,8 +74,8 @@ class OpenAIChat(IntelligenceBackend):
                 stop=STOP,
             )
 
-        elif OPENAI_CLIENT_TYPE == "azure_openai":
-            completion = client.chat.completions.create(
+        elif self.client_type == "azure_openai":
+            completion = self.client.chat.completions.create(
                 model=self.model,
                 messages=messages,
                 temperature=self.temperature,
@@ -90,6 +87,7 @@ class OpenAIChat(IntelligenceBackend):
             raise NotImplementedError
 
         response = completion.choices[0].message.content
+
        response = response.strip()
         return response
 
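The module-level `parse_args()` / `get_openai_client()` globals are gone: the client type now travels with the backend config (see the `agent.py` hunk above), and each `OpenAIChat` instance builds its own client. A minimal sketch of the new flow, assuming an `OPENAI_API_KEY` in the environment and an illustrative model name:

```python
import os

from agentreview.backends.openai import OpenAIChat

os.environ.setdefault("OPENAI_API_KEY", "sk-...")  # placeholder, not a real key

# `openai_client_type` is consumed from **kwargs by the constructor:
#   self.client_type = kwargs.get("openai_client_type", None)
#   self.client = get_openai_client(self.client_type)
backend = OpenAIChat(model="gpt-4o", openai_client_type="openai")
```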
const.py → agentreview/const.py
RENAMED
@@ -10,6 +10,8 @@ PAPER_DECISIONS_ICLR2019 = ["Accept-oral", "Accept-poster", "Reject"]
 
 AREA_CHAIR_TYPES = ['inclusive', 'conformist', 'authoritarian', 'BASELINE']
 
+GLOBAL_PROMPT = "This is a realistic simulation of academic peer review."
+
 # These are papers that contain potentially sensitive content. GPT-4 refused to generate reviews for these papers.
 FILTERED_PAPER_IDS = {
     "ICLR2020": [],
agentreview/dataset/download_openreview_paper.py
CHANGED
@@ -15,14 +15,14 @@ import os
 import time
 import requests
 
-from arguments import parse_args
+from agentreview.arguments import parse_args
 
 try:
     import openreview
 except ImportError:
     raise ImportError("Please install openreview package using `pip install openreview-py`")
 
-def download_papers():
+def download_papers(args):
     """Downloads all papers from ICLR 2023 using OpenReview API.
 
     This function authenticates with the OpenReview API using environment
@@ -36,8 +36,6 @@ def download_papers(args):
     AssertionError: If the conference argument is not for ICLR.
     """
 
-    args = parse_args()
-
     openreview_username = os.environ.get("OPENREVIEW_USERNAME")
     openreview_password = os.environ.get("OPENREVIEW_PASSWORD")
 
@@ -133,4 +131,5 @@ def download_papers(args):
 
 
 if __name__ == "__main__":
-    download_papers()
+    args = parse_args()
+    download_papers(args)
agentreview/dataset/process_submissions.py
CHANGED
@@ -22,9 +22,8 @@ from tqdm import tqdm
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
-import …
-from …
-from utility.utils import print_colored
+from agentreview.arguments import parse_args
+from agentreview.utility.utils import print_colored
 
 decision_map = {
     # ICLR 2023
agentreview/environments/paper_review.py
CHANGED
@@ -1,11 +1,10 @@
 import json
-import json
 import logging
 import os.path as osp
 from typing import List
 
 from agentreview.environments import Conversation
-from utility.utils import get_rebuttal_dir
+from agentreview.utility.utils import get_rebuttal_dir
 from .base import TimeStep
 from ..message import Message
 from ..paper_review_message import PaperReviewMessagePool
agentreview/paper_processor.py
CHANGED
@@ -148,7 +148,7 @@ def convert_text_into_dict(text: str) -> dict:
 
 
 if __name__ == "__main__":
-    from utility.authentication_utils import read_and_set_openai_key
+    from agentreview.utility.authentication_utils import read_and_set_openai_key
     from agentreview.review import get_lm_review
 
     read_and_set_openai_key()
agentreview/paper_review_arena.py
CHANGED
@@ -1,14 +1,11 @@
 import csv
-import glob
 import json
 import logging
-import os
 from typing import Union
 
 from agentreview.arena import Arena, TooManyInvalidActions
 from agentreview.role_descriptions import get_reviewer_description
-from utility.utils import …, \
-    get_paper_review_and_rebuttal_dir, format_metareviews
+from agentreview.utility.utils import format_metareviews
 from .agent import Player
 from .config import ArenaConfig
 from .environments import TimeStep, load_environment
agentreview/paper_review_player.py
CHANGED
@@ -56,6 +56,8 @@ class Reviewer(Player):
                  global_prompt: str = None,
                  **kwargs,
                  ):
+        print("kwargs")
+        print(kwargs)
         super().__init__(name, role_desc, backend, global_prompt, **kwargs)
 
     def act(self, observation: List[Message]) -> str:
@@ -94,7 +96,7 @@ class PaperExtractorPlayer(Player):
         Returns:
             str: The action (response) of the player.
         """
-
+
         logging.info(f"Loading {self.conference} paper {self.paper_id} ({self.paper_decision}) ...")
 
         loader = PDFReader()
|
agentreview/paper_review_settings.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
default_reviewer_setting = {
|
| 2 |
"is_benign": None,
|
| 3 |
"is_knowledgeable": None,
|
|
@@ -6,7 +8,7 @@ default_reviewer_setting = {
|
|
| 6 |
}
|
| 7 |
|
| 8 |
|
| 9 |
-
def get_experiment_settings(setting: dict):
|
| 10 |
"""
|
| 11 |
Generate experiment settings based on provided configurations for area chairs (AC) and reviewers.
|
| 12 |
|
|
@@ -19,7 +21,8 @@ def get_experiment_settings(setting: dict):
|
|
| 19 |
"""
|
| 20 |
|
| 21 |
experiment_setting = {
|
| 22 |
-
"
|
|
|
|
| 23 |
"players": {
|
| 24 |
|
| 25 |
# Paper Extractor is a special player that extracts a paper from the dataset.
|
|
|
|
| 1 |
+
from typing import Union
|
| 2 |
+
|
| 3 |
default_reviewer_setting = {
|
| 4 |
"is_benign": None,
|
| 5 |
"is_knowledgeable": None,
|
|
|
|
| 8 |
}
|
| 9 |
|
| 10 |
|
| 11 |
+
def get_experiment_settings(paper_id: Union[int, None] = None, paper_decision: Union[str, None] = None, setting: dict = None):
|
| 12 |
"""
|
| 13 |
Generate experiment settings based on provided configurations for area chairs (AC) and reviewers.
|
| 14 |
|
|
|
|
| 21 |
"""
|
| 22 |
|
| 23 |
experiment_setting = {
|
| 24 |
+
"paper_id": paper_id,
|
| 25 |
+
"paper_decision": paper_decision,
|
| 26 |
"players": {
|
| 27 |
|
| 28 |
# Paper Extractor is a special player that extracts a paper from the dataset.
|
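`get_experiment_settings` now carries the paper context in its signature and stores it on the returned dict, where `initialize_players` reads it back. A minimal sketch of the new call; the setting dict mirrors the `malicious_Rx1` example from `demo.py`, and the paper ID/decision values are illustrative:

```python
from agentreview.paper_review_settings import get_experiment_settings

setting = {
    "AC": ["BASELINE"],
    "reviewer": ["malicious", "BASELINE", "BASELINE"],
    "author": ["BASELINE"],
    "global_settings": {
        "provides_numeric_rating": ["reviewer", "ac"],
        "persons_aware_of_authors_identities": [],
    },
}

experiment_setting = get_experiment_settings(paper_id=39,              # illustrative ID
                                             paper_decision="Reject",  # illustrative decision
                                             setting=setting)
assert experiment_setting["paper_id"] == 39
```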
agentreview/role_descriptions.py
CHANGED
@@ -5,7 +5,7 @@ import numpy as np
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
-import const
+from agentreview import const
 from agentreview.config import AgentConfig
 
 PLAYER_BACKEND = {
agentreview/ui/cli.py
CHANGED
@@ -1,5 +1,4 @@
 import logging
-import logging
 import os
 import os.path as osp
 from typing import Union
@@ -11,8 +10,8 @@ from prompt_toolkit.completion import WordCompleter
 from prompt_toolkit.styles import Style
 from rich.console import Console
 
-from utility.utils import get_rebuttal_dir, …
-
+from agentreview.utility.utils import get_rebuttal_dir, load_llm_ac_decisions, \
+    save_llm_ac_decisions
 from ..arena import Arena, TooManyInvalidActions
 from ..backends.human import HumanBackendError
 from ..environments import PaperReview, PaperDecision
@@ -222,7 +221,8 @@ class ArenaCLI:
             # Print the new messages
             for msg in messages:
                 message_str = f"[{msg.agent_name}->{msg.visible_to}]: {msg.content}"
-                …
+                if self.args.skip_logging:
+                    console.print(color_dict[name_to_color[msg.agent_name]] + message_str + CRStyle.RESET_ALL)
                 msg.logged = True
 
             step += 1
@@ -251,7 +251,7 @@ class ArenaCLI:
             self.arena.save_history(path_review_history)
 
         elif env.type_name == "paper_decision":
-            ac_decisions = load_gpt4_generated_ac_decisions(…,
+            ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
                                                  conference=args.conference,
                                                  model_name=args.model_name,
                                                  ac_scoring_method=args.ac_scoring_method,
@@ -261,9 +261,9 @@ class ArenaCLI:
 
             ac_decisions += [env.ac_decisions]
 
-            …
+            save_llm_ac_decisions(ac_decisions,
+                                  output_dir=args.output_dir,
+                                  conference=args.conference,
+                                  model_name=args.model_name,
+                                  ac_scoring_method=args.ac_scoring_method,
+                                  experiment_name=args.experiment_name)
agentreview/utility/__init__.py
ADDED
File without changes
{utility → agentreview/utility}/authentication_utils.py
RENAMED
@@ -16,6 +16,16 @@ def get_openai_client(client_type: str):
 
     assert client_type in ["azure_openai", "openai"]
 
+    endpoint: str = os.environ['AZURE_ENDPOINT']
+
+    if not endpoint.startswith("https://"):
+        endpoint = f"https://{endpoint}.openai.azure.com"
+
+    os.environ['AZURE_ENDPOINT'] = endpoint
+
+    if not os.environ.get('OPENAI_API_VERSION'):
+        os.environ['OPENAI_API_VERSION'] = "2023-05-15"
+
     if client_type == "openai":
         client = openai.OpenAI(
             api_key=os.environ['OPENAI_API_KEY']
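The relocated helper now normalizes `AZURE_ENDPOINT` and backfills `OPENAI_API_VERSION` before constructing a client. A sketch of the Azure path under assumed environment values (the resource name is illustrative; the Azure key variable read later in the function is outside this hunk):

```python
import os

from agentreview.utility.authentication_utils import get_openai_client

# A bare resource name is expanded to https://my-resource.openai.azure.com
# and written back into the environment.
os.environ["AZURE_ENDPOINT"] = "my-resource"

# If OPENAI_API_VERSION is unset, the helper defaults it to "2023-05-15".
client = get_openai_client(client_type="azure_openai")
```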
{utility → agentreview/utility}/data_utils.py
RENAMED
File without changes
agentreview/utility/experiment_utils.py
ADDED
@@ -0,0 +1,84 @@
+import os
+import sys
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from agentreview.agent import Player
+from agentreview.paper_review_player import PaperExtractorPlayer, AreaChair, Reviewer
+from agentreview.role_descriptions import get_ac_config, get_reviewer_player_config, get_author_config, \
+    get_paper_extractor_config
+
+
+def initialize_players(experiment_setting: dict, args):
+    paper_id = experiment_setting['paper_id']
+    paper_decision = experiment_setting['paper_decision']
+
+    if args.task == "paper_decision":
+        experiment_setting["players"] = {k: v for k, v in experiment_setting["players"].items() if k.startswith("AC")}
+
+    players = []
+
+    for role, players_list in experiment_setting["players"].items():
+
+        for i, player_config in enumerate(players_list):
+            if role == "AC":
+
+                # For AC, `env_type` is either "paper_decision" or "paper_review"
+                player_config = get_ac_config(env_type=args.task,
+                                              scoring_method=args.ac_scoring_method,
+                                              num_papers_per_area_chair=args.num_papers_per_area_chair,
+                                              global_settings=experiment_setting['global_settings'],
+                                              acceptance_rate=args.acceptance_rate,
+                                              **player_config)
+
+                player_config['model'] = args.model_name
+
+                player = AreaChair(data_dir=args.data_dir,
+                                   conference=args.conference,
+                                   args=args,
+                                   **player_config)
+
+
+            elif args.task == "paper_review":
+
+                if role == "Paper Extractor":
+
+                    player_config = get_paper_extractor_config(global_settings=experiment_setting['global_settings'])
+
+                    player = PaperExtractorPlayer(data_dir=args.data_dir, paper_id=paper_id,
+                                                  paper_decision=paper_decision,
+                                                  args=args,
+                                                  conference=args.conference, **player_config)
+
+
+                elif role == "Author":
+
+                    # Author requires no behavior customization.
+                    # So we directly use the Player class
+                    player_config = get_author_config()
+                    player = Player(data_dir=args.data_dir,
+                                    conference=args.conference,
+                                    args=args,
+                                    **player_config)
+
+
+                elif role == "Reviewer":
+                    player_config = get_reviewer_player_config(reviewer_index=i + 1,
+                                                               global_settings=experiment_setting['global_settings'],
+                                                               **player_config)
+                    player_config['model'] = args.model_name
+                    player = Reviewer(data_dir=args.data_dir, conference=args.conference, args=args, **player_config)
+
+
+                else:
+                    raise NotImplementedError(f"Unknown role for paper review (stage 1-4): {role}")
+
+            else:
+                raise NotImplementedError(f"Unknown role for paper decision (stage 5): {role}")
+
+            players.append(player)
+
+    return players
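A sketch of how the new `initialize_players` is meant to be driven, mirroring the `Namespace` and `malicious_Rx1` setting used in `demo.py`; the paper ID and decision are illustrative, and `all_settings` is assumed to expose the chosen experiment:

```python
from argparse import Namespace

from agentreview.experiment_config import all_settings
from agentreview.paper_review_settings import get_experiment_settings
from agentreview.utility.experiment_utils import initialize_players

args = Namespace(task="paper_review", data_dir="./data", conference="ICLR2024",
                 model_name="gpt-4o", ac_scoring_method="ranking",
                 num_papers_per_area_chair=10, acceptance_rate=0.32,
                 openai_client_type="azure_openai", experiment_name="malicious_Rx1")

experiment_setting = get_experiment_settings(paper_id=39, paper_decision="Reject",
                                             setting=all_settings["malicious_Rx1"])

# One Player per configured role: Paper Extractor, AC, Reviewers, Author.
players = initialize_players(experiment_setting=experiment_setting, args=args)
```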
{utility → agentreview/utility}/general_utils.py
RENAMED
File without changes
{utility → agentreview/utility}/metrics_utils.py
RENAMED
File without changes
{utility → agentreview/utility}/text_utils.py
RENAMED
File without changes
{utility → agentreview/utility}/utils.py
RENAMED
@@ -9,8 +9,8 @@ from typing import Union, List, Dict, Tuple
 import numpy as np
 import pandas as pd
 
-import const
-from utility.general_utils import check_cwd, set_seed
+from agentreview import const
+from agentreview.utility.general_utils import check_cwd, set_seed
 
 
 def generate_num_papers_to_accept(n, batch_number, shuffle=True):
@@ -36,25 +36,25 @@ def generate_num_papers_to_accept(n, batch_number, shuffle=True):
     return array
 
 
-def get_papers_accepted_by_gpt4(gpt4_generated_decisions, acceptance_rate: float) -> list:
-    papers_accepted_by_gpt4 = []
+def get_papers_accepted_by_llm(llm_ac_decisions, acceptance_rate: float) -> list:
+    papers_accepted_by_llm = []
 
-    num_papers = sum([len(batch) for batch in gpt4_generated_decisions])
+    num_papers = sum([len(batch) for batch in llm_ac_decisions])
 
     if num_papers == 0:
         raise ValueError("No papers found in batch")
 
-    num_papers_to_accept = generate_num_papers_to_accept(n=acceptance_rate * num_papers,
-                                                         batch_number=len(gpt4_generated_decisions))
+    num_papers_to_accept = generate_num_papers_to_accept(n=acceptance_rate * num_papers,
+                                                         batch_number=len(llm_ac_decisions))
 
-    for idx_batch, batch in enumerate(gpt4_generated_decisions):
+    for idx_batch, batch in enumerate(llm_ac_decisions):
         tups = sorted([(paper_id, rank) for paper_id, rank in batch.items()], key=lambda x: x[1], reverse=False)
 
         paper_ids = [int(paper_id) for paper_id, rank in tups]
 
-        papers_accepted_by_gpt4 += paper_ids[:num_papers_to_accept[idx_batch]]
+        papers_accepted_by_llm += paper_ids[:num_papers_to_accept[idx_batch]]
 
-    return papers_accepted_by_gpt4
+    return papers_accepted_by_llm
 
 
 def get_paper_decision_mapping(data_dir: str, conference: str, verbose: bool = False):
@@ -151,6 +151,8 @@ def get_rebuttal_dir(output_dir: str,
 
 
 def print_colored(text, color='red'):
+
+    # Dictionary of ANSI color codes for terminal
     foreground_colors = {
         'black': 30,
         'red': 31,
@@ -161,7 +163,16 @@ def print_colored(text, color='red'):
         'cyan': 36,
         'white': 37,
     }
-    …
+    try:
+
+        # get_ipython is specific to Jupyter and IPython.
+        # We use this to decide whether we are running a Jupyter notebook or not.
+        get_ipython
+        print(text)  # Plain text in Jupyter
+    except:
+        # If not Jupyter, print with color codes
+        color_code = foreground_colors.get(color, 31)  # Default to red if color not found
+        print(f"\033[{color_code}m{text}\033[0m")
 
 
 def get_ac_decision_path(output_dir: str, conference: str, model_name: str, ac_scoring_method: str, experiment_name:
@@ -351,71 +362,116 @@ def get_experiment_names(conference: str = "ICLR2023"):
     return experiment_names
 
 
-def …(
-    …
-):
+def load_llm_ac_decisions_as_array(
+        output_dir: str,
+        experiment_name: str,
+        ac_scoring_method: str,
+        acceptance_rate: float,
+        conference: str,
+        model_name: str,
+        num_papers_per_area_chair: int
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Loads and processes GPT-4 generated area chair (AC) decisions for an experiment.
+
+    Args:
+        experiment_name (str): Name of the experiment.
+        ac_scoring_method (str): Method used for AC scoring ('ranking' or 'recommendation').
+        acceptance_rate (float): Acceptance rate for the conference.
+        conference (str): Name of the conference.
+        model_name (str): Model name used to generate AC decisions.
+        num_papers_per_area_chair (int): Number of papers assigned to each area chair.
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: An array of decisions (True for accept, False for reject)
+            and an array of paper IDs in the order processed.
+
+    Raises:
+        NotImplementedError: If `ac_scoring_method` is not 'ranking' or 'recommendation'.
+    """
     print("=" * 30)
     print(f"Experiment Name: {experiment_name}")
 
-    …
+    llm_ac_decisions = load_llm_ac_decisions(
+        output_dir=output_dir,
+        conference=conference,
+        model_name=model_name,
+        ac_scoring_method=ac_scoring_method,
+        experiment_name=experiment_name,
+        num_papers_per_area_chair=num_papers_per_area_chair
+    )
 
-    paper_ids = sorted(…)
+    paper_ids = sorted(
+        int(paper_id) for batch in llm_ac_decisions for paper_id in batch
+    )
 
     if ac_scoring_method == "ranking":
-        …
+        if len(paper_ids) != len(set(paper_ids)):
+            raise ValueError(f"Duplicate paper_ids found in the AC decisions: {Counter(paper_ids)}")
 
-        # True means accept, False means reject
-        decisions_gpt4 = np.array(
-            [True if paper_id in papers_accepted_by_gpt4 else False for paper_id in paper_ids])
+        papers_accepted_by_llm = get_papers_accepted_by_llm(llm_ac_decisions, acceptance_rate)
+        decisions_llm = np.array([paper_id in papers_accepted_by_llm for paper_id in paper_ids])
 
     elif ac_scoring_method == "recommendation":
-        …
+        llm_ac_decisions_flat = {int(k): v for batch in llm_ac_decisions for k, v in batch.items()}
+        decisions_llm = np.array(
+            [llm_ac_decisions_flat[paper_id].startswith("Accept") for paper_id in paper_ids]
+        )
     else:
-        raise NotImplementedError
+        raise NotImplementedError(f"Scoring method '{ac_scoring_method}' not implemented.")
 
-    return …
+    return decisions_llm, np.array(paper_ids)
 
 
-def load_gpt4_generated_ac_decisions(…):
+def load_llm_ac_decisions(
+        output_dir: str,
+        conference: str,
+        model_name: str,
+        ac_scoring_method: str,
+        experiment_name: str,
+        num_papers_per_area_chair: int
+) -> List[Dict[str, str]]:
+    """Loads GPT-4 generated area chair (AC) decisions from a specified path.
+
+    Args:
+        conference (str): Name of the conference.
+        model_name (str): Model name used to generate AC decisions.
+        ac_scoring_method (str): Method used for AC scoring ('ranking' or 'recommendation').
+        experiment_name (str): Name of the experiment.
+        num_papers_per_area_chair (int): Number of papers assigned to each area chair.
+
+    Returns:
+        List[Dict[str, str]]: List of batches, where each batch contains paper ID and decision.
+
+    Raises:
+        AssertionError: If a non-final batch has a paper count different from `num_papers_per_area_chair`.
+    """
+    path = get_ac_decision_path(
+        output_dir=output_dir,
+        conference=conference,
+        model_name=model_name,
+        ac_scoring_method=ac_scoring_method,
+        experiment_name=experiment_name
+    )
 
     if osp.exists(path):
-        …
+        with open(path, 'r', encoding='utf-8') as file:
+            ac_decision = json.load(file)
         print(f"Loaded {len(ac_decision)} batches of existing AC decisions from {path}")
-
     else:
         ac_decision = []
         print(f"No existing AC decisions found at {path}")
 
-    ac_decision = [batch for batch in ac_decision if …]
+    ac_decision = [batch for batch in ac_decision if batch]  # Remove empty batches
 
     for i, batch in enumerate(ac_decision):
         if i != len(ac_decision) - 1:
-            …
+            if len(batch) != num_papers_per_area_chair:
+                raise AssertionError(
+                    f"Batch {i} has {len(batch)} papers, expected {num_papers_per_area_chair} for non-final batches."
+                )
 
     return ac_decision
 
-
 def write_to_excel(data, file_path, sheet_name):
     """
     Write data to an Excel file.
@@ -436,7 +492,7 @@ def write_to_excel(data, file_path, sheet_name):
         data.to_excel(writer, sheet_name=sheet_name, index=False)
 
 
-def …(ac_decisions, **kwargs):
+def save_llm_ac_decisions(ac_decisions: List[dict], **kwargs):
     path = get_ac_decision_path(**kwargs)
 
     json.dump(ac_decisions, open(path, 'w', encoding='utf-8'), indent=2)
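The renamed helpers make the load → decide → save cycle model-agnostic. A minimal sketch of the round-trip, with illustrative paths, IDs, and experiment names:

```python
from agentreview.utility.utils import (get_papers_accepted_by_llm,
                                       load_llm_ac_decisions,
                                       save_llm_ac_decisions)

path_kwargs = dict(output_dir="outputs", conference="ICLR2024", model_name="gpt-4o",
                   ac_scoring_method="ranking", experiment_name="malicious_Rx1")

# Under "ranking", each batch maps paper IDs to the AC's rank for that paper.
batches = [{"39": 1, "42": 2}]
save_llm_ac_decisions(batches, **path_kwargs)

decisions = load_llm_ac_decisions(num_papers_per_area_chair=2, **path_kwargs)
accepted = get_papers_accepted_by_llm(decisions, acceptance_rate=0.32)
```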
data
ADDED
@@ -0,0 +1 @@
+../agent4reviews/data
demo.py
ADDED
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# # AgentReview
+#
+#
+#
+# In this tutorial, you will explore customizing the AgentReview experiment.
+#
+# Venue: EMNLP 2024 (Oral)
+#
+# arXiv: [https://arxiv.org/abs/2406.12708](https://arxiv.org/abs/2406.12708)
+#
+# Website: [https://agentreview.github.io/](https://agentreview.github.io/)
+#
+# ```bibtex
+# @inproceedings{jin2024agentreview,
+#     title={AgentReview: Exploring Peer Review Dynamics with LLM Agents},
+#     author={Jin, Yiqiao and Zhao, Qinlin and Wang, Yiyang and Chen, Hao and Zhu, Kaijie and Xiao, Yijia and Wang, Jindong},
+#     booktitle={EMNLP},
+#     year={2024}
+# }
+# ```
+#
+
+# In[2]:
+
+
+import os
+
+import numpy as np
+
+from agentreview import const
+
+os.environ["OPENAI_API_VERSION"] = "2024-06-01-preview"
+
+
+# ## Overview
+#
+# AgentReview features a range of customizable variables, such as characteristics of reviewers, authors, area chairs (ACs), as well as the reviewing mechanisms
+
+# In[3]:
+
+
+
+# ## Review Pipeline
+#
+# The simulation adopts a structured, 5-phase pipeline (Section 2 in the [paper](https://arxiv.org/abs/2406.12708)):
+#
+# * **I. Reviewer Assessment.** Each manuscript is evaluated by three reviewers independently.
+# * **II. Author-Reviewer Discussion.** Authors submit rebuttals to address reviewers' concerns;
+# * **III. Reviewer-AC Discussion.** The AC facilitates discussions among reviewers, prompting updates to their initial assessments.
+# * **IV. Meta-Review Compilation.** The AC synthesizes the discussions into a meta-review.
+# * **V. Paper Decision.** The AC makes the final decision on whether to accept or reject the paper, based on all gathered inputs.
+
+# In[2]:
+
+
+
+# In[4]:
+
+
+import os
+
+if os.path.basename(os.getcwd()) == "notebooks":
+    os.chdir("..")
+    # Change the working directory to AgentReview
+    print(f"Changing the current working directory to {os.path.basename(os.getcwd())}")
+
+
+# In[5]:
+
+
+from argparse import Namespace
+
+args = Namespace(openai_key=None,
+                 deployment=None,
+                 openai_client_type='azure_openai',
+                 endpoint=None,
+                 api_version='2023-05-15',
+                 ac_scoring_method='ranking',
+                 conference='ICLR2024',
+                 num_reviewers_per_paper=3,
+                 ignore_missing_metareviews=False,
+                 overwrite=False,
+                 num_papers_per_area_chair=10,
+                 model_name='gpt-4o',
+                 output_dir='outputs',
+                 max_num_words=16384,
+                 visual_dir='outputs/visual',
+                 device='cuda',
+                 data_dir='./data',  # Directory to all paper PDF
+                 acceptance_rate=0.32,
+                 task='paper_review')
+
+os.environ['OPENAI_API_VERSION'] = args.api_version
+
+# In[13]:
+
+
+malicious_Rx1_setting = {
+    "AC": [
+        "BASELINE"
+    ],
+
+    "reviewer": [
+        "malicious",
+        "BASELINE",
+        "BASELINE"
+    ],
+
+    "author": [
+        "BASELINE"
+    ],
+    "global_settings": {
+        "provides_numeric_rating": ['reviewer', 'ac'],
+        "persons_aware_of_authors_identities": []
+    }
+}
+
+all_settings = {"malicious_Rx1": malicious_Rx1_setting}
+args.experiment_name = "malicious_Rx1_setting"
+
+
+#
+# `malicious_Rx1` means 1 reviewer is a malicious reviewer, and the other reviewers are default (i.e. `BASELINE`) reviewers.
+#
+#
+
+# ## Reviews
+#
+# Define the review pipeline
+
+# In[10]:
+
+
+from agentreview.environments import PaperReview
+
+def review_one_paper(paper_id, setting):
+    paper_decision = paper_id2decision[paper_id]
+
+    experiment_setting = get_experiment_settings(paper_id=paper_id,
+                                                 paper_decision=paper_decision,
+                                                 setting=setting)
+    print(f"Paper ID: {paper_id} (Decision in {args.conference}: {paper_decision})")
+
+    players = initialize_players(experiment_setting=experiment_setting, args=args)
+
+    player_names = [player.name for player in players]
+
+    env = PaperReview(player_names=player_names, paper_decision=paper_decision, paper_id=paper_id,
+                      args=args, experiment_setting=experiment_setting)
+
+    arena = PaperReviewArena(players=players, environment=env, args=args)
+    arena.launch_cli(interactive=False)
+
+
+# In[11]:
+
+
+import os
+import sys
+
+sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "agentreview")))
+
+from agentreview.paper_review_settings import get_experiment_settings
+from agentreview.paper_review_arena import PaperReviewArena
+from agentreview.utility.experiment_utils import initialize_players
+from agentreview.utility.utils import project_setup, get_paper_decision_mapping
+
+
+# In[14]:
+
+
+sampled_paper_ids = [39]
+
+paper_id2decision, paper_decision2ids = get_paper_decision_mapping(args.data_dir, args.conference)
+
+for paper_id in sampled_paper_ids:
+    review_one_paper(paper_id, malicious_Rx1_setting)
+
+
+
+def run_paper_decision():
+    args.task = "paper_decision"
+
+    # Make sure the same set of papers always go through the same AC no matter which setting we choose
+    NUM_PAPERS = len(const.year2paper_ids[args.conference])
+    order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)
+
+
+    # Paper IDs we actually used in experiments
+    experimental_paper_ids = []
+
+    # For papers that have not been decided yet, load their metareviews
+    metareviews = []
+    print("Shuffling paper IDs")
+    sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]
+
+    # Exclude papers that already have AC decisions
+    existing_ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
+                                                  conference=args.conference,
+                                                  model_name=args.model_name,
+                                                  ac_scoring_method=args.ac_scoring_method,
+                                                  experiment_name=args.experiment_name,
+                                                  num_papers_per_area_chair=args.num_papers_per_area_chair)
+
+    sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids if paper_id not in existing_ac_decisions]
+
+
+
+
+# In[ ]:
+
+
+
notebooks/demo.ipynb
CHANGED
The diff for this file is too large to render; see the raw diff.
requirements.txt
CHANGED
@@ -16,4 +16,4 @@ transformers
 tenacity
 openai
 gradio
-
+jupyter
run_paper_decision_cli.py
CHANGED
@@ -6,17 +6,15 @@ import numpy as np
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
-import const
+from agentreview import const
+from agentreview.utility.experiment_utils import initialize_players
 from agentreview.experiment_config import all_settings
 from agentreview.paper_review_settings import get_experiment_settings
-from agentreview.config import AgentConfig
 from agentreview.environments import PaperDecision
 from agentreview.paper_review_arena import PaperReviewArena
-from agentreview.…
-from …
-
-from utility.utils import project_setup, get_paper_decision_mapping, \
-    load_metareview, load_gpt4_generated_ac_decisions
+from agentreview.arguments import parse_args
+from agentreview.utility.utils import project_setup, get_paper_decision_mapping, \
+    load_metareview, load_llm_ac_decisions
 
 # Set up logging configuration
 logging.basicConfig(
@@ -27,6 +25,8 @@ logging.basicConfig(
     ]
 )
 
+logger = logging.getLogger(__name__)
+
 
 def main(args):
     """
@@ -46,18 +46,16 @@ def main(args):
     NUM_PAPERS = len(const.year2paper_ids[args.conference])
     order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)
 
-    metareviews = []
-
     # Paper IDs we actually used in experiments
     experimental_paper_ids = []
 
     # For papers that have not been decided yet, load their metareviews
-
+    metareviews = []
     print("Shuffling paper IDs")
     sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]
 
     # Exclude papers that already have AC decisions
-    existing_ac_decisions = load_gpt4_generated_ac_decisions(…,
+    existing_ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
                                                   conference=args.conference,
                                                   model_name=args.model_name,
                                                   ac_scoring_method=args.ac_scoring_method,
@@ -68,65 +66,62 @@ def main(args):
 
     sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids if paper_id not in existing_ac_decisions]
 
-    …
+    experiment_setting = get_experiment_settings(paper_id=None, paper_decision=None, setting=all_settings[
+        args.experiment_name])
 
-
+    logger.info(f"Loading metareview!")
 
     for paper_id in sampled_paper_ids:
 
-        experiment_setting = get_experiment_settings(all_settings[args.experiment_name])
-
         # Load meta-reviews
         metareview = load_metareview(output_dir=args.output_dir, paper_id=paper_id,
                                      experiment_name=args.experiment_name,
                                      model_name=args.model_name, conference=args.conference)
 
         if metareview is None:
+            print(f"Metareview for {paper_id} does not exist. This may happen because the conversation is "
+                  f"completely filtered out due to content policy. "
+                  f"Loading the BASELINE metareview...")
 
-            …
-            …
-            …
-                  f"completely filtered out due to content policy. "
-                  f"Loading the BASELINE metareview...")
-
-            metareview = load_metareview(paper_id=paper_id, experiment_name="BASELINE",
-                                         model_name=args.model_name, conference=args.conference)
+            metareview = load_metareview(output_dir=args.output_dir, paper_id=paper_id,
+                                         experiment_name="BASELINE",
+                                         model_name=args.model_name, conference=args.conference)
 
-        …
-            raise ValueError(f"Metareview for {paper_id} does not exist")
+        if metareview is not None:
 
-        …
-        …
+            metareviews += [metareview]
+            experimental_paper_ids += [paper_id]
 
     num_batches = len(experimental_paper_ids) // args.num_papers_per_area_chair
 
     for batch_index in range(num_batches):
-        experiment_setting["players"] = {k: v for k, v in experiment_setting["players"].items() if k.startswith("AC")}
-
-        players = []
-
-        for role, players_li in experiment_setting["players"].items():
-
-            for i, player_config in enumerate(players_li):
-
-                # This phase should only contain the Area Chair
-                if role == "AC":
-
-                    player_config = get_ac_config(env_type="paper_decision",
-                                                  scoring_method=args.ac_scoring_method,
-                                                  num_papers_per_area_chair=args.num_papers_per_area_chair,
-                                                  global_settings=experiment_setting['global_settings'],
-                                                  acceptance_rate=args.acceptance_rate
-                                                  **player_config)
-
-                    player_config = AgentConfig(**player_config)
-                    player_config['model'] = args.model_name
-                    player = AreaChair(**player_config)
-
-                else:
-                    raise NotImplementedError(f"Unknown role: {role}")
-
-        …
+        players = initialize_players(experiment_setting=experiment_setting, args=args)
+
+        # players = []
+        #
+        # for role, players_li in experiment_setting["players"].items():
+        #
+        #     for i, player_config in enumerate(players_li):
+        #
+        #         # This phase should only contain the Area Chair
+        #         if role == "AC":
+        #
+        #             player_config = get_ac_config(env_type="paper_decision",
+        #                                           scoring_method=args.ac_scoring_method,
+        #                                           num_papers_per_area_chair=args.num_papers_per_area_chair,
+        #                                           global_settings=experiment_setting['global_settings'],
+        #                                           acceptance_rate=args.acceptance_rate
+        #                                           ** player_config)
+        #
+        #             # player_config = AgentConfig(**player_config)
+        #             player_config['model'] = args.model_name
+        #             player = AreaChair(**player_config)
+        #
+        #         else:
+        #             raise NotImplementedError(f"Unknown role: {role}")
+        #
+        #         players.append(player)
 
         player_names = [player.name for player in players]
 
@@ -141,7 +136,7 @@ def main(args):
                         metareviews=metareviews,
                         experiment_setting=experiment_setting, ac_scoring_method=args.ac_scoring_method)
 
-    arena = PaperReviewArena(players=players, environment=env, args=args)
+    arena = PaperReviewArena(players=players, environment=env, args=args, global_prompt=const.GLOBAL_PROMPT)
     arena.launch_cli(interactive=False)
 
run_paper_review_cli.py
CHANGED
@@ -4,18 +4,17 @@ import os
 import sys
 from argparse import Namespace
 
+
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
-from …
+from agentreview import const
+from agentreview.arguments import parse_args
 from agentreview.experiment_config import all_settings
-from agentreview.agent import Player
 from agentreview.environments import PaperReview
 from agentreview.paper_review_settings import get_experiment_settings
 from agentreview.paper_review_arena import PaperReviewArena
-from agentreview.…
-from agentreview.… \
-    get_paper_extractor_config
-from utility.utils import project_setup, get_paper_decision_mapping
+from agentreview.utility.experiment_utils import initialize_players
+from agentreview.utility.utils import project_setup, get_paper_decision_mapping
 
 # Set up logging configuration
 logging.basicConfig(
@@ -53,81 +52,84 @@ def main(args: Namespace):
     sampled_paper_ids = [int(os.path.basename(p).split(".pdf")[0]) for p in paper_paths if p.endswith(".pdf")]
 
     for paper_id in sampled_paper_ids:
-
-        experiment_setting = get_experiment_settings(all_settings[args.experiment_name])
-
         # Ground-truth decision in the conference.
         # We use this to partition the papers into different quality.
         paper_decision = paper_id2decision[paper_id]
 
-        …
+        experiment_setting = get_experiment_settings(paper_id=paper_id,
+                                                     paper_decision=paper_decision,
+                                                     setting=all_settings[args.experiment_name])
+
+        logger.info(f"Experiment Started!")
+        logger.info(f"Paper ID: {paper_id} (Decision in {args.conference}: {paper_decision})")
+
+        players = initialize_players(experiment_setting=experiment_setting, args=args)
+
+        player_names = [player.name for player in players]
+
+        # for role, players_list in experiment_setting["players"].items():
+        #
+        #     for i, player_config in enumerate(players_list):
+        #         if role == "Paper Extractor":
+        #
+        #             player_config = get_paper_extractor_config(global_settings=experiment_setting['global_settings'], )
+        #
+        #             player = PaperExtractorPlayer(data_dir=args.data_dir, paper_id=paper_id,
+        #                                           paper_decision=paper_decision,
+        #                                           args=args,
+        #                                           conference=args.conference, **player_config)
+        #
+        #             player_names.append(player.name)
+        #
+        #
+        #         elif role == "AC":
+        #
+        #             player_config = get_ac_config(env_type="paper_review",
+        #                                           scoring_method=args.ac_scoring_method,
+        #                                           num_papers_per_area_chair=args.num_papers_per_area_chair,
+        #                                           global_settings=experiment_setting['global_settings'],
+        #                                           acceptance_rate=args.acceptance_rate,
+        #                                           **player_config)
+        #
+        #             player_config['model'] = args.model_name
+        #
+        #             player = AreaChair(data_dir=args.data_dir,
+        #                                conference=args.conference,
+        #                                args=args,
+        #                                **player_config)
+        #
+        #             player_names.append(player.name)
+        #
+        #
+        #         elif role == "Author":
+        #
+        #             # Author requires no behavior customization.
+        #             # So we directly use the Player class
+        #             player_config = get_author_config()
+        #             player = Player(data_dir=args.data_dir,
+        #                             conference=args.conference,
+        #                             args=args,
+        #                             **player_config)
+        #
+        #             player_names.append(player.name)
+        #
+        #         elif role == "Reviewer":
+        #             player_config = get_reviewer_player_config(reviewer_index=i + 1,
+        #                                                        global_settings=experiment_setting['global_settings'],
+        #                                                        **player_config)
+        #             player_config['model'] = args.model_name
+        #             player = Reviewer(data_dir=args.data_dir, conference=args.conference, **player_config)
+        #             player_names.append(player.name)
+        #
+        #         else:
+        #             raise NotImplementedError(f"Unknown role: {role}")
+        #
+        #         players.append(player)
 
         env = PaperReview(player_names=player_names, paper_decision=paper_decision, paper_id=paper_id,
                           args=args, experiment_setting=experiment_setting)
 
-        arena = PaperReviewArena(players=players, environment=env, args=args)
+        arena = PaperReviewArena(players=players, environment=env, args=args, global_prompt=const.GLOBAL_PROMPT)
        arena.launch_cli(interactive=False)
 
     logger.info("Done!")