Delete chat_reviewer.py
chat_reviewer.py  +0 -183
chat_reviewer.py
DELETED
@@ -1,183 +0,0 @@
import numpy as np
import os
import re
import datetime
import time
import openai, tenacity
import argparse
import configparser
import json
import tiktoken
from get_paper_from_pdf import Paper


# Reviewer class
class Reviewer:
    # Initialize the reviewer and set its attributes
    def __init__(self, args=None):
        if args.language == 'en':
            self.language = 'English'
        elif args.language == 'zh':
            self.language = 'Chinese'
        else:
            self.language = 'Chinese'
        # Create a ConfigParser object
        self.config = configparser.ConfigParser()
        # Read the configuration file
        self.config.read('apikey.ini')
        # Load the API keys stored under the [OpenAI] section
        self.chat_api_list = self.config.get('OpenAI', 'OPENAI_API_KEYS')[1:-1].replace('\'', '').split(',')
        self.chat_api_list = [api.strip() for api in self.chat_api_list if len(api) > 5]
        self.cur_api = 0
        self.file_format = args.file_format
        self.max_token_num = 4096
        self.encoding = tiktoken.get_encoding("gpt2")

    def validateTitle(self, title):
        # Sanitize the paper title so it can be used as a file name
        rstr = r"[\/\\\:\*\?\"\<\>\|]"  # '/ \ : * ? " < > |'
        new_title = re.sub(rstr, "_", title)  # replace forbidden characters with underscores
        return new_title

    def review_by_chatgpt(self, paper_list):
        htmls = []
        for paper_index, paper in enumerate(paper_list):
            sections_of_interest = self.stage_1(paper)
            # extract the essential parts of the paper
            text = ''
            text += 'Title:' + paper.title + '. '
            text += 'Abstract: ' + paper.section_texts['Abstract']
            intro_title = next((item for item in paper.section_names if 'ntroduction' in item), None)
            if intro_title is not None:
                text += 'Introduction: ' + paper.section_texts[intro_title]
            # Similar for the conclusion section
            conclusion_title = next((item for item in paper.section_names if 'onclusion' in item), None)
            if conclusion_title is not None:
                text += 'Conclusion: ' + paper.section_texts[conclusion_title]
            for heading in sections_of_interest:
                if heading in paper.section_names:
                    text += heading + ': ' + paper.section_texts[heading]
            chat_review_text = self.chat_review(text=text)
            htmls.append('## Paper:' + str(paper_index + 1))
            htmls.append('\n\n\n')
            htmls.append(chat_review_text)

            # Save the review to disk
            date_str = str(datetime.datetime.now())[:13].replace(' ', '-')
            export_path = os.path.join('./', 'output_file')
            os.makedirs(export_path, exist_ok=True)
            mode = 'w' if paper_index == 0 else 'a'
            file_name = os.path.join(export_path, date_str + '-' + self.validateTitle(paper.title) + "." + self.file_format)
            self.export_to_markdown("\n".join(htmls), file_name=file_name, mode=mode)
            htmls = []

    # Stage 1: send only the title and abstract and ask the model which
    # (at most two) additional sections it wants to see for the full review.
    def stage_1(self, paper):
        htmls = []
        text = ''
        text += 'Title: ' + paper.title + '. '
        text += 'Abstract: ' + paper.section_texts['Abstract']
        openai.api_key = self.chat_api_list[self.cur_api]
        # round-robin over the available API keys
        self.cur_api += 1
        self.cur_api = 0 if self.cur_api >= len(self.chat_api_list) else self.cur_api
        # args.research_fields is read from the module-level args parsed in __main__
        messages = [
            {"role": "system",
             "content": f"You are a professional reviewer in the field of {args.research_fields}. "
                        f"I will give you a paper. You need to review this paper and discuss the novelty and originality of ideas, correctness, clarity, the significance of results, potential impact and quality of the presentation. "
                        f"Due to the length limitations, I am only allowed to provide you the abstract, introduction, conclusion and at most two sections of this paper."
                        f"Now I will give you the title and abstract and the headings of potential sections. "
                        f"You need to reply at most two headings. Then I will further provide you the full information, includes aforementioned sections and at most two sections you called for.\n\n"
                        f"Title: {paper.title}\n\n"
                        f"Abstract: {paper.section_texts['Abstract']}\n\n"
                        f"Potential Sections: {paper.section_names[2:-1]}\n\n"
                        f"Follow the following format to output your choice of sections:"
                        f"{{chosen section 1}}, {{chosen section 2}}\n\n"},
            {"role": "user", "content": text},
        ]
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
        result = ''
        for choice in response.choices:
            result += choice.message.content
        print(result)
        return result.split(',')

    # Stage 2: request the full review, truncating the input so it fits the context window.
    @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                    stop=tenacity.stop_after_attempt(5),
                    reraise=True)
    def chat_review(self, text):
        openai.api_key = self.chat_api_list[self.cur_api]
        self.cur_api += 1
        self.cur_api = 0 if self.cur_api >= len(self.chat_api_list) else self.cur_api
        review_prompt_token = 1000
        text_token = len(self.encoding.encode(text))
        input_text_index = int(len(text) * (self.max_token_num - review_prompt_token) / text_token)
        input_text = "This is the paper for your review:" + text[:input_text_index]
        with open('ReviewFormat.txt', 'r') as file:  # read the required review format
            review_format = file.read()
        messages = [
            {"role": "system", "content": "You are a professional reviewer in the field of " + args.research_fields + ". Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:" + review_format + " Please answer in {}.".format(self.language)},
            {"role": "user", "content": input_text},
        ]

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
        result = ''
        for choice in response.choices:
            result += choice.message.content
        print("********" * 10)
        print(result)
        print("********" * 10)
        print("prompt_token_used:", response.usage.prompt_tokens)
        print("completion_token_used:", response.usage.completion_tokens)
        print("total_token_used:", response.usage.total_tokens)
        print("response_time:", response.response_ms / 1000.0, 's')
        return result

    def export_to_markdown(self, text, file_name, mode='w'):
        # The markdown module could be used to convert the text to HTML:
        # html = markdown.markdown(text)
        # Open the output file in the requested mode
        with open(file_name, mode, encoding="utf-8") as f:
            # and write the review text to it
            f.write(text)


def main(args):
    reviewer1 = Reviewer(args=args)
    # Decide whether the input is a single PDF file or a directory of PDFs:
    paper_list = []
    if args.paper_path.endswith(".pdf"):
        paper_list.append(Paper(path=args.paper_path))
    else:
        for root, dirs, files in os.walk(args.paper_path):
            print("root:", root, "dirs:", dirs, 'files:', files)  # current directory being walked
            for filename in files:
                # If a PDF file is found, add it to the list of papers to review
                if filename.endswith(".pdf"):
                    paper_list.append(Paper(path=os.path.join(root, filename)))
    print("------------------paper_num: {}------------------".format(len(paper_list)))
    [print(paper_index, paper_name.path.split('\\')[-1]) for paper_index, paper_name in enumerate(paper_list)]
    reviewer1.review_by_chatgpt(paper_list=paper_list)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--paper_path", type=str, default='', help="path of papers")
    parser.add_argument("--file_format", type=str, default='txt', help="output file format")
    parser.add_argument("--research_fields", type=str, default='computer science, artificial intelligence and reinforcement learning', help="the research fields of paper")
    parser.add_argument("--language", type=str, default='en', help="output language, en or zh")

    args = parser.parse_args()
    start_time = time.time()
    main(args=args)
    print("review time:", time.time() - start_time)
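For context, the deleted script reads its API keys from an apikey.ini file and its review template from ReviewFormat.txt. The exact apikey.ini layout is not part of this commit; the sketch below is inferred from the parsing in Reviewer.__init__ (a bracketed, comma-separated list under an [OpenAI] section), and the key values and ./papers path are placeholders, not real credentials or files from the repository.

# Sketch: write an apikey.ini that the parsing in Reviewer.__init__ would accept.
import configparser

config = configparser.ConfigParser()
config['OpenAI'] = {
    # Bracketed, comma-separated list; brackets and quotes are stripped at load time.
    'OPENAI_API_KEYS': "[sk-placeholder-key-1, sk-placeholder-key-2]",
}
with open('apikey.ini', 'w') as f:
    config.write(f)

# A typical invocation, assuming the PDFs live in ./papers:
#   python chat_reviewer.py --paper_path ./papers --file_format txt --language en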