ShiwenNi committed on
Commit
730f882
·
1 Parent(s): f7f4e93

Delete chat_reviewer.py

Browse files
Files changed (1) hide show
  1. chat_reviewer.py +0 -183
chat_reviewer.py DELETED
@@ -1,183 +0,0 @@
1
- import numpy as np
2
- import os
3
- import re
4
- import datetime
5
- import time
6
- import openai, tenacity
7
- import argparse
8
- import configparser
9
- import json
10
- import tiktoken
11
- from get_paper_from_pdf import Paper
12
-
13
# Reviewer class: asks ChatGPT to review parsed papers and saves the result.
class Reviewer:
    """Produce ChatGPT-written reviews for papers and write them to disk.

    The review runs in two steps per paper: ``stage_1`` asks the model which
    extra sections it wants to read, then ``chat_review`` sends the title,
    abstract, introduction, conclusion and the requested sections and returns
    the review text.
    """

    def __init__(self, args=None):
        """Read the CLI options and the API keys from ``apikey.ini``.

        Args:
            args: Parsed command-line namespace. ``language``,
                ``file_format`` and ``research_fields`` are read from it.

        Raises:
            ValueError: If ``args`` is not supplied.
        """
        if args is None:
            raise ValueError(
                "Reviewer needs the parsed arguments "
                "(language, file_format, research_fields)"
            )
        # Anything other than 'en' (including 'zh') selects Chinese.
        self.language = 'English' if args.language == 'en' else 'Chinese'
        # Kept on the instance so the prompts do not depend on a module-level
        # ``args`` global, which only exists when the file runs as a script.
        self.research_fields = args.research_fields
        # Load the configuration file holding the API keys.
        self.config = configparser.ConfigParser()
        self.config.read('apikey.ini')
        # The value is stored as a bracketed, quoted, comma-separated list:
        # drop the brackets and the quotes, then split it.
        raw_keys = self.config.get('OpenAI', 'OPENAI_API_KEYS')
        self.chat_api_list = raw_keys[1:-1].replace('\'', '').split(',')
        self.chat_api_list = [api.strip() for api in self.chat_api_list if len(api) > 5]
        self.cur_api = 0
        self.file_format = args.file_format
        self.max_token_num = 4096
        # Tokenizer used to estimate how much of the paper fits in a request.
        self.encoding = tiktoken.get_encoding("gpt2")

    def _next_api_key(self):
        """Return the key at the cursor and advance the cursor round-robin.

        Raises:
            RuntimeError: If no API key was loaded from ``apikey.ini``.
        """
        if not self.chat_api_list:
            raise RuntimeError(
                "no OpenAI API key found in apikey.ini ([OpenAI] OPENAI_API_KEYS)"
            )
        key_count = len(self.chat_api_list)
        key = self.chat_api_list[self.cur_api % key_count]
        # Modulo wrap-around so that every key, including the last, is used.
        self.cur_api = (self.cur_api + 1) % key_count
        return key

    def validateTitle(self, title):
        """Return ``title`` with characters that are invalid in file names replaced by ``_``."""
        rstr = r"[\/\\\:\*\?\"\<\>\|]"  # '/ \ : * ? " < > |'
        new_title = re.sub(rstr, "_", title)
        return new_title

    def _build_review_text(self, paper, sections_of_interest):
        """Concatenate the parts of ``paper`` that are sent for review.

        Always includes the title and abstract, the first section whose name
        contains 'ntroduction' / 'onclusion' when present, and then every
        requested heading that exists in the paper and was not already added.
        """
        parts = [
            'Title:' + paper.title + '. ',
            'Abstract: ' + paper.section_texts['Abstract'],
        ]
        included = set()
        intro_title = next((item for item in paper.section_names if 'ntroduction' in item), None)
        if intro_title is not None:
            parts.append('Introduction: ' + paper.section_texts[intro_title])
            included.add(intro_title)
        conclusion_title = next((item for item in paper.section_names if 'onclusion' in item), None)
        if conclusion_title is not None:
            parts.append('Conclusion: ' + paper.section_texts[conclusion_title])
            included.add(conclusion_title)
        for heading in sections_of_interest:
            # Skip headings already sent above so the same text is not
            # included twice.
            if heading in paper.section_names and heading not in included:
                parts.append(heading + ': ' + paper.section_texts[heading])
                included.add(heading)
        return ''.join(parts)

    def review_by_chatgpt(self, paper_list):
        """Review every paper in ``paper_list`` and save one file per paper.

        Files are written to ``./output_file`` and named
        ``<date-hour>-<sanitized title>.<file_format>``.
        """
        for paper_index, paper in enumerate(paper_list):
            sections_of_interest = self.stage_1(paper)
            # Extract the essential parts of the paper.
            text = self._build_review_text(paper, sections_of_interest)
            chat_review_text = self.chat_review(text=text)
            htmls = ['## Paper:' + str(paper_index+1), '\n\n\n', chat_review_text]

            # Save the review.
            # First 13 characters of the timestamp: date plus hour.
            date_str = str(datetime.datetime.now())[:13].replace(' ', '-')
            export_path = os.path.join('./', 'output_file')
            os.makedirs(export_path, exist_ok=True)
            # The first paper's file is overwritten; later ones are opened in
            # append mode.
            mode = 'w' if paper_index == 0 else 'a'
            file_name = os.path.join(export_path, date_str+'-'+self.validateTitle(paper.title)+"."+self.file_format)
            self.export_to_markdown("\n".join(htmls), file_name=file_name, mode=mode)

    def stage_1(self, paper):
        """Ask the model which sections of ``paper`` it wants to read.

        Returns:
            A list of section headings parsed from the comma-separated reply,
            with surrounding whitespace removed and empty entries dropped.
        """
        text = ''
        text += 'Title: ' + paper.title + '. '
        text += 'Abstract: ' + paper.section_texts['Abstract']
        openai.api_key = self._next_api_key()
        messages = [
            {"role": "system",
             "content": f"You are a professional reviewer in the field of {self.research_fields}. "
                        f"I will give you a paper. You need to review this paper and discuss the novelty and originality of ideas, correctness, clarity, the significance of results, potential impact and quality of the presentation. "
                        f"Due to the length limitations, I am only allowed to provide you the abstract, introduction, conclusion and at most two sections of this paper."
                        f"Now I will give you the title and abstract and the headings of potential sections. "
                        f"You need to reply at most two headings. Then I will further provide you the full information, includes aforementioned sections and at most two sections you called for.\n\n"
                        f"Title: {paper.title}\n\n"
                        f"Abstract: {paper.section_texts['Abstract']}\n\n"
                        f"Potential Sections: {paper.section_names[2:-1]}\n\n"
                        f"Follow the following format to output your choice of sections:"
                        f"{{chosen section 1}}, {{chosen section 2}}\n\n"},
            {"role": "user", "content": text},
        ]
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
        result = ''.join(choice.message.content for choice in response.choices)
        print(result)
        # Strip each heading: the reply separates them with ", ", and a
        # leading space would prevent a match against the section names.
        return [heading.strip() for heading in result.split(',') if heading.strip()]

    @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                    stop=tenacity.stop_after_attempt(5),
                    reraise=True)
    def chat_review(self, text):
        """Send ``text`` to the model and return the review it writes.

        ``text`` is truncated proportionally so that its estimated token count
        stays within ``max_token_num`` minus 1000 tokens. The review format is
        read from ``ReviewFormat.txt``. The call is retried up to five times.
        """
        openai.api_key = self._next_api_key()
        review_prompt_token = 1000
        # Guard against a zero token count so the ratio below cannot divide
        # by zero.
        text_token = max(len(self.encoding.encode(text)), 1)
        # Scale the character count by the token budget / token count ratio.
        input_text_index = int(len(text)*(self.max_token_num-review_prompt_token)/text_token)
        input_text = "This is the paper for your review:" + text[:input_text_index]
        with open('ReviewFormat.txt', 'r') as file:  # read the required review format
            review_format = file.read()
        messages = [
            {"role": "system", "content": "You are a professional reviewer in the field of "+self.research_fields+". Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:"+ review_format +" Please answer in {}.".format(self.language)},
            {"role": "user", "content": input_text},
        ]

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
        result = ''.join(choice.message.content for choice in response.choices)
        print("********"*10)
        print(result)
        print("********"*10)
        print("prompt_token_used:", response.usage.prompt_tokens)
        print("completion_token_used:", response.usage.completion_tokens)
        print("total_token_used:", response.usage.total_tokens)
        print("response_time:", response.response_ms/1000.0, 's')
        return result

    def export_to_markdown(self, text, file_name, mode='w'):
        """Write ``text`` unchanged to ``file_name`` as UTF-8 using ``mode``."""
        with open(file_name, mode, encoding="utf-8") as f:
            f.write(text)
151
-
152
def main(args):
    """Collect the PDFs named by ``args.paper_path`` and review them.

    ``args.paper_path`` is treated as a single file when it ends with
    ``.pdf``; otherwise it is walked recursively as a directory and every
    ``.pdf`` file found is loaded.

    Args:
        args: Parsed command-line namespace; it is also passed on to
            ``Reviewer``.
    """
    reviewer1 = Reviewer(args=args)
    # Decide whether the path is a single file or a directory.
    paper_list = []
    if args.paper_path.endswith(".pdf"):
        paper_list.append(Paper(path=args.paper_path))
    else:
        for root, dirs, files in os.walk(args.paper_path):
            print("root:", root, "dirs:", dirs, 'files:', files)  # current directory
            for filename in files:
                # Parse every PDF found and queue it for review.
                if filename.endswith(".pdf"):
                    paper_list.append(Paper(path=os.path.join(root, filename)))
    print("------------------paper_num: {}------------------".format(len(paper_list)))
    for paper_index, paper in enumerate(paper_list):
        # basename handles the platform's path separator instead of only '\\'.
        print(paper_index, os.path.basename(paper.path))
    reviewer1.review_by_chatgpt(paper_list=paper_list)
169
-
170
-
171
-
172
if __name__ == '__main__':
    # (flag, default, help text) for each string-valued command-line option.
    cli_options = (
        ("--paper_path", '', "path of papers"),
        ("--file_format", 'txt', "output file format"),
        ("--research_fields", 'computer science, artificial intelligence and reinforcement learning', "the research fields of paper"),
        ("--language", 'en', "output lauguage, en or zh"),
    )
    parser = argparse.ArgumentParser()
    for flag, default_value, help_text in cli_options:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)

    args = parser.parse_args()
    # Time the whole review run, starting after argument parsing.
    began = time.time()
    main(args=args)
    elapsed = time.time() - began
    print("review time:", elapsed)
183
-