rodrigomasini commited on
Commit
065a0bb
·
verified ·
1 Parent(s): 67b666f

Create app_v1.0.0

Browse files
Files changed (1) hide show
  1. app_v1.0.0 +399 -0
app_v1.0.0 ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import random
3
+ from recurrentgpt import RecurrentGPT
4
+ from human_simulator import Human
5
+ from sentence_transformers import SentenceTransformer
6
+ from utils import get_init, parse_instructions
7
+ import re
8
+
9
+ # from urllib.parse import quote_plus
10
+ # from pymongo import MongoClient
11
+
12
+ # uri = "mongodb://%s:%s@%s" % (quote_plus("xxx"),
13
+ # quote_plus("xxx"), "localhost")
14
+ # client = MongoClient(uri, maxPoolSize=None)
15
+ # db = client.recurrentGPT_db
16
+ # log = db.log
17
+
18
# Per-session state, keyed by each browser's cookie string.  Holds the
# parsed init outputs and, once stepping begins, the live Human and
# RecurrentGPT instances for that session.
_CACHE = {}


# Build the semantic search model (used for long-term-memory retrieval by
# the Human / RecurrentGPT modules).
embedder = SentenceTransformer('multi-qa-mpnet-base-cos-v1')
23
+
24
def init_prompt(novel_type, description):
    """Build the initial LLM prompt that kicks off novel generation.

    Args:
        novel_type: Genre of the novel (e.g. "Science Fiction").
        description: Optional free-text topic; pass "" for none.

    Returns:
        A prompt string instructing the model to emit a name, a first-chapter
        outline, three opening paragraphs, a summary, and three candidate
        continuation instructions in a strict labeled format.
    """
    # Mention the description only when the user actually supplied one
    # (the original code had a no-op branch assigning "" to "").
    description = f" about {description}" if description else ""
    return f"""
Please write a {novel_type} novel{description} with 50 chapters. Follow the format below precisely:
Begin with the name of the novel.
Next, write an outline for the first chapter. The outline should describe the background and the beginning of the novel.
Write the first three paragraphs with their indication of the novel based on your outline. Write in a novelistic style and take your time to set the scene.
Write a summary that captures the key information of the three paragraphs.
Finally, write three different instructions for what to write next, each containing around five sentences. Each instruction should present a possible, interesting continuation of the story.
The output format should follow these guidelines:
Name: <name of the novel>
Outline: <outline for the first chapter>
Paragraph 1: <content for paragraph 1>
Paragraph 2: <content for paragraph 2>
Paragraph 3: <content for paragraph 3>
Summary: <content of summary>
Instruction 1: <content for instruction 1>
Instruction 2: <content for instruction 2>
Instruction 3: <content for instruction 3>
Make sure to be precise and follow the output format strictly.
"""
48
+
49
def init(novel_type, description, request: gr.Request):
    """Start a fresh novel-writing session for the calling browser.

    Builds the init prompt, obtains the opening material via ``get_init``,
    caches the parsed pieces in ``_CACHE`` keyed by the client's cookie,
    and returns the values that populate the UI widgets.

    Args:
        novel_type: Genre from the UI; defaults to "Science Fiction" when empty.
        description: Optional topic description from the UI.
        request: Gradio request; its cookie header identifies the session.

    Returns:
        Tuple of (short memory, long memory, written paragraphs,
        instruction 1, instruction 2, instruction 3).
    """
    if novel_type == "":
        novel_type = "Science Fiction"
    global _CACHE
    # NOTE(review): raises KeyError if the client sends no cookie header —
    # confirm all deployments sit behind something that sets a cookie.
    cookie = request.headers['cookie']
    print(cookie)
    # Strip the Google Analytics suffix so the cache key is stable.
    cookie = cookie.split('; _gat_gtag')[0]
    print(cookie)
    # prepare first init
    init_paragraphs = get_init(text=init_prompt(novel_type,description))
    # print(init_paragraphs)
    start_input_to_human = {
        'output_paragraph': init_paragraphs['Paragraph 3'],
        'input_paragraph': '\n\n'.join([init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2']]),
        'output_memory': init_paragraphs['Summary'],
        "output_instruction": [init_paragraphs['Instruction 1'], init_paragraphs['Instruction 2'], init_paragraphs['Instruction 3']]
    }

    # Cache the init material; `step`/`controled_step` build the Human and
    # writer objects lazily from this on their first call.
    _CACHE[cookie] = {"start_input_to_human": start_input_to_human,
                      "init_paragraphs": init_paragraphs}
    written_paras = f"""Title: {init_paragraphs['name']}
Outline: {init_paragraphs['Outline']}
Paragraphs:
{start_input_to_human['input_paragraph']}"""
    long_memory = parse_instructions([init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2']])
    # short memory, long memory, current written paragraphs, 3 next instructions
    return start_input_to_human['output_memory'], long_memory, written_paras, init_paragraphs['Instruction 1'], init_paragraphs['Instruction 2'], init_paragraphs['Instruction 3']
76
+
77
def step(short_memory, long_memory, instruction1, instruction2, instruction3, current_paras, request: gr.Request, ):
    """Advance auto-generation by one human/writer round.

    On the first call for a session this instantiates the ``Human`` simulator
    and the ``RecurrentGPT`` writer from the cached init material; on later
    calls it picks one of the three candidate instructions at random and runs
    one human step followed by one writer step.

    Returns:
        Tuple of (short memory, long-memory rows, accumulated paragraphs,
        the instruction the human revised, and the three new candidate
        instructions) — 7 values, matching the Auto-Generation outputs.
    """
    if current_paras == "":
        # Session not initialized yet — blank out the outputs.
        return "", "", "", "", "", ""
    global _CACHE
    # print(list(_CACHE.keys()))
    # print(request.headers.get('cookie'))
    cookie = request.headers['cookie']
    cookie = cookie.split('; _gat_gtag')[0]
    cache = _CACHE[cookie]

    if "writer" not in cache:
        # First step: build Human + writer from the cached init outputs,
        # honoring any edits the user made to the three instruction boxes.
        start_input_to_human = cache["start_input_to_human"]
        start_input_to_human['output_instruction'] = [
            instruction1, instruction2, instruction3]
        init_paragraphs = cache["init_paragraphs"]
        human = Human(input=start_input_to_human,
                      memory=None, embedder=embedder)
        human.step()
        start_short_memory = init_paragraphs['Summary']
        writer_start_input = human.output

        # Init writerGPT
        writer = RecurrentGPT(input=writer_start_input, short_memory=start_short_memory, long_memory=[
            init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2']], memory_index=None, embedder=embedder)
        cache["writer"] = writer
        cache["human"] = human
        writer.step()
    else:
        human = cache["human"]
        writer = cache["writer"]
        output = writer.output
        # Carry the user's (possibly edited) short memory forward.
        output['output_memory'] = short_memory
        # randomly select one instruction out of three
        instruction_index = random.randint(0,2)
        output['output_instruction'] = [instruction1, instruction2, instruction3][instruction_index]
        human.input = output
        human.step()
        writer.input = human.output
        writer.step()

    # One single-column row per memory entry — shaped for the commented-out
    # Dataframe widget.  NOTE(review): the active long-memory widget is a
    # Textbox, which will render this list's repr; confirm intended.
    long_memory = [[v] for v in writer.long_memory]
    # short memory, long memory, current written paragraphs, 3 next instructions
    return writer.output['output_memory'], long_memory, current_paras + '\n\n' + writer.output['input_paragraph'], human.output['output_instruction'], *writer.output['output_instruction']
120
+
121
+
122
def controled_step(short_memory, long_memory, selected_instruction, current_paras, request: gr.Request, ):
    """Advance human-in-the-loop generation by one round.

    Same flow as ``step``, except the continuation instruction is the single
    user-selected (and possibly edited) one instead of a random pick from
    three candidates.

    Returns:
        Tuple of (short memory, long-memory text, accumulated paragraphs,
        and the three new candidate instructions) — 6 values, matching the
        Human-in-the-Loop outputs.
    """
    if current_paras == "":
        # Session not initialized yet — blank out the outputs.
        return "", "", "", "", "", ""
    global _CACHE
    # print(list(_CACHE.keys()))
    # print(request.headers.get('cookie'))
    cookie = request.headers['cookie']
    cookie = cookie.split('; _gat_gtag')[0]
    cache = _CACHE[cookie]
    if "writer" not in cache:
        # First step: build Human + writer from the cached init outputs.
        start_input_to_human = cache["start_input_to_human"]
        start_input_to_human['output_instruction'] = selected_instruction
        init_paragraphs = cache["init_paragraphs"]
        human = Human(input=start_input_to_human,
                      memory=None, embedder=embedder)
        human.step()
        start_short_memory = init_paragraphs['Summary']
        writer_start_input = human.output

        # Init writerGPT
        writer = RecurrentGPT(input=writer_start_input, short_memory=start_short_memory, long_memory=[
            init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2']], memory_index=None, embedder=embedder)
        cache["writer"] = writer
        cache["human"] = human
        writer.step()
    else:
        human = cache["human"]
        writer = cache["writer"]
        output = writer.output
        # Carry the user's (possibly edited) short memory forward.
        output['output_memory'] = short_memory
        output['output_instruction'] = selected_instruction
        human.input = output
        human.step()
        writer.input = human.output
        writer.step()

    # short memory, long memory, current written paragraphs, 3 next instructions
    return writer.output['output_memory'], parse_instructions(writer.long_memory), current_paras + '\n\n' + writer.output['input_paragraph'], *writer.output['output_instruction']
160
+
161
+
162
+ # SelectData is a subclass of EventData
163
# SelectData is a subclass of EventData
def on_select(instruction1, instruction2, instruction3, evt: gr.SelectData):
    """Return the full text of the instruction whose radio label was clicked.

    The radio values are "Instruction 1".."Instruction 3"; the trailing
    number is mapped to a 0-based index into the three instruction texts.
    """
    idx = int(evt.value.replace("Instruction ", "")) - 1
    return (instruction1, instruction2, instruction3)[idx]
167
+
168
+ #----------------#
169
+ # Grammar metrics
170
+ import re
171
+ from textstat import textstat
172
+
173
+ #def pre_process_text(text):
174
+ # sentences_list = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
175
+ # # Split the elements of the list by newline characters
176
+ # split_sentences = []
177
+ # for sentence in sentences_list:
178
+ # split_sentences.extend(re.split(r'\n+', sentence))
179
+ # # Remove empty elements
180
+ # cleaned_sentences = [sentence for sentence in split_sentences if sentence.strip()]
181
+ # sentences_number = len(cleaned_sentences)
182
+ # return cleaned_sentences, sentences_number
183
+
184
+ # Function to clean the sentences list and return words only
185
+ #def extract_words(sentences):
186
+ # words = []
187
+ # for sentence in sentences:
188
+ # # Extract words using regex, ignoring special characters
189
+ # words.extend(re.findall(r'\b\w+\b', sentence))
190
+ # return words
191
+
192
+ #def count_syllables(word):
193
+ # return len(re.findall(r'[aeiouyAEIOUY]', word))
194
+
195
+ #def flesch_kincaid_grade_level(text):
196
+ # sentences, sentences_count = pre_process_text(text)
197
+ # words = extract_words(sentences)
198
+ # syllables = sum([count_syllables(word) for word in text.split()])
199
+ #
200
+ # if sentences_count == 0 or words == 0:
201
+ # return float('nan') # Return NaN to indicate an error
202
+ # return 0.39 * (words / sentences_count) + 11.8 * (syllables / words) - 15.59
203
+
204
+ #def flesch_reading_ease(text):
205
+ # sentences, sentences_count = pre_process_text(text)
206
+ # words = extract_words(sentences)
207
+ # syllables = sum([count_syllables(word) for word in words])
208
+ #
209
+ # if sentences_count == 0 or words == 0:
210
+ # return float('nan') # Return NaN to indicate an error
211
+ # return 206.835 - 1.015 * (words / sentences_count) - 84.6 * (syllables / words)
212
+
213
+ #def gunning_fog_index(text):
214
+ # sentences, sentences_count = pre_process_text(text)
215
+ # words = extract_words(sentences)
216
+ # complex_words = len([word for word in words if count_syllables(word) >= 3])
217
+ #
218
+ # if sentences_count == 0 or words == 0:
219
+ # return float('nan') # Return NaN to indicate an error
220
+ # return 0.4 * ((words / sentences_count) + 100 * (complex_words / words))
221
+
222
def pre_process_text(text):
    """Normalize novel text into one whitespace-clean string of sentences.

    Collapses blank-line runs, joins paragraphs with single spaces, splits
    into sentences on '.'/'?' boundaries (avoiding common abbreviation
    patterns), drops empty fragments, and rejoins everything with single
    spaces for the textstat readability functions.

    Args:
        text: Raw (possibly multi-paragraph) novel text.

    Returns:
        The cleaned, single-line text.
    """
    # Normalize line breaks and whitespace.
    text = re.sub(r'\n\s*\n', '\n\n', text.strip())

    # Split into paragraph sections and drop empty ones.
    sections = re.split(r'\n{2,}', text)
    sections = [section.strip() for section in sections if section.strip()]

    # Combine sections into a single string.
    combined_text = ' '.join(sections)

    # Split into sentences; the lookbehinds skip "e.g."-style abbreviations
    # and single-initial patterns like "A. Smith".
    sentences_list = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', combined_text)

    # Defensive: break any residual newlines inside sentence fragments.
    split_sentences = []
    for sentence in sentences_list:
        split_sentences.extend(re.split(r'\n+', sentence))

    # Remove empty elements and rejoin.
    # (Fix: removed the six debug print() calls that spammed stdout on
    # every "Calculate Metrics" click; the return value is unchanged.)
    cleaned_sentences = [sentence for sentence in split_sentences if sentence.strip()]
    return " ".join(cleaned_sentences)
256
+
257
def flesch_kincaid_grade_level(text):
    """Return the Flesch-Kincaid grade level of *text* after normalization."""
    cleaned = pre_process_text(text)
    return textstat.flesch_kincaid_grade(cleaned)
260
+
261
def flesch_reading_ease(text):
    """Return the Flesch reading-ease score of *text* after normalization."""
    cleaned = pre_process_text(text)
    return textstat.flesch_reading_ease(cleaned)
264
+
265
def gunning_fog_index(text):
    """Return the Gunning fog index of *text* after normalization."""
    cleaned = pre_process_text(text)
    return textstat.gunning_fog(cleaned)
268
+
269
def calculate_readability_metrics(text):
    """Compute the three readability scores shown in the UI.

    Returns:
        Tuple of (Flesch-Kincaid grade level, Flesch reading ease,
        Gunning fog index).
    """
    return (
        flesch_kincaid_grade_level(text),
        flesch_reading_ease(text),
        gunning_fog_index(text),
    )
275
+ #-------------#
276
+
277
# ---------------------------------------------------------------------------
# Gradio UI.  Two tabs: "Auto-Generation" (the next instruction is picked at
# random inside `step`) and "Human-in-the-Loop" (the user selects and edits
# the instruction, handled by `controled_step`).
# NOTE(review): the second tab rebinds the same Python names (novel_type,
# written_paras, ...).  The first tab's event handlers have already captured
# their own component objects by then, so this works, but it is fragile when
# editing — consider distinct names.
# ---------------------------------------------------------------------------
with gr.Blocks(title="RecurrentGPT", css="footer {visibility: hidden}", theme='sudeepshouche/minimalist') as demo:
    gr.Markdown(
        """
        # RecurrentGPT
        Interactive Generation of (Arbitrarily) Long Texts with Human-in-the-Loop
        """)
    with gr.Tab("Auto-Generation"):
        with gr.Row():
            with gr.Column():
                with gr.Box():
                    with gr.Row():
                        with gr.Column(scale=1, min_width=200):
                            novel_type = gr.Textbox(
                                label="Novel Type", placeholder="e.g. science fiction")
                        with gr.Column(scale=2, min_width=400):
                            description = gr.Textbox(label="Description")
                    btn_init = gr.Button(
                        "Init Novel Generation", variant="primary")
                    gr.Examples(["Science Fiction", "Romance", "Mystery", "Fantasy",
                                 "Historical", "Horror", "Thriller", "Western", "Young Adult", ], inputs=[novel_type])
                    written_paras = gr.Textbox(
                        label="Written Paragraphs (editable)", max_lines=21, lines=21)
                with gr.Box():
                    gr.Markdown("### Readability Metrics\n")
                    fk_grade = gr.Number(label="Flesch-Kincaid Grade Level")
                    fr_ease = gr.Number(label="Flesch Reading Ease")
                    g_fog = gr.Number(label="Gunning Fog Index")

                    calculate_button = gr.Button("Calculate Metrics")

                    # Unpack the metrics tuple into the three Number widgets.
                    def update_metrics(text):
                        grade, ease, fog = calculate_readability_metrics(text)
                        return grade, ease, fog

            with gr.Column():
                with gr.Box():
                    gr.Markdown("### Memory Module\n")
                    short_memory = gr.Textbox(
                        label="Short-Term Memory (editable)", max_lines=3, lines=3)
                    # NOTE(review): `step` returns long memory as [[v], ...]
                    # rows shaped for the commented-out Dataframe below; this
                    # Textbox will show that list's repr — confirm intended.
                    long_memory = gr.Textbox(
                        label="Long-Term Memory (editable)", max_lines=6, lines=6)
                    # long_memory = gr.Dataframe(
                    #     # label="Long-Term Memory (editable)",
                    #     headers=["Long-Term Memory (editable)"],
                    #     datatype=["str"],
                    #     row_count=3,
                    #     max_rows=3,
                    #     col_count=(1, "fixed"),
                    #     type="array",
                    # )
                with gr.Box():
                    gr.Markdown("### Instruction Module\n")
                    with gr.Row():
                        instruction1 = gr.Textbox(
                            label="Instruction 1 (editable)", max_lines=4, lines=4)
                        instruction2 = gr.Textbox(
                            label="Instruction 2 (editable)", max_lines=4, lines=4)
                        instruction3 = gr.Textbox(
                            label="Instruction 3 (editable)", max_lines=4, lines=4)
                    selected_plan = gr.Textbox(
                        label="Revised Instruction (from last step)", max_lines=2, lines=2)

        btn_step = gr.Button("Next Step", variant="primary")

        btn_init.click(init, inputs=[novel_type, description], outputs=[
            short_memory, long_memory, written_paras, instruction1, instruction2, instruction3])
        btn_step.click(step, inputs=[short_memory, long_memory, instruction1, instruction2, instruction3, written_paras], outputs=[
            short_memory, long_memory, written_paras, selected_plan, instruction1, instruction2, instruction3])
        calculate_button.click(update_metrics, inputs=[written_paras], outputs=[fk_grade, fr_ease, g_fog])

    with gr.Tab("Human-in-the-Loop"):
        with gr.Row():
            with gr.Column():
                with gr.Box():
                    with gr.Row():
                        with gr.Column(scale=1, min_width=200):
                            novel_type = gr.Textbox(
                                label="Novel Type", placeholder="e.g. science fiction")
                        with gr.Column(scale=2, min_width=400):
                            description = gr.Textbox(label="Description")
                    btn_init = gr.Button(
                        "Init Novel Generation", variant="primary")
                    gr.Examples(["Science Fiction", "Romance", "Mystery", "Fantasy",
                                 "Historical", "Horror", "Thriller", "Western", "Young Adult", ], inputs=[novel_type])
                    written_paras = gr.Textbox(
                        label="Written Paragraphs (editable)", max_lines=23, lines=23)
            with gr.Column():
                with gr.Box():
                    gr.Markdown("### Memory Module\n")
                    short_memory = gr.Textbox(
                        label="Short-Term Memory (editable)", max_lines=3, lines=3)
                    long_memory = gr.Textbox(
                        label="Long-Term Memory (editable)", max_lines=6, lines=6)
                with gr.Box():
                    gr.Markdown("### Instruction Module\n")
                    with gr.Row():
                        # Read-only here: the user chooses via the radio below.
                        instruction1 = gr.Textbox(
                            label="Instruction 1", max_lines=3, lines=3, interactive=False)
                        instruction2 = gr.Textbox(
                            label="Instruction 2", max_lines=3, lines=3, interactive=False)
                        instruction3 = gr.Textbox(
                            label="Instruction 3", max_lines=3, lines=3, interactive=False)
                    with gr.Row():
                        with gr.Column(scale=1, min_width=100):
                            selected_plan = gr.Radio(["Instruction 1", "Instruction 2", "Instruction 3"], label="Instruction Selection",)
                            # info="Select the instruction you want to revise and use for the next step generation.")
                        with gr.Column(scale=3, min_width=300):
                            selected_instruction = gr.Textbox(
                                label="Selected Instruction (editable)", max_lines=5, lines=5)

        btn_step = gr.Button("Next Step", variant="primary")

        btn_init.click(init, inputs=[novel_type, description], outputs=[
            short_memory, long_memory, written_paras, instruction1, instruction2, instruction3])
        btn_step.click(controled_step, inputs=[short_memory, long_memory, selected_instruction, written_paras], outputs=[
            short_memory, long_memory, written_paras, instruction1, instruction2, instruction3])
        # Clicking a radio label copies the matching instruction text into
        # the editable "Selected Instruction" box.
        selected_plan.select(on_select, inputs=[
            instruction1, instruction2, instruction3], outputs=[selected_instruction])

# NOTE(review): `concurrency_count` was removed in Gradio 4.x — confirm the
# pinned gradio version still supports this keyword.
demo.queue(concurrency_count=1)

if __name__ == "__main__":
    demo.launch()