JohanBeytell committed on
Commit
cdb1e03
·
verified ·
1 Parent(s): e93bd2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -128
app.py CHANGED
@@ -1,92 +1,9 @@
1
- import sentencepiece as spm
2
- import numpy as np
3
- import tensorflow as tf
4
- from tensorflow.keras.preprocessing.sequence import pad_sequences
5
- from valx import detect_profanity, detect_hate_speech
6
- import gradio as gr
7
-
8
- sp = spm.SentencePieceProcessor()
9
- sp.Load("dungen_dev_preview.model")
10
-
11
- model = tf.keras.models.load_model("dungen_dev_preview_model.keras")
12
-
13
- max_seq_len = 25
14
-
15
- def generate_text(seed_text, next_words=30, temperature=0.5):
16
- seed_text = seed_text.strip().lower()
17
-
18
- if "|" in seed_text:
19
- gr.Warning("The prompt should not contain the '|' character. Using default prompt.")
20
- seed_text = 'game name | '
21
- elif detect_profanity([seed_text], language='All'):
22
- gr.Warning("Profanity detected in the prompt, using the default prompt.")
23
- seed_text = 'game name | '
24
- elif (hate_speech_result := detect_hate_speech(seed_text)) and hate_speech_result[0] in ['Hate Speech', 'Offensive Speech']:
25
- gr.Warning('Harmful speech detected in the prompt, using default prompt.')
26
- seed_text = 'game name | '
27
- else:
28
- seed_text += ' | '
29
-
30
- generated_text = seed_text
31
- if generated_text != 'game name | ': # only generate if not the default prompt
32
- for _ in range(next_words):
33
- token_list = sp.encode_as_ids(generated_text)
34
- token_list = pad_sequences([token_list], maxlen=max_seq_len - 1, padding='pre')
35
- predicted = model.predict(token_list, verbose=0)[0]
36
-
37
- predicted = np.asarray(predicted).astype("float64")
38
- predicted = np.log(predicted + 1e-8) / temperature
39
- exp_preds = np.exp(predicted)
40
- predicted = exp_preds / np.sum(exp_preds)
41
-
42
- next_index = np.random.choice(len(predicted), p=predicted)
43
- next_token = sp.id_to_piece(next_index)
44
- generated_text += next_token
45
-
46
- if next_token.endswith('</s>') or next_token.endswith('<unk>'):
47
- break
48
-
49
- decoded = sp.decode_pieces(sp.encode_as_pieces(generated_text))
50
- decoded = decoded.replace("</s>", "").replace("<unk>", "").strip()
51
-
52
- if '|' in decoded:
53
- decoded = decoded.split('|', 1)[1].strip()
54
-
55
- if any(detect_profanity([decoded], language='All')) or (hate_speech_result := detect_hate_speech(decoded)) and hate_speech_result[0] in ['Hate Speech', 'Offensive Speech']:
56
- gr.Warning("Flagged potentially harmful output.")
57
- decoded = 'Flagged Output'
58
-
59
- return decoded
60
-
61
- demo = gr.Interface(
62
- fn=generate_text,
63
- inputs=[
64
- gr.Textbox(label="Prompt", value="a female character name", max_lines=1),
65
- gr.Slider(1, 100, step=1, label='Next Words', value=30),
66
- gr.Slider(0.1, 1, value=0.5, label='Temperature', info='Controls randomness of generation, higher values = more creative, lower values = more probalistic')
67
- ],
68
- outputs=gr.Textbox(label="Generated Names"),
69
- title='Dungen Dev - Name Generator',
70
- description='A prompt-based name generator for game developers. Dungen Dev is an experimental model, and may produce outputs that are inappropriate, biased, or potentially harmful and inaccurate. Caution is advised.',
71
- examples=[
72
- ["a male character name", 30, 0.5],
73
- ["a futuristic city name", 30, 0.5],
74
- ["an item name", 30, 0.5],
75
- ["a dark and mysterious forest name", 30, 0.5],
76
- ["an evil character name", 30, 0.5]
77
- ]
78
- )
79
-
80
- demo.launch()
81
-
82
  # import sentencepiece as spm
83
  # import numpy as np
84
  # import tensorflow as tf
85
  # from tensorflow.keras.preprocessing.sequence import pad_sequences
86
  # from valx import detect_profanity, detect_hate_speech
87
  # import gradio as gr
88
- # import csv
89
- # from datetime import datetime
90
 
91
  # sp = spm.SentencePieceProcessor()
92
  # sp.Load("dungen_dev_preview.model")
@@ -111,7 +28,7 @@ demo.launch()
111
  # seed_text += ' | '
112
 
113
  # generated_text = seed_text
114
- # if generated_text != 'game name | ': # only generate if not the default prompt
115
  # for _ in range(next_words):
116
  # token_list = sp.encode_as_ids(generated_text)
117
  # token_list = pad_sequences([token_list], maxlen=max_seq_len - 1, padding='pre')
@@ -139,52 +56,16 @@ demo.launch()
139
  # gr.Warning("Flagged potentially harmful output.")
140
  # decoded = 'Flagged Output'
141
 
142
- # return decoded, gr.update(interactive=True)
143
-
144
- # flagged_outputs = []
145
-
146
- # def flag_output(prompt, generated_text, next_words, temperature):
147
- # if not generated_text.strip():
148
- # return "Cannot flag an empty output."
149
-
150
- # timestamp = datetime.now().isoformat()
151
- # flagged_outputs.append({
152
- # "Prompt": prompt,
153
- # "Generated Text": generated_text,
154
- # "Next Words": next_words,
155
- # "Temperature": temperature,
156
- # "Timestamp": timestamp
157
- # })
158
-
159
- # with open("flagged_outputs.csv", "a", newline="") as file:
160
- # writer = csv.DictWriter(file, fieldnames=["Prompt", "Generated Text", "Next Words", "Temperature", "Timestamp"])
161
- # if file.tell() == 0:
162
- # writer.writeheader()
163
- # writer.writerow({
164
- # "Prompt": prompt,
165
- # "Generated Text": generated_text,
166
- # "Next Words": next_words,
167
- # "Temperature": temperature,
168
- # "Timestamp": timestamp
169
- # })
170
-
171
- # return "Output flagged successfully."
172
-
173
- # def enable_flag_button(state):
174
- # return gr.update(interactive=state)
175
 
176
  # demo = gr.Interface(
177
  # fn=generate_text,
178
  # inputs=[
179
  # gr.Textbox(label="Prompt", value="a female character name", max_lines=1),
180
  # gr.Slider(1, 100, step=1, label='Next Words', value=30),
181
- # gr.Slider(0.1, 1, value=0.5, label='Temperature', info='Controls randomness of generation, higher values = more creative, lower values = more probabilistic')
182
  # ],
183
- # outputs=[
184
- # gr.Textbox(label="Generated Name", interactive=True),
185
- # gr.Button("Flag Output", interactive=False)
186
- # ],
187
- # live=False,
188
  # title='Dungen Dev - Name Generator',
189
  # description='A prompt-based name generator for game developers. Dungen Dev is an experimental model, and may produce outputs that are inappropriate, biased, or potentially harmful and inaccurate. Caution is advised.',
190
  # examples=[
@@ -193,10 +74,137 @@ demo.launch()
193
  # ["an item name", 30, 0.5],
194
  # ["a dark and mysterious forest name", 30, 0.5],
195
  # ["an evil character name", 30, 0.5]
196
- # ],
197
- # theme="default",
198
- # allow_flagging=False
199
  # )
200
 
201
- # demo.queue()
202
- # demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # import sentencepiece as spm
2
  # import numpy as np
3
  # import tensorflow as tf
4
  # from tensorflow.keras.preprocessing.sequence import pad_sequences
5
  # from valx import detect_profanity, detect_hate_speech
6
  # import gradio as gr
 
 
7
 
8
  # sp = spm.SentencePieceProcessor()
9
  # sp.Load("dungen_dev_preview.model")
 
28
  # seed_text += ' | '
29
 
30
  # generated_text = seed_text
31
+ # if generated_text != 'game name | ': # only generate if not the default prompt
32
  # for _ in range(next_words):
33
  # token_list = sp.encode_as_ids(generated_text)
34
  # token_list = pad_sequences([token_list], maxlen=max_seq_len - 1, padding='pre')
 
56
  # gr.Warning("Flagged potentially harmful output.")
57
  # decoded = 'Flagged Output'
58
 
59
+ # return decoded
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  # demo = gr.Interface(
62
  # fn=generate_text,
63
  # inputs=[
64
  # gr.Textbox(label="Prompt", value="a female character name", max_lines=1),
65
  # gr.Slider(1, 100, step=1, label='Next Words', value=30),
66
+ # gr.Slider(0.1, 1, value=0.5, label='Temperature', info='Controls randomness of generation, higher values = more creative, lower values = more probalistic')
67
  # ],
68
+ # outputs=gr.Textbox(label="Generated Names"),
 
 
 
 
69
  # title='Dungen Dev - Name Generator',
70
  # description='A prompt-based name generator for game developers. Dungen Dev is an experimental model, and may produce outputs that are inappropriate, biased, or potentially harmful and inaccurate. Caution is advised.',
71
  # examples=[
 
74
  # ["an item name", 30, 0.5],
75
  # ["a dark and mysterious forest name", 30, 0.5],
76
  # ["an evil character name", 30, 0.5]
77
+ # ]
 
 
78
  # )
79
 
80
+ # demo.launch()
81
+
82
+ import sentencepiece as spm
83
+ import numpy as np
84
+ import tensorflow as tf
85
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
86
+ from valx import detect_profanity, detect_hate_speech
87
+ import gradio as gr
88
+ from datasets import load_dataset, DatasetDict, Dataset
89
+ from huggingface_hub import HfApi
90
+ from datetime import datetime
91
+
92
# Tokenizer: create a SentencePiece processor and load its model file.
sp = spm.SentencePieceProcessor()
sp.Load("dungen_dev_preview.model")

# Keras model whose predictions generate_text() samples the next token from.
model = tf.keras.models.load_model("dungen_dev_preview_model.keras")

# generate_text() pads/truncates its token ids to (max_seq_len - 1).
max_seq_len = 25
98
+
99
def generate_text(seed_text, next_words=30, temperature=0.5):
    """Generate text for a prompt using the module-level `sp` and `model`.

    The prompt is stripped and lower-cased. If it contains '|', or is
    reported by `detect_profanity` / `detect_hate_speech`, a Gradio warning
    is raised and the prompt is replaced by the default prompt; otherwise
    ' | ' is appended to it. Tokens are then sampled one at a time (at most
    `next_words` of them) and appended to the running text.

    Args:
        seed_text: Prompt text.
        next_words: Maximum number of sampling steps.
        temperature: Divisor applied to the log-probabilities before they
            are re-normalised and sampled.

    Returns:
        A tuple ``(decoded, gr.update(interactive=True))`` where ``decoded``
        is the text after the first '|' (or 'Flagged Output' when the result
        is reported as harmful).
    """
    # NOTE(review): indentation was lost in the reviewed source; the nesting
    # below (decode/return outside the `if`) is a reconstruction — confirm.
    default_prompt = 'game name | '
    harmful_labels = ['Hate Speech', 'Offensive Speech']

    prompt = seed_text.strip().lower()

    if "|" in prompt:
        gr.Warning("The prompt should not contain the '|' character. Using default prompt.")
        prompt = default_prompt
    elif detect_profanity([prompt], language='All'):
        gr.Warning("Profanity detected in the prompt, using the default prompt.")
        prompt = default_prompt
    elif (hate_speech_result := detect_hate_speech(prompt)) and hate_speech_result[0] in harmful_labels:
        gr.Warning('Harmful speech detected in the prompt, using default prompt.')
        prompt = default_prompt
    else:
        prompt += ' | '

    generated_text = prompt
    if generated_text != default_prompt:  # only generate if not the default prompt
        for _ in range(next_words):
            ids = sp.encode_as_ids(generated_text)
            padded = pad_sequences([ids], maxlen=max_seq_len - 1, padding='pre')
            probs = np.asarray(model.predict(padded, verbose=0)[0]).astype("float64")

            # Temperature scaling: divide log-probabilities, then renormalise.
            scaled = np.exp(np.log(probs + 1e-8) / temperature)
            probs = scaled / np.sum(scaled)

            next_index = np.random.choice(len(probs), p=probs)
            piece = sp.id_to_piece(next_index)
            generated_text += piece

            # Stop as soon as an end-of-sequence or unknown piece is sampled.
            if piece.endswith(('</s>', '<unk>')):
                break

    decoded = sp.decode_pieces(sp.encode_as_pieces(generated_text))
    decoded = decoded.replace("</s>", "").replace("<unk>", "").strip()

    # Keep only what follows the first '|' separator.
    if '|' in decoded:
        decoded = decoded.split('|', 1)[1].strip()

    profane = any(detect_profanity([decoded], language='All'))
    if profane or ((hate_speech_result := detect_hate_speech(decoded)) and hate_speech_result[0] in harmful_labels):
        gr.Warning("Flagged potentially harmful output.")
        decoded = 'Flagged Output'

    return decoded, gr.update(interactive=True)
144
+
145
+ flagged_outputs = []
146
+
147
def flag_output(prompt, generated_text, next_words, temperature):
    """Append one flagged generation to the flagged-outputs dataset on the Hub.

    Args:
        prompt: Prompt that produced the output.
        generated_text: The generated text being flagged.
        next_words: The "Next Words" setting used for the generation.
        temperature: The "Temperature" setting used for the generation.

    Returns:
        "Cannot flag an empty output." when `generated_text` is blank
        (nothing is loaded or pushed in that case), otherwise
        "Output flagged successfully." after the dataset has been pushed.
    """
    if not generated_text.strip():
        return "Cannot flag an empty output."

    timestamp = datetime.now().isoformat()

    dataset_id = "InfinitodeLTD/DungenDev-FlaggedOutputs"

    # Load the existing dataset (if it exists) or start from an empty DatasetDict.
    try:
        dataset = load_dataset(dataset_id)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
        # NOTE(review): any load failure (not only "dataset missing") lands
        # here, and the push below then uploads a dataset holding only the new
        # row — confirm this cannot clobber existing data on a transient error.
        dataset = DatasetDict()

    # Single-row dataset holding the flagged sample.
    new_dataset = Dataset.from_list([{
        "Prompt": prompt,
        "Generated Text": generated_text,
        "Next Words": next_words,
        "Temperature": temperature,
        "Timestamp": timestamp,
    }])

    if "train" in dataset:
        # Imported locally: the file-level `datasets` import does not bring in
        # `concatenate_datasets`, so this branch previously raised NameError.
        from datasets import concatenate_datasets

        # Append to the existing train split.
        dataset["train"] = concatenate_datasets([dataset["train"], new_dataset])
    else:
        # Create the train split.
        dataset["train"] = new_dataset

    dataset.push_to_hub(dataset_id)

    return "Output flagged successfully."
180
+
181
def enable_flag_button(state):
    """Return a Gradio update that sets a component's `interactive` flag to `state`."""
    update = gr.update(interactive=state)
    return update
183
+
184
# Gradio UI: prompt + sampling controls in, generated name + flag button out.
# generate_text returns (text, gr.update(interactive=True)); the second value
# targets the "Flag Output" button declared in `outputs`.
# NOTE(review): nothing visible in this file connects the button to
# flag_output or enable_flag_button — confirm whether flagging is wired up.
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Prompt", value="a female character name", max_lines=1),
        gr.Slider(1, 100, step=1, label='Next Words', value=30),
        gr.Slider(0.1, 1, value=0.5, label='Temperature', info='Controls randomness of generation, higher values = more creative, lower values = more probabilistic'),
    ],
    outputs=[
        gr.Textbox(label="Generated Name", interactive=True),
        gr.Button("Flag Output", interactive=False, elem_id="flag-button"),
    ],
    live=False,
    title='Dungen Dev - Name Generator',
    description='A prompt-based name generator for game developers. Dungen Dev is an experimental model, and may produce outputs that are inappropriate, biased, or potentially harmful and inaccurate. Caution is advised.',
    examples=[
        ["a male character name", 30, 0.5],
        ["a futuristic city name", 30, 0.5],
        ["an item name", 30, 0.5],
        ["a dark and mysterious forest name", 30, 0.5],
        ["an evil character name", 30, 0.5],
    ],
    theme="default",
    # Was `allow_flagging=False`; the documented values are the strings
    # "never" / "auto" / "manual", so use the string form to disable
    # Gradio's built-in flagging.
    allow_flagging="never",
)

demo.queue()
demo.launch()