JohanBeytell committed on
Commit
0b0a0da
·
verified ·
1 Parent(s): 8406ef1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -145
app.py CHANGED
@@ -1,120 +1,16 @@
1
- # import sentencepiece as spm
2
- # import numpy as np
3
- # import tensorflow as tf
4
- # from tensorflow.keras.preprocessing.sequence import pad_sequences
5
- # from valx import detect_profanity, detect_hate_speech
6
- # import gradio as gr
7
-
8
- # sp = spm.SentencePieceProcessor()
9
- # sp.Load("dungen_dev_preview.model")
10
-
11
- # model = tf.keras.models.load_model("dungen_dev_preview_model.keras")
12
-
13
- # max_seq_len = 25
14
-
15
- # def generate_text(seed_text, next_words=30, temperature=0.5):
16
- # seed_text = seed_text.strip().lower()
17
-
18
- # if "|" in seed_text:
19
- # gr.Warning("The prompt should not contain the '|' character. Using default prompt.")
20
- # seed_text = 'game name | '
21
- # elif detect_profanity([seed_text], language='All'):
22
- # gr.Warning("Profanity detected in the prompt, using the default prompt.")
23
- # seed_text = 'game name | '
24
- # elif (hate_speech_result := detect_hate_speech(seed_text)) and hate_speech_result[0] in ['Hate Speech', 'Offensive Speech']:
25
- # gr.Warning('Harmful speech detected in the prompt, using default prompt.')
26
- # seed_text = 'game name | '
27
- # else:
28
- # seed_text += ' | '
29
-
30
- # generated_text = seed_text
31
- # if generated_text != 'game name | ': # only generate if not the default prompt
32
- # for _ in range(next_words):
33
- # token_list = sp.encode_as_ids(generated_text)
34
- # token_list = pad_sequences([token_list], maxlen=max_seq_len - 1, padding='pre')
35
- # predicted = model.predict(token_list, verbose=0)[0]
36
-
37
- # predicted = np.asarray(predicted).astype("float64")
38
- # predicted = np.log(predicted + 1e-8) / temperature
39
- # exp_preds = np.exp(predicted)
40
- # predicted = exp_preds / np.sum(exp_preds)
41
-
42
- # next_index = np.random.choice(len(predicted), p=predicted)
43
- # next_token = sp.id_to_piece(next_index)
44
- # generated_text += next_token
45
-
46
- # if next_token.endswith('</s>') or next_token.endswith('<unk>'):
47
- # break
48
-
49
- # decoded = sp.decode_pieces(sp.encode_as_pieces(generated_text))
50
- # decoded = decoded.replace("</s>", "").replace("<unk>", "").strip()
51
-
52
- # if '|' in decoded:
53
- # decoded = decoded.split('|', 1)[1].strip()
54
-
55
- # if any(detect_profanity([decoded], language='All')) or (hate_speech_result := detect_hate_speech(decoded)) and hate_speech_result[0] in ['Hate Speech', 'Offensive Speech']:
56
- # gr.Warning("Flagged potentially harmful output.")
57
- # decoded = 'Flagged Output'
58
-
59
- # return decoded
60
-
61
- # demo = gr.Interface(
62
- # fn=generate_text,
63
- # inputs=[
64
- # gr.Textbox(label="Prompt", value="a female character name", max_lines=1),
65
- # gr.Slider(1, 100, step=1, label='Next Words', value=30),
66
- # gr.Slider(0.1, 1, value=0.5, label='Temperature', info='Controls randomness of generation, higher values = more creative, lower values = more probalistic')
67
- # ],
68
- # outputs=gr.Textbox(label="Generated Names"),
69
- # title='Dungen Dev - Name Generator',
70
- # description='A prompt-based name generator for game developers. Dungen Dev is an experimental model, and may produce outputs that are inappropriate, biased, or potentially harmful and inaccurate. Caution is advised.',
71
- # examples=[
72
- # ["a male character name", 30, 0.5],
73
- # ["a futuristic city name", 30, 0.5],
74
- # ["an item name", 30, 0.5],
75
- # ["a dark and mysterious forest name", 30, 0.5],
76
- # ["an evil character name", 30, 0.5]
77
- # ]
78
- # )
79
-
80
- # demo.launch()
81
  import sentencepiece as spm
82
  import numpy as np
83
  import tensorflow as tf
84
  from tensorflow.keras.preprocessing.sequence import pad_sequences
85
  from valx import detect_profanity, detect_hate_speech
86
  import gradio as gr
87
- import logging
88
- import csv
89
- import os
90
- from datetime import datetime
91
- from datasets import load_dataset, Dataset
92
 
93
- # Model and SentencePiece loading
94
  sp = spm.SentencePieceProcessor()
95
  sp.Load("dungen_dev_preview.model")
96
- model = tf.keras.models.load_model("dungen_dev_preview_model.keras")
97
- max_seq_len = 25
98
-
99
- logging.basicConfig(filename="app.log", level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
100
 
101
- # Dataset setup
102
- FLAGGED_DATASET_ID = "InfinitodeLTD/DungenDev-FlaggedOutputs"
103
-
104
- def load_or_create_dataset(dataset_id):
105
- try:
106
- dataset = load_dataset(dataset_id)
107
- if "flagged_data" not in dataset:
108
- raise ValueError("Dataset does not contain the 'flagged_data' config.")
109
-
110
- return dataset["flagged_data"]
111
- except (datasets.DatasetNotFoundError, ValueError) as e:
112
- logging.warning(f"Dataset not found or incorrect schema: {e}. Creating a new dataset.")
113
- dataset = Dataset.from_dict({"Timestamp": [], "Prompt": [], "Flagged Text": []})
114
- dataset.push_to_hub(dataset_id, config_name="flagged_data") # important: config_name
115
- return dataset
116
 
117
- flagged_dataset = load_or_create_dataset(FLAGGED_DATASET_ID)
118
 
119
  def generate_text(seed_text, next_words=30, temperature=0.5):
120
  seed_text = seed_text.strip().lower()
@@ -132,7 +28,7 @@ def generate_text(seed_text, next_words=30, temperature=0.5):
132
  seed_text += ' | '
133
 
134
  generated_text = seed_text
135
- if generated_text != 'game name | ':
136
  for _ in range(next_words):
137
  token_list = sp.encode_as_ids(generated_text)
138
  token_list = pad_sequences([token_list], maxlen=max_seq_len - 1, padding='pre')
@@ -162,49 +58,23 @@ def generate_text(seed_text, next_words=30, temperature=0.5):
162
 
163
  return decoded
164
 
165
- def flag_output(prompt_used, text_flagged):
166
- if not text_flagged.strip(): # Check for empty or whitespace-only strings
167
- return "Cannot flag an empty output."
168
-
169
- logging.info(f"Output flagged: {text_flagged} with prompt: {prompt_used}")
170
- timestamp = datetime.now().isoformat()
171
- new_data = {"Timestamp": [timestamp], "Prompt": [prompt_used], "Flagged Text": [text_flagged]}
172
- new_dataset = Dataset.from_dict(new_data)
173
- global flagged_dataset
174
- flagged_dataset = Dataset.from_pandas(flagged_dataset.to_pandas().append(new_dataset.to_pandas(), ignore_index=True))
175
- flagged_dataset.push_to_hub(FLAGGED_DATASET_ID)
176
- return "Output Flagged. Thank you for your feedback."
177
-
178
- with gr.Blocks() as demo:
179
- gr.Markdown("""# Dungen Dev - Name Generator
180
- A prompt-based name generator for game developers.""")
181
-
182
- with gr.Row():
183
- with gr.Column():
184
- prompt = gr.Textbox(label="Prompt", value="a female character name", max_lines=1)
185
- with gr.Row():
186
- next_words_slider = gr.Slider(1, 100, step=1, label='Next Words', value=30)
187
- temperature_slider = gr.Slider(0.1, 1, value=0.5, label='Temperature', info='Controls randomness of generation, higher values = more creative, lower values = more probalistic')
188
- generate_button = gr.Button("Generate")
189
- with gr.Column():
190
- output_text = gr.Textbox(label="Generated Names", interactive=False)
191
- flag_button = gr.Button("Flag Output")
192
-
193
- gr.Markdown("""Dungen Dev is an experimental model, and may produce outputs that are inappropriate, biased, or potentially harmful and inaccurate. Caution is advised.""")
194
-
195
- generate_button.click(
196
- fn=generate_text,
197
- inputs=[prompt, next_words_slider, temperature_slider],
198
- outputs=output_text
199
- )
200
- flag_button.click(flag_output, inputs=output_text, outputs=gr.Textbox(label="Flag Status", interactive=False))
201
-
202
- demo.examples=[
203
  ["a male character name", 30, 0.5],
204
  ["a futuristic city name", 30, 0.5],
205
  ["an item name", 30, 0.5],
206
  ["a dark and mysterious forest name", 30, 0.5],
207
  ["an evil character name", 30, 0.5]
208
  ]
 
209
 
210
  demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import sentencepiece as spm
2
  import numpy as np
3
  import tensorflow as tf
4
  from tensorflow.keras.preprocessing.sequence import pad_sequences
5
  from valx import detect_profanity, detect_hate_speech
6
  import gradio as gr
 
 
 
 
 
7
 
 
8
  sp = spm.SentencePieceProcessor()
9
  sp.Load("dungen_dev_preview.model")
 
 
 
 
10
 
11
+ model = tf.keras.models.load_model("dungen_dev_preview_model.keras")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ max_seq_len = 25
14
 
15
  def generate_text(seed_text, next_words=30, temperature=0.5):
16
  seed_text = seed_text.strip().lower()
 
28
  seed_text += ' | '
29
 
30
  generated_text = seed_text
31
+ if generated_text != 'game name | ': # only generate if not the default prompt
32
  for _ in range(next_words):
33
  token_list = sp.encode_as_ids(generated_text)
34
  token_list = pad_sequences([token_list], maxlen=max_seq_len - 1, padding='pre')
 
58
 
59
  return decoded
60
 
61
+ demo = gr.Interface(
62
+ fn=generate_text,
63
+ inputs=[
64
+ gr.Textbox(label="Prompt", value="a female character name", max_lines=1),
65
+ gr.Slider(1, 100, step=1, label='Next Words', value=30),
66
+ gr.Slider(0.1, 1, value=0.5, label='Temperature', info='Controls randomness of generation, higher values = more creative, lower values = more probalistic')
67
+ ],
68
+ outputs=gr.Textbox(label="Generated Names"),
69
+ title='Dungen Dev - Name Generator',
70
+ description='A prompt-based name generator for game developers. Dungen Dev is an experimental model, and may produce outputs that are inappropriate, biased, or potentially harmful and inaccurate. Caution is advised.',
71
+ examples=[
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  ["a male character name", 30, 0.5],
73
  ["a futuristic city name", 30, 0.5],
74
  ["an item name", 30, 0.5],
75
  ["a dark and mysterious forest name", 30, 0.5],
76
  ["an evil character name", 30, 0.5]
77
  ]
78
+ )
79
 
80
  demo.launch()