File size: 11,335 Bytes
bd71161
 
 
 
 
dc6db5b
53200cd
975a0e4
bd71161
fafc478
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
608d8aa
 
 
782b343
608d8aa
 
 
 
 
 
 
 
 
 
1435559
 
 
 
 
608d8aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51b72f7
 
608d8aa
c9fc99e
 
608d8aa
 
 
 
 
975a0e4
b16c88e
608d8aa
d4c1f23
5cdd7c2
1c7206a
 
5cdd7c2
1c7206a
 
 
 
 
 
 
 
 
5cdd7c2
1c7206a
5cdd7c2
d4c1f23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fafc478
b16c88e
 
 
 
 
 
 
 
 
 
 
 
 
20628b5
b16c88e
d4c1f23
fafc478
d4c1f23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fafc478
20628b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4c1f23
 
d0d758e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b50994
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4c1f23
 
d0d758e
d4c1f23
 
14879e1
d4c1f23
 
fafc478
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
import os
import random
import gradio as gr
import sentencepiece as spm
import numpy as np
import pandas as pd
import tensorflow as tf
from valx import detect_profanity, detect_hate_speech

def custom_pad_sequences(sequences, maxlen, padding='pre', value=0):
    """
    Pads (or truncates) every sequence to exactly ``maxlen`` items.

    Sequences longer than ``maxlen`` are truncated on the same side that
    would otherwise be padded: 'pre' keeps the last ``maxlen`` items,
    'post' keeps the first ``maxlen`` items. Empty sequences produce a row
    made entirely of ``value``.

    :param sequences: List of lists, where each element is a sequence.
    :param maxlen: Length of every row in the result.
    :param padding: 'pre' or 'post', pad either before or after each sequence.
    :param value: Padding value; the dtype of the result is inferred from it
        by ``np.full`` (integer for the default ``0``).
    :return: Numpy array with dimensions (number_of_sequences, maxlen)
    :raises ValueError: If ``padding`` is neither 'pre' nor 'post'.
    """
    if padding not in ('pre', 'post'):
        raise ValueError(f"padding must be 'pre' or 'post', got {padding!r}")

    padded_sequences = np.full((len(sequences), maxlen), value)
    if maxlen == 0:
        # Nothing can be stored in zero-width rows; also avoids seq[-0:],
        # which would select the whole sequence instead of nothing.
        return padded_sequences

    for i, seq in enumerate(sequences):
        kept = seq[-maxlen:] if padding == 'pre' else seq[:maxlen]
        if len(kept) == 0:
            # Guard against the [-0:] slice, which addresses the whole row
            # and makes the assignment of an empty sequence fail.
            continue
        if padding == 'pre':
            padded_sequences[i, -len(kept):] = kept
        else:
            padded_sequences[i, :len(kept)] = kept
    return padded_sequences

def generate_random_name(interpreter, vocab_size, sp, max_length=10, temperature=0.5, seed_text="", max_seq_len=12):
    """
    Generates a single name by sampling subword pieces from a TFLite model.

    Starting from ``seed_text`` (or a randomly chosen piece when the seed is
    empty), the text generated so far is encoded, left-padded to
    ``max_seq_len`` and fed to the model; the next piece is then sampled from
    the temperature-scaled output distribution and appended.

    :param interpreter: TFLite interpreter used for inference. This function
        does not call ``allocate_tensors`` itself.
    :param vocab_size: Number of pieces to sample from; must match the length
        of the model's output vector.
    :param sp: SentencePiece processor providing ``encode_as_ids``,
        ``id_to_piece`` and ``decode_pieces``.
    :param max_length: Generation runs for at most ``max_length - 1`` steps
        and stops early once the decoded text is longer than this.
    :param temperature: Sampling temperature, must be greater than 0.
    :param seed_text: Optional text the generated name starts with.
    :param max_seq_len: Input length the model expects.
    :return: The cleaned-up name, possibly an empty string.
    """
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    input_index = input_details[0]['index']
    output_index = output_details[0]['index']
    # Feed the dtype the model declares instead of assuming float32; fall
    # back to float32 if the details carry no dtype entry.
    input_dtype = input_details[0].get('dtype', np.float32)
    decoded_name = ''

    if seed_text:
        generated_name = seed_text
    else:
        # Start at 1 so that id 0 is never picked as the first piece.
        random_index = np.random.randint(1, vocab_size)
        generated_name = sp.id_to_piece(random_index)

    for _ in range(max_length - 1):
        token_ids = sp.encode_as_ids(generated_name)

        # Nothing to feed the model. The text cannot change without a new
        # piece, so every later iteration would be empty as well: stop here.
        if len(token_ids) == 0:
            break

        # Pad to the length expected by the model and match its input dtype
        model_input = custom_pad_sequences([token_ids], maxlen=max_seq_len, padding='pre')
        model_input = model_input.astype(input_dtype)

        interpreter.set_tensor(input_index, model_input)
        interpreter.invoke()
        predicted = interpreter.get_tensor(output_index)[0]

        # Temperature scaling in float64 with the maximum subtracted before
        # exponentiating, so low temperatures cannot underflow every entry
        # to zero and turn the normalisation into 0/0.
        scaled = np.log(np.asarray(predicted, dtype=np.float64) + 1e-8) / temperature
        scaled -= np.max(scaled)
        probabilities = np.exp(scaled)
        probabilities /= np.sum(probabilities)

        # Sample the next piece from the distribution
        next_index = int(np.random.choice(vocab_size, p=probabilities))
        next_token = sp.id_to_piece(next_index)

        generated_name += next_token

        # Decode the generated subword tokens into a string
        decoded_name = sp.decode_pieces(generated_name.split())

        # Stop on an empty piece or once the decoded text exceeds max_length
        if next_token == '' or len(decoded_name) > max_length:
            break

    # Turn the "▁" word-boundary marker into a space and drop special tokens
    decoded_name = decoded_name.replace("▁", " ")
    decoded_name = decoded_name.replace("</s>", "")
    decoded_name = decoded_name.replace("<unk>", "")
    decoded_name = decoded_name.replace("<s>", "")

    # Discard everything after the last space (the final, possibly
    # unfinished, word) whenever the text contains more than one word.
    generated_name = decoded_name.rsplit(' ', 1)[0]
    if generated_name:
        generated_name = generated_name[0].upper() + generated_name[1:]

    # Drop a trailing word shorter than three characters
    parts = generated_name.split()
    if parts and len(parts[-1]) < 3:
        generated_name = " ".join(parts[:-1])

    return generated_name.strip()

# Per-model settings: (padded input length, SentencePiece model, TFLite model).
_MODEL_CONFIGS = {
    "Terraria": (12, "models/terraria_names.model", "models/dungen_terraria_model.tflite"),
    "Skyrim": (13, "models/skyrim_names.model", "models/dungen_skyrim_model.tflite"),
    "Witcher": (20, "models/witcher_names.model", "models/dungen_witcher_model.tflite"),
    "Fantasy": (16, "models/fantasy_names.model", "models/dungen_fantasy_model.tflite"),
}

# Warning shown when the seed text is rejected, keyed by moderation verdict.
_SEED_WARNINGS = {
    'profanity': "Profanity detected in the seed text, using an empty seed text.",
    'hate': 'Hate speech detected in the seed text, using an empty seed text.',
    'offensive': 'Offensive speech detected in the seed text, using an empty seed text.',
}

# Placeholder written to the result table instead of a rejected name.
_NAME_PLACEHOLDERS = {
    'profanity': "Profanity Detected",
    'hate': 'Hate Speech Detected',
    'offensive': 'Offensive Speech Detected',
}


def _moderation_verdict(text):
    """Classify text as 'profanity', 'hate', 'offensive', 'clean' or None (unrecognised result)."""
    hate_speech = detect_hate_speech(text)
    profanity = detect_profanity([text], language='All')

    # Profanity takes precedence over the hate-speech classification.
    if profanity > 0:
        return 'profanity'
    if hate_speech == ['Hate Speech']:
        return 'hate'
    if hate_speech == ['Offensive Speech']:
        return 'offensive'
    if hate_speech == ['No Hate and Offensive Speech']:
        return 'clean'
    return None


def _moderated_name(name):
    """Return the name if it is classified clean, a placeholder if rejected, '' otherwise."""
    verdict = _moderation_verdict(name)
    if verdict == 'clean':
        return name
    # An unrecognised classifier result yields an empty entry rather than
    # letting an unchecked name through.
    return _NAME_PLACEHOLDERS.get(verdict, '')


def generateNames(type, amount, max_length=30, temperature=0.5, seed_text=""):
    """
    Generates a table of names with the model selected by ``type``.

    The seed text is screened first; a seed flagged for profanity, hate
    speech or offensive speech triggers a warning and is replaced by an empty
    seed. Every generated name is screened the same way, and rejected names
    are replaced by a placeholder such as "Profanity Detected".

    :param type: Model to use: "Terraria", "Skyrim", "Witcher" or "Fantasy".
    :param amount: Number of names to generate (converted with ``int``).
    :param max_length: Maximum length passed to the generator (converted
        with ``int``).
    :param temperature: Sampling temperature passed to the generator.
    :param seed_text: Optional text every name should start with.
    :return: DataFrame with one column, 'Names', holding one row per
        requested name.
    :raises ValueError: If ``type`` is not one of the known models.
    """
    if type not in _MODEL_CONFIGS:
        raise ValueError(
            f"Unknown model type {type!r}; expected one of {sorted(_MODEL_CONFIGS)}"
        )
    max_seq_len, tokenizer_path, model_path = _MODEL_CONFIGS[type]

    # Screen the seed before it can steer the generation.
    seed_warning = _SEED_WARNINGS.get(_moderation_verdict(seed_text))
    if seed_warning:
        gr.Warning(seed_warning)
        seed_text = ''

    amount = int(amount)
    max_length = int(max_length)

    # Load the tokenizer belonging to the selected model
    sp = spm.SentencePieceProcessor()
    sp.load(tokenizer_path)
    vocab_size = sp.GetPieceSize()

    # Load the TFLite model
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    names = []
    for _ in range(amount):
        generated_name = generate_random_name(
            interpreter,
            vocab_size,
            sp,
            seed_text=seed_text,
            max_length=max_length,
            temperature=temperature,
            max_seq_len=max_seq_len,
        )
        names.append(_moderated_name(generated_name.strip()))
    return pd.DataFrame(names, columns=['Names'])

# Gradio front end. The input components are passed positionally to
# generateNames as (type, amount, max_length, temperature, seed_text).
demo = gr.Interface(
    fn=generateNames,
    inputs=[
        # Which model to generate with
        gr.Radio(
            choices=["Terraria", "Skyrim", "Witcher", "Fantasy"],
            label="Choose a model for your request",
            value="Terraria",
        ),
        # Number of names to generate
        gr.Slider(
            1,
            100,
            step=1,
            label='Amount of Names',
            info='How many names to generate, must be greater than 0',
        ),
        # Maximum length handed to the generator
        gr.Slider(
            10,
            60,
            value=30,
            step=1,
            label='Max Length',
            info='Max length of the generated word',
        ),
        # Sampling temperature
        gr.Slider(
            0.1,
            1,
            value=0.5,
            label='Temperature',
            info='Controls randomness of generation, higher values = more creative, lower values = more probalistic',
        ),
        # Optional seed text
        gr.Textbox(
            '',
            label='Seed text (optional)',
            info='The starting text to begin with',
            max_lines=1,
        ),
    ],
    outputs=[
        gr.Dataframe(
            row_count=(2, "dynamic"),
            col_count=(1, "fixed"),
            label="Generated Names",
            headers=["Names"],
        ),
    ],
    title='Dungen - Name Generator',
    description='A fun game-inspired name generator. For an example of how to create, and train your model, similar to this one, head over to: https://github.com/Infinitode/OPEN-ARC/tree/main/Project-5-TWNG. There you will find our base model, the dataset we used, and implementation code in the form of a Jupyter Notebook (exported from Kaggle).',
)

demo.launch()