Spaces:
Sleeping
Sleeping
Commit
·
035182f
1
Parent(s):
af1568e
Adding colors
Browse files
app.py
CHANGED
@@ -8,12 +8,10 @@ tokenizer.load('models/english_5000.model')
|
|
8 |
|
9 |
def inference(input_text):
|
10 |
encoding = tokenizer.encode_ordinary(input_text)
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
# color_sentence += f"\033[0;37;{background_color}m {word}"
|
16 |
-
color_sentence = "\033[0;37;41m Black"
|
17 |
return len(encoding), color_sentence, encoding
|
18 |
|
19 |
title = "Bilingual Tokenizer"
|
@@ -29,7 +27,7 @@ demo = gr.Interface(
|
|
29 |
],
|
30 |
outputs = [
|
31 |
gr.Label(label="Token count"),
|
32 |
-
gr.
|
33 |
gr.Textbox(label="Encoding", type="text")
|
34 |
],
|
35 |
title = title,
|
|
|
8 |
|
9 |
def inference(input_text):
|
10 |
encoding = tokenizer.encode_ordinary(input_text)
|
11 |
+
sentence = [tokenizer.decode([x]) for x in encoding]
|
12 |
+
color_sentence = []
|
13 |
+
for word in sentence:
|
14 |
+
color_sentence.append((word, random.randint(0, 7)))
|
|
|
|
|
15 |
return len(encoding), color_sentence, encoding
|
16 |
|
17 |
title = "Bilingual Tokenizer"
|
|
|
27 |
],
|
28 |
outputs = [
|
29 |
gr.Label(label="Token count"),
|
30 |
+
gr.HighlightedText(label="Sentence after tokenization"),
|
31 |
gr.Textbox(label="Encoding", type="text")
|
32 |
],
|
33 |
title = title,
|