Spaces:
Sleeping
Commit · b216aad
1 Parent(s): 812bb26
decoding tokens
Browse files
app.py
CHANGED
@@ -6,11 +6,11 @@ tokenizer = BPETokenizer()
|
|
6 |
tokenizer.load('models/english_5000.model')
|
7 |
|
8 |
def inference(input_text):
|
9 |
-
|
10 |
-
|
11 |
-
return len(
|
12 |
|
13 |
-
title = "
|
14 |
description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
|
15 |
examples = [["He walked into the basement with the horror movie from the night before playing in his head."],
|
16 |
["Henry couldn't decide if he was an auto mechanic or a priest."],
|
@@ -23,7 +23,8 @@ demo = gr.Interface(
|
|
23 |
],
|
24 |
outputs = [
|
25 |
gr.Label(label="Token count"),
|
26 |
-
gr.Textbox(label="
|
|
|
27 |
],
|
28 |
title = title,
|
29 |
description = description,
|
|
|
6 |
tokenizer.load('models/english_5000.model')
|
7 |
|
8 |
def inference(input_text):
|
9 |
+
encoding = tokenizer.encode_ordinary(input_text)
|
10 |
+
sentence = tokenizer.decode(encoding)
|
11 |
+
return len(encoding), sentence, encoding
|
12 |
|
13 |
+
title = "Bilingual tokenizer"
|
14 |
description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
|
15 |
examples = [["He walked into the basement with the horror movie from the night before playing in his head."],
|
16 |
["Henry couldn't decide if he was an auto mechanic or a priest."],
|
|
|
23 |
],
|
24 |
outputs = [
|
25 |
gr.Label(label="Token count"),
|
26 |
+
gr.Textbox(label="Sentence after tokenization", type="text"),
|
27 |
+
gr.Textbox(label="Encoding", type="text")
|
28 |
],
|
29 |
title = title,
|
30 |
description = description,
|