AkashDataScience committed on
Commit
b216aad
·
1 Parent(s): 812bb26

decoding tokens

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -6,11 +6,11 @@ tokenizer = BPETokenizer()
6
  tokenizer.load('models/english_5000.model')
7
 
8
  def inference(input_text):
9
- tokens = tokenizer.encode_ordinary(input_text)
10
-
11
- return len(tokens), tokens
12
 
13
- title = "A bilingual tokenizer build using opus and wikipedia data"
14
  description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
15
  examples = [["He walked into the basement with the horror movie from the night before playing in his head."],
16
  ["Henry couldn't decide if he was an auto mechanic or a priest."],
@@ -23,7 +23,8 @@ demo = gr.Interface(
23
  ],
24
  outputs = [
25
  gr.Label(label="Token count"),
26
- gr.Textbox(label="Output", type="text")
 
27
  ],
28
  title = title,
29
  description = description,
 
6
  tokenizer.load('models/english_5000.model')
7
 
8
  def inference(input_text):
9
+ encoding = tokenizer.encode_ordinary(input_text)
10
+ sentence = tokenizer.decode(encoding)
11
+ return len(encoding), sentence, encoding
12
 
13
+ title = "Bilingual tokenizer"
14
  description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
15
  examples = [["He walked into the basement with the horror movie from the night before playing in his head."],
16
  ["Henry couldn't decide if he was an auto mechanic or a priest."],
 
23
  ],
24
  outputs = [
25
  gr.Label(label="Token count"),
26
+ gr.Textbox(label="Sentence after tokenization", type="text"),
27
+ gr.Textbox(label="Encoding", type="text")
28
  ],
29
  title = title,
30
  description = description,