AkashDataScience committed on
Commit
812bb26
·
1 Parent(s): 1287e6c

Adding token length

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -8,7 +8,7 @@ tokenizer.load('models/english_5000.model')
8
  def inference(input_text):
9
  tokens = tokenizer.encode_ordinary(input_text)
10
 
11
- return tokens
12
 
13
  title = "A bilingual tokenizer build using opus and wikipedia data"
14
  description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
@@ -22,6 +22,7 @@ demo = gr.Interface(
22
  gr.Textbox(label="Enter any sentence in Hindi, English or both language", type="text"),
23
  ],
24
  outputs = [
 
25
  gr.Textbox(label="Output", type="text")
26
  ],
27
  title = title,
 
8
  def inference(input_text):
9
  tokens = tokenizer.encode_ordinary(input_text)
10
 
11
+ return len(tokens), tokens
12
 
13
  title = "A bilingual tokenizer build using opus and wikipedia data"
14
  description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
 
22
  gr.Textbox(label="Enter any sentence in Hindi, English or both language", type="text"),
23
  ],
24
  outputs = [
25
+ gr.Label(label="Token count"),
26
  gr.Textbox(label="Output", type="text")
27
  ],
28
  title = title,