AkashDataScience committed on
Commit
e40da07
·
1 Parent(s): 4e2aa97

Added colors

Browse files
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import torch
 
2
  import gradio as gr
3
  from language_bpe import BPETokenizer
4
 
@@ -8,7 +9,13 @@ tokenizer.load('models/english_5000.model')
8
  def inference(input_text):
9
  encoding = tokenizer.encode_ordinary(input_text)
10
  sentence = [tokenizer.decode([x]) for x in encoding]
11
- return len(encoding), sentence, encoding
 
 
 
 
 
 
12
 
13
  # Heading text for the demo.
  # NOTE(review): presumably passed to the Gradio interface built further down — confirm.
  title = "Bilingual tokenizer"
14
  # One-line blurb describing the demo.
  # NOTE(review): presumably shown under the title by the Gradio interface — confirm.
  description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
 
1
  import torch
2
+ import random
3
  import gradio as gr
4
  from language_bpe import BPETokenizer
5
 
 
9
  def inference(input_text):
10
  encoding = tokenizer.encode_ordinary(input_text)
11
  sentence = [tokenizer.decode([x]) for x in encoding]
12
+ color_sentence = ""
13
+ for word in sentence:
14
+ background_color = random.randint(40, 47)
15
+ color_sentence = f"\033[0;37;{background_color}m {word} "
16
+ color_sentence = color_sentence.strip()
17
+
18
+ return len(encoding), color_sentence, encoding
19
 
20
  # Heading text for the demo.
  # NOTE(review): presumably passed to the Gradio interface built further down — confirm.
  title = "Bilingual tokenizer"
21
  # One-line blurb describing the demo.
  # NOTE(review): presumably shown under the title by the Gradio interface — confirm.
  description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"