Spaces:

AkashDataScience
/

languageBPE

Sleeping

AkashDataScience committed on Jun 26, 2024

Commit

035182f

1 Parent(s): af1568e

Adding colors

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,12 +8,10 @@ tokenizer.load('models/english_5000.model')
 def inference(input_text):
     encoding = tokenizer.encode_ordinary(input_text)
-    # sentence = [tokenizer.decode([x]) for x in encoding]
-    # color_sentence = ""
-    # for word in sentence:
-    #     background_color = random.randint(40, 47)
-    #     color_sentence += f"\033[0;37;{background_color}m {word}"
-    color_sentence = "\033[0;37;41m Black"
     return len(encoding), color_sentence, encoding
 title = "Bilingual Tokenizer"
@@ -29,7 +27,7 @@ demo = gr.Interface(
         ],
     outputs = [
         gr.Label(label="Token count"),
-        gr.Textbox(label="Sentence after tokenization", type="text"),
         gr.Textbox(label="Encoding", type="text")
         ],
     title = title,

 def inference(input_text):
     encoding = tokenizer.encode_ordinary(input_text)
+    sentence = [tokenizer.decode([x]) for x in encoding]
+    color_sentence = []
+    for word in sentence:
+        color_sentence.append((word, random.randint(0, 7)))
     return len(encoding), color_sentence, encoding
 title = "Bilingual Tokenizer"
         ],
     outputs = [
         gr.Label(label="Token count"),
+        gr.HighlightedText(label="Sentence after tokenization"),
         gr.Textbox(label="Encoding", type="text")
         ],
     title = title,