Spaces:

AkashDataScience
/

languageBPE

Sleeping

AkashDataScience committed on Jun 26, 2024

Commit

e40da07

1 Parent(s): 4e2aa97

Added colors

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import torch
 import gradio as gr
 from language_bpe import BPETokenizer
@@ -8,7 +9,13 @@ tokenizer.load('models/english_5000.model')
 def inference(input_text):
     """Tokenize ``input_text`` and return its token count, token strings, and ids.

     The text is encoded with the module-level ``tokenizer`` and every
     resulting id is decoded on its own, so each token's text is available
     as a separate list element.

     Returns a 3-tuple: ``(len(encoding), sentence, encoding)``.
     """
     encoding = tokenizer.encode_ordinary(input_text)
     # Decode one id at a time to get one string per token.
     sentence = [tokenizer.decode([x]) for x in encoding]
-    return len(encoding), sentence, encoding
 # Title and description strings for the demo; presumably passed to the Gradio
 # interface built later in the file (not visible in this hunk -- confirm).
 # NOTE(review): the description advertises Hindi/English text, while the hunk
 # header shows 'models/english_5000.model' being loaded -- confirm they agree.
 title = "Bilingual tokenizer"
 description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"

 import torch
+import random
 import gradio as gr
 from language_bpe import BPETokenizer
 def inference(input_text):
     """Tokenize ``input_text`` and return count, colorized tokens, and ids.

     The text is encoded with the module-level ``tokenizer``; each id is then
     decoded on its own and wrapped in an ANSI escape sequence with white
     foreground and a randomly chosen background color (codes 40-47), so
     individual tokens can be told apart.

     Returns a 3-tuple: ``(len(encoding), color_sentence, encoding)`` where
     ``color_sentence`` is the concatenation of all colorized tokens with
     surrounding whitespace stripped.
     """
     encoding = tokenizer.encode_ordinary(input_text)
     # Decode one id at a time to get one string per token.
     sentence = [tokenizer.decode([x]) for x in encoding]
     # Collect a segment for every token and join them, so the result holds
     # all tokens rather than only the last one processed.
     # NOTE(review): background 47 is white, same as the foreground (37), so a
     # token drawn with it may be unreadable -- consider narrowing the range.
     segments = []
     for word in sentence:
         background_color = random.randint(40, 47)
         segments.append(f"\033[0;37;{background_color}m {word} ")
     color_sentence = "".join(segments).strip()
     return len(encoding), color_sentence, encoding
 # Title and description strings for the demo; presumably passed to the Gradio
 # interface built later in the file (not visible in this hunk -- confirm).
 # NOTE(review): the description advertises Hindi/English text, while the hunk
 # header shows 'models/english_5000.model' being loaded -- confirm they agree.
 title = "Bilingual tokenizer"
 description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"