Spaces:
Sleeping
Sleeping
Commit
·
e40da07
1
Parent(s):
4e2aa97
Added colors
Browse files
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import torch
|
|
|
2 |
import gradio as gr
|
3 |
from language_bpe import BPETokenizer
|
4 |
|
@@ -8,7 +9,13 @@ tokenizer.load('models/english_5000.model')
|
|
8 |
def inference(input_text):
|
9 |
encoding = tokenizer.encode_ordinary(input_text)
|
10 |
sentence = [tokenizer.decode([x]) for x in encoding]
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
title = "Bilingual tokenizer"
|
14 |
description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
|
|
|
1 |
import torch
|
2 |
+
import random
|
3 |
import gradio as gr
|
4 |
from language_bpe import BPETokenizer
|
5 |
|
|
|
9 |
def inference(input_text):
    """Tokenize ``input_text`` and build a per-token colorized rendering.

    Args:
        input_text: Text handed to the module-level ``tokenizer``'s
            ``encode_ordinary`` method.

    Returns:
        A 3-tuple ``(token_count, color_sentence, encoding)``:
        ``token_count`` is ``len(encoding)``; ``color_sentence`` is the
        concatenation of every decoded token, each one prefixed with an
        escape sequence selecting a randomly chosen background code, with
        surrounding whitespace stripped from the final string;
        ``encoding`` is the value returned by ``tokenizer.encode_ordinary``.
    """
    encoding = tokenizer.encode_ordinary(input_text)
    # Decode each id on its own so every token can be colored separately.
    sentence = [tokenizer.decode([x]) for x in encoding]

    # Collect one colored segment per token and join them afterwards. The
    # earlier loop re-assigned ``color_sentence`` on every iteration, so only
    # the last token ever reached the caller.
    colored_tokens = []
    for word in sentence:
        # 40-47 are the standard ANSI SGR background color codes; "0;37"
        # resets attributes and selects a white foreground.
        background_color = random.randint(40, 47)
        colored_tokens.append(f"\033[0;37;{background_color}m {word} ")

    # NOTE(review): no reset sequence ("\033[0m") is appended, matching the
    # original output format; a terminal would keep the last color active.
    color_sentence = "".join(colored_tokens).strip()

    return len(encoding), color_sentence, encoding
|
19 |
|
20 |
title = "Bilingual tokenizer"
|
21 |
description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
|