Spaces: Sleeping
Commit
·
7d4468d
1
Parent(s):
e9e4696
Updating colors
Browse files
app.py
CHANGED
@@ -10,9 +10,11 @@ def inference(input_text):
|
|
10 |
encoding = tokenizer.encode_ordinary(input_text)
|
11 |
sentence = [tokenizer.decode([x]) for x in encoding]
|
12 |
color_sentence = []
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
16 |
|
17 |
title = "Bilingual Tokenizer"
|
18 |
description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
|
|
|
10 |
encoding = tokenizer.encode_ordinary(input_text)
|
11 |
sentence = [tokenizer.decode([x]) for x in encoding]
|
12 |
color_sentence = []
|
13 |
+
color_encoding = []
|
14 |
+
for word, encode in zip(sentence, encoding):
|
15 |
+
color_sentence.append((word, str(encode)))
|
16 |
+
color_encoding.append((encode, str(encode)))
|
17 |
+
return len(encoding), color_sentence, color_encoding
|
18 |
|
19 |
title = "Bilingual Tokenizer"
|
20 |
description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
|