Spaces:
Sleeping
Sleeping
Commit
·
f049fd3
1
Parent(s):
7d4468d
Minor change
Browse files
- app.py +2 -2
- language_bpe/bpe_tokenizer.py +1 -1
app.py
CHANGED
@@ -29,8 +29,8 @@ demo = gr.Interface(
|
|
29 |
],
|
30 |
outputs = [
|
31 |
gr.Label(label="Token count"),
|
32 |
-
gr.HighlightedText(label="Sentence
|
33 |
-
gr.
|
34 |
],
|
35 |
title = title,
|
36 |
description = description,
|
|
|
29 |
],
|
30 |
outputs = [
|
31 |
gr.Label(label="Token count"),
|
32 |
+
gr.HighlightedText(label="Sentence", show_inline_category=False),
|
33 |
+
gr.HighlightedText(label="Encoding", show_inline_category=False)
|
34 |
],
|
35 |
title = title,
|
36 |
description = description,
|
language_bpe/bpe_tokenizer.py
CHANGED
@@ -42,7 +42,7 @@ class BPETokenizer(Tokenizer):
|
|
42 |
vocab.update({idx: bytes(list(chr(value).encode('utf-8'))) for idx,value in zip(range(256, 384), range(2304, 2432))})
|
43 |
|
44 |
print("Merging hindi characters in single token")
|
45 |
-
for index in
|
46 |
pair = list(vocab[index])
|
47 |
ids = [merge_hindi(chunk_ids, pair, index) for chunk_ids in ids]
|
48 |
|
|
|
42 |
vocab.update({idx: bytes(list(chr(value).encode('utf-8'))) for idx,value in zip(range(256, 384), range(2304, 2432))})
|
43 |
|
44 |
print("Merging hindi characters in single token")
|
45 |
+
for index in range(256, 384):
|
46 |
pair = list(vocab[index])
|
47 |
ids = [merge_hindi(chunk_ids, pair, index) for chunk_ids in ids]
|
48 |
|