AkashDataScience committed
Commit f049fd3 · 1 Parent(s): 7d4468d

Minor change

Files changed (2)
  1. app.py +2 -2
  2. language_bpe/bpe_tokenizer.py +1 -1
app.py CHANGED
@@ -29,8 +29,8 @@ demo = gr.Interface(
     ],
     outputs = [
         gr.Label(label="Token count"),
-        gr.HighlightedText(label="Sentence after tokenization", show_inline_category=False),
-        gr.Textbox(label="Encoding", type="text")
+        gr.HighlightedText(label="Sentence", show_inline_category=False),
+        gr.HighlightedText(label="Encoding", show_inline_category=False)
     ],
     title = title,
     description = description,
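
For context only: a minimal sketch of how the updated outputs might sit inside the Space's gr.Interface call. The tokenize function, the input component, and the title/description strings below are placeholders assumed for illustration; they are not part of this commit.

import gradio as gr

# Placeholder tokenizer standing in for the Space's real BPE tokenizer (assumption).
def tokenize(text):
    tokens = text.split()  # naive whitespace split, for illustration only
    token_count = len(tokens)
    # HighlightedText expects a list of (text, category) tuples.
    sentence = [(tok, str(i)) for i, tok in enumerate(tokens)]
    encoding = [(str(i), None) for i, _ in enumerate(tokens)]
    return token_count, sentence, encoding

demo = gr.Interface(
    fn=tokenize,
    inputs=[gr.Textbox(label="Input text")],  # placeholder input
    outputs=[
        gr.Label(label="Token count"),
        gr.HighlightedText(label="Sentence", show_inline_category=False),
        gr.HighlightedText(label="Encoding", show_inline_category=False),
    ],
    title="BPE tokenizer demo",         # placeholder title
    description="Tokenize input text",  # placeholder description
)

if __name__ == "__main__":
    demo.launch()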
language_bpe/bpe_tokenizer.py CHANGED
@@ -42,7 +42,7 @@ class BPETokenizer(Tokenizer):
         vocab.update({idx: bytes(list(chr(value).encode('utf-8'))) for idx,value in zip(range(256, 384), range(2304, 2432))})

         print("Merging hindi characters in single token")
-        for index in tqdm(range(256, 384)):
+        for index in range(256, 384):
             pair = list(vocab[index])
             ids = [merge_hindi(chunk_ids, pair, index) for chunk_ids in ids]

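
For readers skimming the change: the loop above walks the 128 ids (256-383) that were pre-assigned to Devanagari characters (code points 2304-2431) and collapses each character's UTF-8 byte sequence into its single token id. The helper below is only a sketch of what a merge_hindi-style function might do under that assumption; the actual implementation in language_bpe may differ.

def merge_hindi(ids, seq, idx):
    # Replace every occurrence of the byte sequence `seq` in `ids`
    # with the single token id `idx`. Sketch only (assumption), not
    # the repository's actual merge_hindi.
    out, i, n = [], 0, len(seq)
    while i < len(ids):
        if ids[i:i + n] == seq:
            out.append(idx)
            i += n
        else:
            out.append(ids[i])
            i += 1
    return out

# Example: the 3 UTF-8 bytes of one Devanagari character become one id.
pair = list("अ".encode("utf-8"))            # [224, 164, 133]
print(merge_hindi(pair + [65], pair, 256))  # -> [256, 65]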