Spaces:
Sleeping
Sleeping
File size: 1,891 Bytes
41dfb3a e40da07 41dfb3a 5581e99 41dfb3a 5581e99 035182f 7d4468d 49c6fa3 7d4468d 41dfb3a af1568e 41dfb3a 5581e99 41dfb3a 812bb26 f049fd3 2f70820 41dfb3a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import torch
import random
import gradio as gr
from language_bpe import BPETokenizer
def _load_tokenizer(model_path):
    """Build a ``BPETokenizer`` and load the model file at ``model_path``."""
    loaded = BPETokenizer()
    loaded.load(model_path)
    return loaded


# Each model is loaded once at import time and reused for every request.
hinglish_tokenizer = _load_tokenizer('models/hinglish_5000.model')
hindi_tokenizer = _load_tokenizer('models/hindi_5000.model')
english_tokenizer = _load_tokenizer('models/english_5000.model')

# Maps the name chosen in the UI dropdown to the tokenizer instance to use.
tokenizer_dict = {
    "Hinglish_5k": hinglish_tokenizer,
    "Hindi_5k": hindi_tokenizer,
    "English_5k": english_tokenizer,
}
def inference(input_text, tokenizer):
    """Tokenize ``input_text`` and shape the result for the UI outputs.

    Args:
        input_text: The text to tokenize.
        tokenizer: Key into ``tokenizer_dict`` selecting the tokenizer to use.

    Returns:
        A 3-tuple ``(count, pieces, ids)`` where ``count`` is the number of
        token ids produced, ``pieces`` is a list of
        ``(decoded token text, token id as str)`` pairs, and ``ids`` is a
        list of ``(token id as str, token id as str)`` pairs.

    Raises:
        KeyError: If ``tokenizer`` is not a key of ``tokenizer_dict``.
    """
    active = tokenizer_dict[tokenizer]
    token_ids = active.encode_ordinary(input_text)
    id_labels = [str(token_id) for token_id in token_ids]

    # Every id is decoded on its own so each token can be shown as a separate
    # span. NOTE(review): if the tokenizer is byte-level, a lone token may not
    # decode to complete characters -- confirm against BPETokenizer.decode.
    pieces = [
        (active.decode([token_id]), label)
        for token_id, label in zip(token_ids, id_labels)
    ]
    # The id doubles as its own category label in the "Encoding" view.
    id_pairs = [(label, label) for label in id_labels]

    return len(token_ids), pieces, id_pairs
# Static text shown at the top of the Gradio page.
title = "Bilingual Tokenizer"
description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"

# Each example row holds a single value: the sentence for the text input.
examples = [
    [sentence]
    for sentence in (
        "He walked into the basement with the horror movie from the night before playing in his head.",
        "Henry couldn't decide if he was an auto mechanic or a priest.",
        "Poison ivy grew through the fence they said was impenetrable.",
    )
]
# Input widgets, listed in the order their values are passed to `inference`:
# the text to tokenize, then the tokenizer name.
_input_components = [
    gr.Textbox(label="Enter any sentence in Hindi, English or both language", type="text"),
    gr.Dropdown(
        label="Tokenizer",
        choices=["Hinglish_5k", "Hindi_5k", "English_5k"],
        value="Hinglish_5k",
    ),
]

# Output widgets, one per element of the tuple returned by `inference`.
_output_components = [
    gr.Label(label="Token count"),
    gr.HighlightedText(label="Sentence", show_inline_category=False),
    gr.HighlightedText(label="Encoding", show_inline_category=False),
]

demo = gr.Interface(
    fn=inference,
    inputs=_input_components,
    outputs=_output_components,
    title=title,
    description=description,
    examples=examples,
)

# Start the web server as soon as this module is executed.
demo.launch()