Spaces:

AkashDataScience
/

languageBPE

Sleeping

AkashDataScience committed on Jun 26, 2024

Commit

812bb26

1 Parent(s): 1287e6c

Adding token length

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ tokenizer.load('models/english_5000.model')
 def inference(input_text):
     tokens = tokenizer.encode_ordinary(input_text)
-    return tokens
 title = "A bilingual tokenizer build using opus and wikipedia data"
 description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
@@ -22,6 +22,7 @@ demo = gr.Interface(
         gr.Textbox(label="Enter any sentence in Hindi, English or both language", type="text"),
         ],
     outputs = [
         gr.Textbox(label="Output", type="text")
         ],
     title = title,

 def inference(input_text):
     tokens = tokenizer.encode_ordinary(input_text)
+    return len(tokens), tokens
 title = "A bilingual tokenizer build using opus and wikipedia data"
 description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
         gr.Textbox(label="Enter any sentence in Hindi, English or both language", type="text"),
         ],
     outputs = [
+        gr.Label(label="Token count"),
         gr.Textbox(label="Output", type="text")
         ],
     title = title,