Spaces:

Lyte
/

Any-Tokenizer-Count

Sleeping

Lyte committed on Oct 8, 2024

Commit

2673358

verified ·

1 Parent(s): 7f14ca3

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+from transformers import AutoTokenizer
+from huggingface_hub import HfApi
+from gradio_huggingfacehub_search import HuggingfaceHubSearch
+def count_tokens(model_id, text):
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokens = tokenizer.encode(text)
+        token_count = len(tokens)
+        return f"Number of tokens: {token_count}"
+    except Exception as e:
+        return f"Error: {str(e)}"
+with gr.Blocks() as iface:
+    gr.Markdown("# Universal Tokenizer - Token Counter")
+    gr.Markdown("This app counts the number of tokens in the provided text using any tokenizer from a Hugging Face model.")
+    model_id = HuggingfaceHubSearch(
+        label="Select a model repo with a tokenizer",
+        placeholder="Search for a model on Hugging Face",
+        search_type="model",
+    )
+    text_input = gr.Textbox(lines=5, placeholder="Enter your text here...")
+    output = gr.Textbox(label="Result")
+    btn = gr.Button("Count Tokens")
+    btn.click(fn=count_tokens, inputs=[model_id, text_input], outputs=output)
+iface.launch()