Update app.py: add optional Hugging Face token input for gated models
Browse files
app.py
CHANGED
@@ -15,9 +15,9 @@ def get_distinct_colors(n):
|
|
15 |
colors.append(color)
|
16 |
return colors
|
17 |
|
18 |
-
def tokenize_text(hf_model_id, text):
|
19 |
try:
|
20 |
-
tokenizer = AutoTokenizer.from_pretrained(hf_model_id)
|
21 |
tokens = tokenizer.tokenize(text)
|
22 |
token_count = len(tokens)
|
23 |
colors = get_distinct_colors(token_count)
|
@@ -35,6 +35,7 @@ demo = gr.Interface(
|
|
35 |
fn=tokenize_text,
|
36 |
inputs=[
|
37 |
gr.Textbox(label="Hugging Face Model ID", placeholder="unsloth/gemma-3-27b-it", value="unsloth/gemma-3-27b-it"),
|
|
|
38 |
gr.Textbox(label="Text to Tokenize", lines=5, placeholder="Enter your text here...")
|
39 |
],
|
40 |
outputs=[
|
@@ -42,7 +43,7 @@ demo = gr.Interface(
|
|
42 |
gr.HTML(label="Tokens", container=True, show_label=True)
|
43 |
],
|
44 |
title="HuggingFace Tokenizer",
|
45 |
-
description="Enter a HuggingFace model ID and text to see how it gets tokenized.",
|
46 |
allow_flagging="never"
|
47 |
)
|
48 |
|
|
|
15 |
colors.append(color)
|
16 |
return colors
|
17 |
|
18 |
+
def tokenize_text(hf_model_id, text, hf_token):
|
19 |
try:
|
20 |
+
tokenizer = AutoTokenizer.from_pretrained(hf_model_id, token=hf_token)
|
21 |
tokens = tokenizer.tokenize(text)
|
22 |
token_count = len(tokens)
|
23 |
colors = get_distinct_colors(token_count)
|
|
|
35 |
fn=tokenize_text,
|
36 |
inputs=[
|
37 |
gr.Textbox(label="Hugging Face Model ID", placeholder="unsloth/gemma-3-27b-it", value="unsloth/gemma-3-27b-it"),
|
38 |
+
gr.Textbox(label="Hugging Face Token (optional)", type="password", placeholder="hf_..."),
|
39 |
gr.Textbox(label="Text to Tokenize", lines=5, placeholder="Enter your text here...")
|
40 |
],
|
41 |
outputs=[
|
|
|
43 |
gr.HTML(label="Tokens", container=True, show_label=True)
|
44 |
],
|
45 |
title="HuggingFace Tokenizer",
|
46 |
+
description="Enter a HuggingFace model ID and text to see how it gets tokenized. Provide a HuggingFace token if the model is gated.",
|
47 |
allow_flagging="never"
|
48 |
)
|
49 |
|