File size: 5,233 Bytes
ede3a61 ec5feeb ede3a61 ec5feeb ede3a61 ec5feeb ede3a61 ec5feeb ede3a61 ec5feeb ede3a61 ec5feeb dd041ea ec5feeb af813d9 ec5feeb df7598f ec5feeb ede3a61 ec5feeb ede3a61 ec5feeb ede3a61 ec5feeb ede3a61 ec5feeb ede3a61 ec5feeb ede3a61 ec5feeb ede3a61 ec5feeb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
#
# demo/app.py
#
import gradio as gr
from gradio_tokenizertextbox import TokenizerTextBox
import json
# --- Data and Helper Functions ---
# Maps each tokenizer model identifier to the human-readable label shown
# for it in the UI. Insertion order is the order the choices are listed in.
TOKENIZER_OPTIONS = {
    "Xenova/clip-vit-large-patch14": "CLIP ViT-L/14",
    "Xenova/gpt-4": "gpt-4 / gpt-3.5-turbo / text-embedding-ada-002",
    "Xenova/text-davinci-003": "text-davinci-003 / text-davinci-002",
    "Xenova/gpt-3": "gpt-3",
    "Xenova/grok-1-tokenizer": "Grok-1",
    "Xenova/claude-tokenizer": "Claude",
    "Xenova/mistral-tokenizer-v3": "Mistral v3",
    "Xenova/mistral-tokenizer-v1": "Mistral v1",
    "Xenova/gemma-tokenizer": "Gemma",
    "Xenova/llama-3-tokenizer": "Llama 3",
    "Xenova/llama-tokenizer": "LLaMA / Llama 2",
    "Xenova/c4ai-command-r-v01-tokenizer": "Cohere Command-R",
    "Xenova/t5-small": "T5",
    "Xenova/bert-base-cased": "bert-base-cased",
}

# (display_name, model_name) pairs, in the same order as TOKENIZER_OPTIONS.
# Iterating a dict yields its keys, so zipping the values against the dict
# itself pairs every label with its own model identifier.
dropdown_choices = list(zip(TOKENIZER_OPTIONS.values(), TOKENIZER_OPTIONS))
def process_output(tokenization_data):
    """Return the component's payload, or a placeholder when it is empty.

    Used as the handler that forwards a TokenizerTextBox value to a JSON
    output.

    Args:
        tokenization_data: The full dictionary received from the component.
            Any falsy value (``None``, an empty dict, ...) is treated as
            "no input yet".

    Returns:
        ``tokenization_data`` unchanged when it is truthy; otherwise the
        placeholder ``{"status": "Waiting for input..."}``.
    """
    if not tokenization_data:
        return {"status": "Waiting for input..."}
    return tokenization_data
# --- Gradio Application ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Initial selections, named once so the global controls and both
    # TokenizerTextBox instances cannot start out of sync with each other.
    DEFAULT_MODEL = "Xenova/clip-vit-large-patch14"
    DEFAULT_DISPLAY_MODE = "text"

    # --- Header and Information ---
    gr.Markdown("# TokenizerTextBox Component Demo")
    gr.Markdown(
        "Component idea taken from the original example application on "
        "[Xenova Tokenizer Playground]"
        "(https://github.com/huggingface/transformers.js-examples/tree/main/the-tokenizer-playground)"
    )

    # --- Global Controls (affect both tabs) ---
    with gr.Row():
        model_selector = gr.Dropdown(
            label="Select a Tokenizer",
            choices=dropdown_choices,
            value=DEFAULT_MODEL,
        )
        display_mode_radio = gr.Radio(
            ["text", "token_ids", "hidden"],
            label="Display Mode",
            value=DEFAULT_DISPLAY_MODE,
        )

    # --- Tabbed Interface for Different Modes ---
    with gr.Tabs():
        # --- Tab 1: Standalone Mode ---
        with gr.TabItem("Standalone Mode"):
            gr.Markdown("### In this mode, the component acts as its own interactive textbox.")
            gr.Markdown("<span>💻 <a href='https://github.com/DEVAIEXP/gradio_component_tokenizertextbox'>GitHub Code</a></span>")
            standalone_tokenizer = TokenizerTextBox(
                label="Type your text here",
                value="Gradio is an awesome tool for building ML demos!",
                model=DEFAULT_MODEL,
                display_mode=DEFAULT_DISPLAY_MODE,
                preview_tokens=True,
            )
            standalone_output = gr.JSON(label="Component Output")
            # Mirror the component's value into the JSON viewer on every change.
            standalone_tokenizer.change(process_output, standalone_tokenizer, standalone_output)

        # --- Tab 2: Listener ("Push") Mode ---
        with gr.TabItem("Listener Mode"):
            gr.Markdown("### In this mode, the component is a read-only visualizer for other text inputs.")
            with gr.Row():
                prompt_1 = gr.Textbox(label="Prompt Part 1", value="A photorealistic image of an astronaut")
                prompt_2 = gr.Textbox(label="Prompt Part 2", value="riding a horse on Mars")
            visualizer = TokenizerTextBox(
                label="Concatenated Prompt Visualization",
                hide_input=True,  # Hides the internal textbox
                model=DEFAULT_MODEL,
                display_mode=DEFAULT_DISPLAY_MODE,
            )
            visualizer_output = gr.JSON(label="Visualizer Component Output")

            # --- "Push" Logic ---
            def update_visualizer_text(p1, p2):
                """Build the visualizer's text from the two prompt parts.

                Args:
                    p1: Current value of the first prompt textbox.
                    p2: Current value of the second prompt textbox.

                Returns:
                    A ``gr.update`` whose ``value`` is the non-empty parts
                    joined with ``", "``.
                """
                # Skip empty parts so a blank textbox does not leave a
                # dangling ", " separator in the text being tokenized.
                non_empty_parts = [part for part in (p1, p2) if part]
                concatenated_text = ", ".join(non_empty_parts)
                # Return a new value for the visualizer.
                return gr.update(value=concatenated_text)

            # Listen for changes on the source textboxes
            for prompt_box in (prompt_1, prompt_2):
                prompt_box.change(update_visualizer_text, [prompt_1, prompt_2], visualizer)

            # Also connect the visualizer to its own JSON output
            visualizer.change(process_output, visualizer, visualizer_output)

            # Run once on load to show the initial state
            demo.load(update_visualizer_text, [prompt_1, prompt_2], visualizer)

    # --- Link Global Controls to Both Components ---
    # Create a list of all TokenizerTextBox components that need to be updated
    all_tokenizers = [standalone_tokenizer, visualizer]

    # Each handler returns one update per output component, in the same
    # order as `all_tokenizers`.
    model_selector.change(
        fn=lambda model: [gr.update(model=model) for _ in all_tokenizers],
        inputs=model_selector,
        outputs=all_tokenizers,
    )
    display_mode_radio.change(
        fn=lambda mode: [gr.update(display_mode=mode) for _ in all_tokenizers],
        inputs=display_mode_radio,
        outputs=all_tokenizers,
    )
# Launch the app only when this file is executed directly, not on import.
if __name__ == '__main__':
    demo.launch()