# Hugging Face Space page header (status when captured: Sleeping)
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load the model once at import time so every request reuses the same object
# instead of reloading it per call.
model_name = "HuggingFaceTB/SmolLM-1.7B"
model = AutoModelForCausalLM.from_pretrained(model_name)

# The five tokenizers offered for comparison. Only the first entry is the
# model's own tokenizer.
tokenizer_names = [
    "HuggingFaceTB/SmolLM-1.7B",  # Model's default tokenizer
    "gpt2",  # GPT-2 tokenizer
    "distilbert-base-uncased",  # DistilBERT tokenizer
    "bert-base-uncased",  # BERT tokenizer
    "roberta-base",  # RoBERTa tokenizer
]

# Load all the tokenizers up front, keyed by name, so request handling is a
# plain dict lookup.
tokenizers = {name: AutoTokenizer.from_pretrained(name) for name in tokenizer_names}
def generate_responses(prompt, selected_tokenizers):
    """Generate text from ``prompt`` once per selected tokenizer.

    For each selected name, the matching tokenizer encodes the prompt, the
    shared module-level ``model`` generates from that encoding, and the same
    tokenizer decodes the first generated sequence.

    Args:
        prompt: Text to feed to the model.
        selected_tokenizers: Iterable of keys into the module-level
            ``tokenizers`` dict. ``None`` is treated as an empty selection,
            and names missing from ``tokenizers`` are skipped.

    Returns:
        A dict mapping each processed tokenizer name to its decoded output,
        or to ``"Error: <message>"`` when encoding, generation or decoding
        raised for that tokenizer.
    """
    responses = {}
    # An empty or missing selection yields an empty result instead of raising.
    for name in selected_tokenizers or ():
        tokenizer = tokenizers.get(name)
        if tokenizer is None:
            continue
        try:
            # BERT-style tokenizers also return ``token_type_ids``, which a
            # causal LM's ``generate`` rejects as an unused keyword argument;
            # ask the tokenizer not to produce them.
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                return_token_type_ids=False,
            )
            outputs = model.generate(**inputs)
            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            # Deliberately broad: a failure with one tokenizer is reported in
            # its own slot so the remaining tokenizers still run.
            responses[name] = f"Error: {e}"
        else:
            responses[name] = response
    return responses
# Gradio interface setup: a prompt textbox plus a checkbox group listing the
# available tokenizers; results are rendered as JSON.
interface = gr.Interface(
    fn=generate_responses,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
        gr.CheckboxGroup(choices=tokenizer_names, label="Select tokenizers to use"),
    ],
    outputs=gr.JSON(),
    title="Tokenizer Comparison",
    description="Compare model outputs with different tokenizers",
)

# Launch the Gradio interface
interface.launch()