TuringsSolutions committed on
Commit
162172f
·
verified ·
1 Parent(s): e802a83

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+
4
# Checkpoint of the causal language model; it is loaded a single time at
# import and reused for every request.
model_name = "HuggingFaceTB/SmolLM-1.7B"
model = AutoModelForCausalLM.from_pretrained(model_name)

# Checkpoints whose tokenizers are compared against each other.
tokenizer_names = [
    "HuggingFaceTB/SmolLM-1.7B",  # Model's default tokenizer
    "gpt2",  # GPT-2 tokenizer
    "distilbert-base-uncased",  # DistilBERT tokenizer
    "bert-base-uncased",  # BERT tokenizer
    "roberta-base",  # RoBERTa tokenizer
]

# Map each checkpoint name to its loaded tokenizer, preserving list order.
tokenizers = {}
tokenizers.update(
    (checkpoint, AutoTokenizer.from_pretrained(checkpoint))
    for checkpoint in tokenizer_names
)
19
+
20
+ # Function to generate responses using different tokenizers
21
def generate_responses(prompt):
    """Generate one completion of *prompt* per configured tokenizer.

    Each tokenizer in the module-level ``tokenizers`` mapping encodes the
    prompt, the shared ``model`` generates from those ids, and the same
    tokenizer decodes the result.

    Args:
        prompt: Text entered by the user.

    Returns:
        A dict mapping tokenizer name to the decoded output. If encoding,
        generation or decoding fails for one tokenizer, that entry holds a
        ``"[generation failed: ...]"`` message instead, so the remaining
        tokenizers are still reported.
    """
    responses = {}
    for name, tokenizer in tokenizers.items():
        try:
            encoded = tokenizer(prompt, return_tensors="pt")
            # Forward only the tensors a causal LM accepts. BERT-style
            # tokenizers also return ``token_type_ids``, which
            # ``generate`` rejects as an unused model kwarg.
            generate_kwargs = {"input_ids": encoded["input_ids"]}
            if "attention_mask" in encoded:
                generate_kwargs["attention_mask"] = encoded["attention_mask"]
            outputs = model.generate(**generate_kwargs)
            responses[name] = tokenizer.decode(
                outputs[0], skip_special_tokens=True
            )
        except (ValueError, IndexError, RuntimeError) as err:
            # NOTE(review): a foreign tokenizer may emit ids outside the
            # model's embedding table, and an empty prompt may encode to
            # zero tokens; either presumably surfaces as one of these
            # exception types -- confirm against the installed versions.
            responses[name] = f"[generation failed: {err}]"
    return responses
29
+
30
# Gradio interface setup.
# Components come from the top-level ``gr`` namespace: the legacy
# ``gr.inputs`` / ``gr.outputs`` modules were deprecated in Gradio 3 and
# removed in Gradio 4, where referencing them raises AttributeError.
interface = gr.Interface(
    fn=generate_responses,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs=gr.JSON(),
    title="Tokenizer Comparison",
    description="Compare model outputs with different tokenizers",
)

# Launch the Gradio interface
interface.launch()