from datasets import load_dataset
from transformers import AutoTokenizer

def get_tokenizer(model_name="bert-base-uncased", max_len=128):
    # Load a pretrained tokenizer and cap its maximum sequence length.
    token = AutoTokenizer.from_pretrained(model_name)
    token.model_max_length = max_len
    return token

def load(tokenizer, split="validation"):
    # Fetch the requested STS-B split from the GLUE benchmark.
    ds = load_dataset("glue", "stsb", split=split)
    # Original body is truncated here; minimal completion (assumption): tokenize
    # the sentence pairs and return the encoded split.
    def encode(batch):
        return tokenizer(batch["sentence1"], batch["sentence2"], truncation=True)
    return ds.map(encode, batched=True)
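
# Usage sketch (assumption, hypothetical driver): build the tokenizer, encode
# the validation split, and inspect the first encoded example.
if __name__ == "__main__":
    tok = get_tokenizer()
    val = load(tok)
    print(val[0]["input_ids"][:10])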