Spaces: Runtime error
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import load_dataset

# Load the base GPT-2 model and tokenizer; GPT-2 has no pad token, so reuse EOS for padding.
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Take a small 20-example slice of UltraChat's SFT split for a quick test run.
dataset = load_dataset("HuggingFaceH4/ultrachat_200k")
dataset = dataset["train_sft"].select(range(20))

# Tokenize the "prompt" column, padding/truncating to the tokenizer's max length.
def tokenize_function(examples):
    return tokenizer(examples["prompt"], padding="max_length", truncation=True)

td = dataset.map(tokenize_function, batched=True)
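As a quick sanity check (not part of the posted code), you can inspect one mapped example; with padding="max_length" and no explicit max_length, the GPT-2 tokenizer pads and truncates to its model_max_length of 1024:

# Small check: the mapped dataset keeps the original columns and adds
# input_ids/attention_mask padded to tokenizer.model_max_length (1024 for GPT-2).
print(td.column_names)
print(len(td[0]["input_ids"]), tokenizer.model_max_length)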
# Basic training configuration for a short demo run.
training_args = TrainingArguments(
    output_dir="./output",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    logging_dir="./logs",
)

# Causal-LM collator: mlm=False makes it build next-token-prediction labels.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
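With mlm=False the collator copies input_ids into labels and sets padded positions to -100 so the loss ignores them. A quick way to see this on two examples (a sketch added here, not in the original; note that because pad_token was set to eos_token, any real EOS tokens get masked in the labels as well):

# Collate two tokenized examples, keeping only the tensor-friendly columns.
sample = [{k: td[i][k] for k in ("input_ids", "attention_mask")} for i in range(2)]
batch = data_collator(sample)
print(batch["input_ids"].shape, batch["labels"].shape)  # same shape
print((batch["labels"] == -100).sum())                  # number of masked (pad/EOS) positions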
| """ | |
| dataloader_config = DataLoaderConfiguration( | |
| dispatch_batches=None, | |
| split_batches=False, | |
| even_batches=True, | |
| use_seedable_sampler=True | |
| ) | |
| accelerator = Accelerator(dataloader_config=dataloader_config) | |
| with accelerator.prepare(): | |
| trainer = Trainer( | |
| model=model, | |
| args=training_args, | |
| data_collator=data_collator, | |
| train_dataset=td, | |
| ) | |
| trainer.train() | |
| trainer.save_model("fine_tuned_gpt2") | |
| """ | |
# Active run: Trainer handles its own Accelerate setup internally.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=td,
)
trainer.train()
trainer.save_model("fine_tuned_gpt2")
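Once training completes, the saved directory can be reloaded for a quick smoke test. Since no tokenizer was passed to Trainer, trainer.save_model most likely writes only the model files, so the in-memory tokenizer is reused below; the prompt string is just an example:

from transformers import AutoModelForCausalLM

# Reload the fine-tuned weights and generate a short continuation.
ft_model = AutoModelForCausalLM.from_pretrained("fine_tuned_gpt2")
inputs = tokenizer("Tell me about machine learning.", return_tensors="pt")
output_ids = ft_model.generate(
    **inputs,
    max_new_tokens=50,
    do_sample=True,
    top_p=0.9,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))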