from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# Load dataset from Hugging Face Hub
dataset = load_dataset("pathii/css_design_snippets")

# Load pre-trained model and tokenizer
model_name = "TinyLlama/TinyLlama_v1.1"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Llama-style tokenizers ship without a pad token; reuse EOS so batches can be padded
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tokenize dataset
def tokenize_function(example):
    return tokenizer(example["input"], truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Collator pads each batch and copies input_ids into labels (mlm=False),
# which causal-LM training requires; without it Trainer has no loss to compute
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./model",
    evaluation_strategy="epoch",  # renamed to eval_strategy in newer transformers releases
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    save_strategy="epoch",
)

# Create Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
)

# Start training
trainer.train()
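
# --- Optional: save and smoke-test the fine-tuned model ---
# A minimal sketch after training finishes; the prompt below is illustrative,
# not taken from the dataset, and generation settings are assumptions.
trainer.save_model("./model")       # persists the final weights alongside the epoch checkpoints
tokenizer.save_pretrained("./model")

prompt = "Write CSS for a centered card with a subtle shadow:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128, do_sample=True, temperature=0.7)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))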