# Hugging Face Space status when this file was captured: Runtime error
import os

import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
class HuggingFaceHelper:
    """Load a causal language model and fine-tune it on a JSON dataset.

    The model and tokenizer are loaded from ``model_path`` when the helper is
    constructed; the model is moved to CUDA when available, otherwise it
    stays on the CPU.
    """

    # Label value used for padding positions so they are excluded from the
    # loss (the ignore index used by Hugging Face causal-LM heads).
    _IGNORE_INDEX = -100

    # Default tokenised sequence length; overridden per instance in __init__.
    max_length = 512

    def __init__(self, model_path="./merged_model", dataset_path=None, max_length=512):
        """Load the tokenizer and model.

        Args:
            model_path: Directory or Hub id passed to ``from_pretrained``.
            dataset_path: Path (or paths) of the JSON training data; may be
                ``None`` if only ``save_model`` is going to be used.
            max_length: Length every example is truncated/padded to.
        """
        self.model_path = model_path
        self.dataset_path = dataset_path
        self.max_length = max_length
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        # Many causal-LM tokenizers ship without a pad token, which makes
        # ``padding="max_length"`` raise; reuse EOS as the padding token.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model = AutoModelForCausalLM.from_pretrained(model_path).to(self.device)

    def load_dataset(self):
        """Return the tokenised training split read from ``dataset_path``.

        Raises:
            ValueError: If no dataset path was provided.
        """
        if not self.dataset_path:
            raise ValueError("Dataset path not provided.")
        # Inside a method body the bare name resolves to the module-level
        # ``datasets.load_dataset`` import, not to this method.
        raw = load_dataset("json", data_files=self.dataset_path, split="train")
        return raw.map(self.tokenize_function, batched=True)

    def tokenize_function(self, examples):
        """Tokenise a batch and attach causal-LM labels.

        Args:
            examples: Batch mapping with a ``"messages"`` column.

        Returns:
            The tokenizer output with an added ``"labels"`` entry that copies
            ``input_ids`` and replaces padding positions with -100.
        """
        # NOTE(review): assumes every "messages" entry is a plain string;
        # chat-style lists of role/content dicts would have to be rendered
        # with the tokenizer's chat template first - confirm against the data.
        encoded = self.tokenizer(
            examples["messages"],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_attention_mask=True,
        )
        # Without labels the model returns no loss and Trainer.train() fails.
        encoded["labels"] = [
            [token if keep else self._IGNORE_INDEX for token, keep in zip(ids, mask)]
            for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
        ]
        return encoded

    def fine_tune(
        self,
        output_dir="./fine_tuned_model",
        epochs=3,
        batch_size=4,
        push_to_hub=True,
        hub_model_id="Raiff1982/codriao-finetuned",
    ):
        """Fine-tune the model on the configured dataset and save the result.

        Args:
            output_dir: Where checkpoints and the final model are written.
            epochs: Number of training epochs.
            batch_size: Per-device training batch size.
            push_to_hub: Whether the Trainer is configured to push to the Hub.
            hub_model_id: Hub repository id used when ``push_to_hub`` is set.

        Raises:
            ValueError: If no dataset path was provided.
        """
        dataset = self.load_dataset()
        # No evaluation strategy is set: there is no eval dataset, and asking
        # for per-epoch evaluation without one makes the Trainer fail.
        training_args = TrainingArguments(
            output_dir=output_dir,
            save_strategy="epoch",
            per_device_train_batch_size=batch_size,
            num_train_epochs=epochs,
            weight_decay=0.01,
            push_to_hub=push_to_hub,
            hub_model_id=hub_model_id,
        )
        # NOTE(review): newer transformers releases rename ``tokenizer`` to
        # ``processing_class``; switch when the pinned version requires it.
        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=dataset,
            tokenizer=self.tokenizer,
        )
        trainer.train()
        self.save_model(output_dir, pushed_to_hub=push_to_hub)

    def save_model(self, output_dir, pushed_to_hub=False):
        """Write the model and tokenizer to ``output_dir`` and report it.

        Args:
            output_dir: Destination directory for ``save_pretrained``.
            pushed_to_hub: Set by callers that also uploaded the model; this
                method itself only writes locally, so the upload is mentioned
                in the message only when this flag is true.
        """
        self.model.save_pretrained(output_dir)
        self.tokenizer.save_pretrained(output_dir)
        if pushed_to_hub:
            print(f"✅ Model saved to {output_dir} and uploaded to Hugging Face Hub.")
        else:
            print(f"✅ Model saved to {output_dir}.")