#!/usr/bin/env python3
"""
Fine-tuning script for SmolLM2-135M model using Unsloth.

This script demonstrates how to:
1. Install and configure Unsloth
2. Prepare and format training data
3. Configure and run the training process
4. Save and evaluate the model

To run this script:
1. Install dependencies: pip install -r requirements.txt
2. Run: python train.py
"""

import os
from typing import Union

from datasets import (
    Dataset,
    DatasetDict,
    IterableDataset,
    IterableDatasetDict,
    load_dataset,
)
from transformers import AutoTokenizer, Trainer, TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel, is_bfloat16_supported
from unsloth.chat_templates import get_chat_template

# Configuration
max_seq_length = 2048  # Auto supports RoPE Scaling internally
dtype = (
    None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
)
load_in_4bit = True  # Use 4bit quantization to reduce memory usage

# def install_dependencies():
#     """Install required dependencies."""
#     os.system('pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"')
#     os.system('pip install --no-deps xformers trl peft accelerate bitsandbytes')


def load_model() -> tuple[FastLanguageModel, AutoTokenizer]:
    """Load and configure the model."""
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/SmolLM2-135M-Instruct-bnb-4bit",
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
    )

    # Configure LoRA
    model = FastLanguageModel.get_peft_model(
        model,
        r=64,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_alpha=128,
        lora_dropout=0.05,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=True,
        loftq_config=None,
    )

    return model, tokenizer


def load_and_format_dataset(
    tokenizer: AutoTokenizer,
) -> tuple[
    Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset], AutoTokenizer
]:
    """Load and format the training dataset."""
    # Load the code-act dataset
    dataset = load_dataset("xingyaoww/code-act", split="codeact")

    # Configure chat template
    tokenizer = get_chat_template(
        tokenizer,
        chat_template="chatml",  # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
        mapping={
            "role": "from",
            "content": "value",
            "user": "human",
            "assistant": "gpt",
        },  # ShareGPT style
        map_eos_token=True,  # Maps <|im_end|> to </s> instead
    )

    def formatting_prompts_func(examples):
        convos = examples["conversations"]
        texts = [
            tokenizer.apply_chat_template(
                convo, tokenize=False, add_generation_prompt=False
            )
            for convo in convos
        ]
        return {"text": texts}

    # Apply formatting to dataset
    dataset = dataset.map(formatting_prompts_func, batched=True)

    return dataset, tokenizer


def create_trainer(
    model: FastLanguageModel,
    tokenizer: AutoTokenizer,
    dataset: Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset],
) -> Trainer:
    """Create and configure the SFTTrainer."""
    return SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=False,
        args=TrainingArguments(
            per_device_train_batch_size=2,
            gradient_accumulation_steps=16,
            warmup_steps=100,
            max_steps=120,
            learning_rate=5e-5,
            fp16=not is_bfloat16_supported(),
            bf16=is_bfloat16_supported(),
            logging_steps=1,
            optim="adamw_8bit",
            weight_decay=0.01,
            lr_scheduler_type="cosine_with_restarts",
            seed=3407,
            output_dir="outputs",
            gradient_checkpointing=True,
            save_strategy="steps",
            save_steps=30,
            save_total_limit=2,
        ),
    )


def main():
    """Main training function."""
    # Install dependencies
    # install_dependencies()

    # Load model and tokenizer
    model, tokenizer = load_model()

    # Load and prepare dataset
    dataset, tokenizer = load_and_format_dataset(tokenizer)

    # Create trainer
    trainer: Trainer = create_trainer(model, tokenizer, dataset)

    # Train
    trainer.train()

    # Save model
    trainer.save_model("final_model")


if __name__ == "__main__":
    main()