# app.py (corrected version)

# Handle missing dependencies first (stdlib imports cannot fail here, so keep them outside the retry)
import os
import json

try:
    import gradio as gr
    from transformers import (
        LlamaForCausalLM,
        LlamaTokenizer,
        Trainer,
        TrainingArguments,
        BitsAndBytesConfig,
    )
    import datasets
    import torch
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    from accelerate import Accelerator
    import bitsandbytes
except ImportError as e:
    missing_package = e.name  # ImportError carries the missing module name directly
    if missing_package == "accelerate":
        os.system('pip install "accelerate>=0.26.0"')
    else:
        os.system(f'pip install "{missing_package}"')
    # Re-import after installation
    import gradio as gr
    from transformers import (
        LlamaForCausalLM,
        LlamaTokenizer,
        Trainer,
        TrainingArguments,
        BitsAndBytesConfig,
    )
    import datasets
    import torch
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    from accelerate import Accelerator
    import bitsandbytes
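
# Note: installing packages at runtime with os.system is fragile on hosted
# Spaces; declaring the same dependencies in requirements.txt is more robust.
# A minimal sketch (package names taken from the imports above; no version
# pins assumed beyond the accelerate floor already required):
#
#     gradio
#     transformers
#     datasets
#     torch
#     peft
#     accelerate>=0.26.0
#     bitsandbytes
#     huggingface_hub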

# Authenticate with the Hugging Face Hub before loading the gated Llama weights
from huggingface_hub import login
login()  # Will prompt for token or use HF_TOKEN env variable

# Model setup
MODEL_ID = "meta-llama/Llama-2-7b-hf"  # Llama 2 7B; Llama 3 ("meta-llama/Meta-Llama-3-8B") uses a different tokenizer, so switch to AutoTokenizer for it
tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID)

# Llama tokenizers ship without a padding token; reuse the EOS token so the
# embedding matrix does not have to be resized
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Flash Attention 2 needs an Ampere-or-newer GPU (e.g., A100) plus the flash-attn package
use_flash_attention = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

# bfloat16 is only supported on Ampere-or-newer GPUs; use float16 everywhere else
compute_dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16

# Load the model with 8-bit quantization for memory efficiency
model = LlamaForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=compute_dtype,
    device_map="auto",
    attn_implementation="flash_attention_2" if use_flash_attention else "sdpa",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

# Prepare the model for training with LoRA (more memory-efficient)
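# prepare_model_for_kbit_training freezes the quantized base weights, upcasts
# layer norms to float32 for numerical stability, and enables input gradients
# so gradient checkpointing works with adapters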
model = prepare_model_for_kbit_training(model)

# LoRA configuration
peft_config = LoraConfig(
    r=16,               # Rank
    lora_alpha=32,      # Alpha
    lora_dropout=0.05,  # Dropout
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]  # Attention modules for Llama
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()  # Print percentage of trainable parameters
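
# LoRA learns a low-rank update delta_W = (lora_alpha / r) * B @ A for each
# targeted projection; r=16 with lora_alpha=32 scales the update by 2, and
# only the small A and B matrices are trained instead of the full weights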

# Function to process uploaded JSON and train
def train_ui_tars(file):
    try:
        # Step 1: Load and preprocess the uploaded JSON file
        with open(file.name, "r", encoding="utf-8") as f:
            raw_data = json.load(f)
       
        # Extract training pairs, or accept a flat list of pairs directly
        training_data = raw_data.get("training_pairs", raw_data) if isinstance(raw_data, dict) else raw_data
       
        # Save fixed JSON to avoid issues
        fixed_json_path = "fixed_fraud_data.json"
        with open(fixed_json_path, "w", encoding="utf-8") as f:
            json.dump(training_data, f, indent=4)
       
        # Load dataset
        dataset = datasets.load_dataset("json", data_files=fixed_json_path)
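        # load_dataset over a single JSON file returns a DatasetDict whose
        # records all land in the "train" split, hence dataset["train"] below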
       
        # Step 2: Tokenize dataset with Llama-compatible context length
        def tokenize_data(example):
            # Format one example in Llama's instruction-following style; the
            # template already supplies <s>/</s>, so skip the automatic specials
            formatted_text = f"<s>[INST] {example['input']} [/INST] {example['output']}</s>"
            inputs = tokenizer(
                formatted_text,
                padding="max_length",
                truncation=True,
                max_length=2048,  # Llama 2 context length; Llama 3 supports 8192
                add_special_tokens=False,
                return_tensors="pt"
            )
            inputs["labels"] = inputs["input_ids"].clone()
            return {k: v.squeeze(0) for k, v in inputs.items()}

        # tokenize_data handles a single example at a time, so map without batched=True
        tokenized_dataset = dataset["train"].map(tokenize_data, remove_columns=dataset["train"].column_names)
       
        # Step 3: Training setup
        training_args = TrainingArguments(
            output_dir="./fine_tuned_llama",
            per_device_train_batch_size=4,
            gradient_accumulation_steps=8,  # effective batch size of 4 * 8 = 32
            save_strategy="epoch",
            save_total_limit=2,
            num_train_epochs=3,
            learning_rate=2e-5,
            weight_decay=0.01,
            logging_dir="./logs",
            logging_steps=10,
            bf16=compute_dtype == torch.bfloat16,  # match the model's compute dtype
            fp16=compute_dtype == torch.float16,
            gradient_checkpointing=True,  # trade compute for memory
            optim="adamw_torch",
            warmup_steps=100,
        )
       
        # Custom data collator: the mapped dataset stores plain Python lists,
        # so convert each field back to a tensor before stacking
        def custom_data_collator(features):
            return {
                "input_ids": torch.stack([torch.as_tensor(f["input_ids"]) for f in features]),
                "attention_mask": torch.stack([torch.as_tensor(f["attention_mask"]) for f in features]),
                "labels": torch.stack([torch.as_tensor(f["labels"]) for f in features]),
            }

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_dataset,
            data_collator=custom_data_collator,
        )
       
        # Step 4: Start training
        trainer.train()
       
        # Step 5: Save the adapter and tokenizer (save_pretrained on a PEFT
        # model writes only the LoRA weights, not the full base model)
        model.save_pretrained("./fine_tuned_llama")
        tokenizer.save_pretrained("./fine_tuned_llama")
       
        return "Training completed successfully! Model saved to ./fine_tuned_llama"
   
    except Exception as e:
        return f"Error: {str(e)}"

# Gradio UI
with gr.Blocks(title="Model Fine-Tuning Interface") as demo:
    gr.Markdown("# Llama Fraud Detection Fine-Tuning UI")
    gr.Markdown("Upload a JSON file with 'input' and 'output' pairs to fine-tune the Llama model on your fraud dataset.")
   
    file_input = gr.File(label="Upload Fraud Dataset (JSON)")
    train_button = gr.Button("Start Fine-Tuning")
    output = gr.Textbox(label="Training Status")
   
    train_button.click(fn=train_ui_tars, inputs=file_input, outputs=output)

demo.launch()
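
# Once training finishes, the saved LoRA adapter can be re-attached to the
# base model for inference. A minimal sketch, assuming the adapter directory
# produced above:
#
#     from peft import PeftModel
#     base = LlamaForCausalLM.from_pretrained(MODEL_ID, torch_dtype=compute_dtype, device_map="auto")
#     ft_model = PeftModel.from_pretrained(base, "./fine_tuned_llama")
#     ids = tokenizer("<s>[INST] Review this transaction... [/INST]", return_tensors="pt").to(ft_model.device)
#     print(tokenizer.decode(ft_model.generate(**ids, max_new_tokens=128)[0], skip_special_tokens=True))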