# app.py (corrected version)
# Handle missing dependencies first
import os  # imported up front so it is available in the except block even if other imports fail

try:
    import gradio as gr
    from transformers import LlamaForCausalLM, LlamaTokenizer, Trainer, TrainingArguments, BitsAndBytesConfig
    import datasets
    import torch
    import json
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    from accelerate import Accelerator
    import bitsandbytes
except ImportError as e:
    missing_package = str(e).split("'")[-2]  # Extract the package name from "No module named '<pkg>'"
    if "accelerate" in missing_package:
        os.system('pip install "accelerate>=0.26.0"')
    else:
        os.system(f'pip install "{missing_package}"')
    # Re-import after installation
    import gradio as gr
    from transformers import LlamaForCausalLM, LlamaTokenizer, Trainer, TrainingArguments, BitsAndBytesConfig
    import datasets
    import torch
    import json
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    from accelerate import Accelerator
    import bitsandbytes
# Model setup
MODEL_ID = "meta-llama/Llama-2-7b-hf"  # Llama-2-7b; for Llama 3 switch to "meta-llama/Meta-Llama-3-8B" (and use AutoTokenizer)
tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID)

# Llama tokenizers ship without a padding token; reuse the EOS token so the
# embedding matrix of the quantized model does not need to be resized
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Enable Flash Attention 2 only on GPUs that support it (Ampere or newer, e.g. A100)
use_flash_attention = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
# Load the model with optimizations for Llama
model = LlamaForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # bfloat16 suits A100-class GPUs
    device_map="auto",
    attn_implementation="flash_attention_2" if use_flash_attention else "sdpa",  # FA2 only where supported
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # 8-bit quantization for memory efficiency
)
# Prepare the model for training with LoRA (more memory-efficient)
model = prepare_model_for_kbit_training(model)

# LoRA configuration
peft_config = LoraConfig(
    r=16,  # Rank of the low-rank update matrices
    lora_alpha=32,  # Scaling factor
    lora_dropout=0.05,  # Dropout applied to the LoRA layers
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Attention projections in Llama
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()  # Print the percentage of trainable parameters
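
# For reference, a minimal sketch of the JSON layout train_ui_tars() below expects: either a
# top-level "training_pairs" list, or a flat list of the same objects. The field values here
# are made-up placeholders, not real fraud data.
#
#   {
#     "training_pairs": [
#       {"input": "Describe transaction X...", "output": "flagged"},
#       {"input": "Describe transaction Y...", "output": "not flagged"}
#     ]
#   }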
# Function to process the uploaded JSON and train
def train_ui_tars(file):
    try:
        # Step 1: Load and preprocess the uploaded JSON file
        file_path = file if isinstance(file, str) else file.name  # gr.File may return a path string or a tempfile wrapper
        with open(file_path, "r", encoding="utf-8") as f:
            raw_data = json.load(f)

        # Extract training pairs, or use the flat structure directly
        if isinstance(raw_data, dict):
            training_data = raw_data.get("training_pairs", raw_data)
        else:
            training_data = raw_data

        # Save fixed JSON to avoid issues
        fixed_json_path = "fixed_fraud_data.json"
        with open(fixed_json_path, "w", encoding="utf-8") as f:
            json.dump(training_data, f, indent=4)

        # Load dataset
        dataset = datasets.load_dataset("json", data_files=fixed_json_path)

        # Step 2: Tokenize the dataset with a Llama-compatible context length
        def tokenize_data(example):
            # Format input for Llama (instruction-following style)
            formatted_text = f"<s>[INST] {example['input']} [/INST] {example['output']}</s>"
            inputs = tokenizer(
                formatted_text,
                padding="max_length",
                truncation=True,
                max_length=2048,  # Llama 2 context length; adjust to 8192 for Llama 3 if needed
                return_tensors="pt",
            )
            inputs["labels"] = inputs["input_ids"].clone()
            return {k: v.squeeze(0) for k, v in inputs.items()}

        # tokenize_data handles one example at a time, so map without batched=True
        tokenized_dataset = dataset["train"].map(tokenize_data, remove_columns=dataset["train"].column_names)
        # Step 3: Training setup
        training_args = TrainingArguments(
            output_dir="./fine_tuned_llama",
            per_device_train_batch_size=4,  # Raised for better throughput
            gradient_accumulation_steps=8,  # Effective batch size of 32
            evaluation_strategy="no",
            save_strategy="epoch",
            save_total_limit=2,
            num_train_epochs=3,
            learning_rate=2e-5,
            weight_decay=0.01,
            logging_dir="./logs",
            logging_steps=10,
            bf16=use_flash_attention,  # bfloat16 needs an Ampere-or-newer GPU; reuse the capability check above
            gradient_checkpointing=True,  # Memory optimization
            optim="adamw_torch",
            warmup_steps=100,
        )
        # Custom data collator for Llama
        def custom_data_collator(features):
            # `datasets` stores the tokenized columns as plain Python lists, so build tensors here
            input_ids = torch.tensor([f["input_ids"] for f in features], dtype=torch.long)
            attention_mask = torch.tensor([f["attention_mask"] for f in features], dtype=torch.long)
            labels = torch.tensor([f["labels"] for f in features], dtype=torch.long)
            labels[attention_mask == 0] = -100  # Don't compute loss on padding positions
            return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_dataset,
            data_collator=custom_data_collator,
        )

        # Step 4: Start training
        trainer.train()

        # Step 5: Save the model
        model.save_pretrained("./fine_tuned_llama")
        tokenizer.save_pretrained("./fine_tuned_llama")
        return "Training completed successfully! Model saved to ./fine_tuned_llama"
    except Exception as e:
        return f"Error: {str(e)}"
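
# A minimal sketch of reloading the saved LoRA adapter for inference later on
# (paths assume the defaults used above; not executed as part of this app):
#
#   from peft import PeftModel
#   base = LlamaForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto")
#   tuned = PeftModel.from_pretrained(base, "./fine_tuned_llama")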
# Gradio UI
with gr.Blocks(title="Model Fine-Tuning Interface") as demo:
    gr.Markdown("# Llama Fraud Detection Fine-Tuning UI")
    gr.Markdown("Upload a JSON file with 'input' and 'output' pairs to fine-tune the Llama model on your fraud dataset.")
    file_input = gr.File(label="Upload Fraud Dataset (JSON)")
    train_button = gr.Button("Start Fine-Tuning")
    output = gr.Textbox(label="Training Status")
    train_button.click(fn=train_ui_tars, inputs=file_input, outputs=output)

demo.launch()
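
# Note: the meta-llama checkpoints are gated on the Hugging Face Hub, so the environment
# running this app must be authenticated with an account that has been granted access
# (e.g. via `huggingface-cli login` or an HF_TOKEN environment variable).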