# app.py (corrected version)

# Handle missing dependencies first.
# os is needed to install missing packages, so import it before the guarded imports.
import os

try:
    import gradio as gr
    from transformers import LlamaForCausalLM, LlamaTokenizer, Trainer, TrainingArguments
    import datasets
    import torch
    import json
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    from accelerate import Accelerator
    import bitsandbytes
    import sentencepiece  # Required by the Llama tokenizer
except ImportError as e:
    missing_package = str(e).split("'")[-2]  # Extract the missing package name
    if "accelerate" in missing_package:
        os.system('pip install "accelerate>=0.26.0"')
    elif "sentencepiece" in missing_package:
        os.system('pip install "sentencepiece"')
    else:
        os.system(f'pip install "{missing_package}"')
    # Re-import after installation
    import gradio as gr
    from transformers import LlamaForCausalLM, LlamaTokenizer, Trainer, TrainingArguments
    import datasets
    import torch
    import json
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    from accelerate import Accelerator
    import bitsandbytes
    import sentencepiece

# Model setup
MODEL_ID = "meta-llama/Llama-2-7b-hf"  # Use Llama-2-7b; switch to "meta-llama/Llama-3-8b-hf" for Llama 3

tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID)

# Llama ships without a padding token; reuse the EOS token so the embedding matrix
# does not have to be resized
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Flash Attention 2 needs an Ampere-or-newer GPU (compute capability >= 8.0, e.g. A100)
use_flash_attention = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

# Load the model with optimizations for Llama
model = LlamaForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # bfloat16 suits A100-class GPUs
    device_map="auto",
    use_flash_attention_2=use_flash_attention,  # Only enabled if the GPU supports it; newer transformers use attn_implementation="flash_attention_2"
    load_in_8bit=True,  # 8-bit quantization for memory efficiency
)

# Prepare the model for LoRA training (more memory-efficient than full fine-tuning)
model = prepare_model_for_kbit_training(model)

# LoRA configuration
peft_config = LoraConfig(
    r=16,               # Rank
    lora_alpha=32,      # Alpha
    lora_dropout=0.05,  # Dropout
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Attention modules for Llama
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()  # Print the share of trainable parameters


# Process the uploaded JSON and train
def train_ui_tars(file):
    try:
        # Step 1: Load and preprocess the uploaded JSON file
        with open(file.name, "r", encoding="utf-8") as f:
            raw_data = json.load(f)

        # Extract the training pairs, or use the flat structure if the file is already a list
        training_data = raw_data.get("training_pairs", raw_data) if isinstance(raw_data, dict) else raw_data

        # Save the normalized JSON so datasets can load it reliably
        fixed_json_path = "fixed_fraud_data.json"
        with open(fixed_json_path, "w", encoding="utf-8") as f:
            json.dump(training_data, f, indent=4)

        # Load dataset
        dataset = datasets.load_dataset("json", data_files=fixed_json_path)

        # Step 2: Tokenize the dataset with a Llama-compatible context length
        def tokenize_data(example):
            # Format the pair in Llama's instruction-following style
            formatted_text = f"[INST] {example['input']} [/INST] {example['output']}"
            inputs = tokenizer(
                formatted_text,
                padding="max_length",
                truncation=True,
                max_length=2048,  # Llama 2 context length; raise to 8192 for Llama 3 if needed
                return_tensors="pt",
            )
            inputs["labels"] = inputs["input_ids"].clone()
            inputs["labels"][inputs["attention_mask"] == 0] = -100  # Ignore padding in the loss
            return {k: v.squeeze(0) for k, v in inputs.items()}

        # tokenize_data formats one example at a time, so batched must stay False
        tokenized_dataset = dataset["train"].map(
            tokenize_data,
            batched=False,
            remove_columns=dataset["train"].column_names,
        )
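
        # Hypothetical sample of the expected upload (illustration only, not from the
        # original dataset): either a top-level list of records or an object with a
        # "training_pairs" list, where every record carries "input" and "output" strings.
        #
        #   {"training_pairs": [
        #       {"input": "Card charged in two countries within five minutes.",
        #        "output": "Likely fraud: impossible travel pattern."}
        #   ]}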
        # Step 3: Training setup
        training_args = TrainingArguments(
            output_dir="./fine_tuned_llama",
            per_device_train_batch_size=4,   # Increased for better efficiency
            gradient_accumulation_steps=8,   # Larger effective batch size
            evaluation_strategy="no",
            save_strategy="epoch",
            save_total_limit=2,
            num_train_epochs=3,
            learning_rate=2e-5,
            weight_decay=0.01,
            logging_dir="./logs",
            logging_steps=10,
            bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(),  # bfloat16 only where the GPU supports it
            gradient_checkpointing=True,     # Memory optimization
            optim="adamw_torch",
            warmup_steps=100,
        )

        # Custom data collator for Llama: datasets stores the mapped columns as Python
        # lists, so convert them back to tensors before stacking
        def custom_data_collator(features):
            batch = {
                "input_ids": torch.stack([torch.as_tensor(f["input_ids"]) for f in features]),
                "attention_mask": torch.stack([torch.as_tensor(f["attention_mask"]) for f in features]),
                "labels": torch.stack([torch.as_tensor(f["labels"]) for f in features]),
            }
            return batch

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_dataset,
            data_collator=custom_data_collator,
        )

        # Step 4: Start training
        trainer.train()

        # Step 5: Save the model (for a PEFT model this saves the LoRA adapter weights)
        model.save_pretrained("./fine_tuned_llama")
        tokenizer.save_pretrained("./fine_tuned_llama")

        return "Training completed successfully! Model saved to ./fine_tuned_llama"
    except Exception as e:
        return f"Error: {str(e)}"


# Gradio UI
with gr.Blocks(title="Model Fine-Tuning Interface") as demo:
    gr.Markdown("# Llama Fraud Detection Fine-Tuning UI")
    gr.Markdown("Upload a JSON file with 'input' and 'output' pairs to fine-tune the Llama model on your fraud dataset.")
    file_input = gr.File(label="Upload Fraud Dataset (JSON)")
    train_button = gr.Button("Start Fine-Tuning")
    output = gr.Textbox(label="Training Status")
    train_button.click(fn=train_ui_tars, inputs=file_input, outputs=output)

demo.launch()
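
# Minimal inference sketch (commented out so the app above runs unchanged). It assumes
# the LoRA adapter saved to ./fine_tuned_llama by this script and loads it onto the base
# model with peft's PeftModel; the prompt text is a made-up example.
#
#   from peft import PeftModel
#
#   base = LlamaForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto")
#   tuned = PeftModel.from_pretrained(base, "./fine_tuned_llama")
#   prompt = "[INST] Transaction: $950 gift-card purchase at 3 a.m. from a new device. [/INST]"
#   inputs = tokenizer(prompt, return_tensors="pt").to(tuned.device)
#   generated = tuned.generate(**inputs, max_new_tokens=64)
#   print(tokenizer.decode(generated[0], skip_special_tokens=True))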