# app.py (corrected version)

# Handle missing dependencies first
import json
import os
import sys

try:
    import gradio as gr
    from transformers import LlamaForCausalLM, LlamaTokenizer, Trainer, TrainingArguments, BitsAndBytesConfig
    import datasets
    import torch
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    from accelerate import Accelerator
    import bitsandbytes
except ImportError as e:
    # ModuleNotFoundError messages look like "No module named 'x'"; pull out the package name
    missing_package = str(e).split("'")[-2]
    if "accelerate" in missing_package:
        os.system(f'{sys.executable} -m pip install "accelerate>=0.26.0"')
    else:
        os.system(f'{sys.executable} -m pip install "{missing_package}"')
    # Re-import after installation
    import gradio as gr
    from transformers import LlamaForCausalLM, LlamaTokenizer, Trainer, TrainingArguments, BitsAndBytesConfig
    import datasets
    import torch
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    from accelerate import Accelerator
    import bitsandbytes

# Model setup
MODEL_ID = "meta-llama/Llama-2-7b-hf"  # Llama 2 7B; for Llama 3 use "meta-llama/Meta-Llama-3-8B" (and AutoTokenizer, since Llama 3 does not use the sentencepiece LlamaTokenizer)
tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID)

# Llama tokenizers ship without a padding token; reuse the EOS token so the
# embedding matrix does not need to be resized after quantized loading
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Check whether the GPU supports Flash Attention 2 (Ampere or newer, e.g. A100)
use_flash_attention = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

# Load the model with optimizations for Llama
model = LlamaForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # bfloat16 is well supported on Ampere and newer GPUs
    device_map="auto",
    attn_implementation="flash_attention_2" if use_flash_attention else "sdpa",  # Flash Attention 2 also requires the flash-attn package
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # 8-bit quantization for memory efficiency
)

# Prepare the model for training with LoRA (more memory-efficient)
model = prepare_model_for_kbit_training(model)

# LoRA configuration
peft_config = LoraConfig(
    r=16,               # Rank
    lora_alpha=32,      # Alpha
    lora_dropout=0.05,  # Dropout
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]  # Attention modules for Llama
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()  # Print percentage of trainable parameters
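
# The training function below expects the uploaded JSON to contain either a
# "training_pairs" list or a flat list of records with "input" and "output"
# fields. A hypothetical example of the expected shape (inferred from the code
# below, not taken from the original project):
#
# {
#     "training_pairs": [
#         {"input": "Describe this transaction: ...", "output": "This transaction looks fraudulent because ..."},
#         {"input": "...", "output": "..."}
#     ]
# }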

# Function to process uploaded JSON and train
def train_ui_tars(file):
    try:
        # Step 1: Load and preprocess the uploaded JSON file
        with open(file.name, "r", encoding="utf-8") as f:
            raw_data = json.load(f)
       
        # Extract training pairs or use flat structure
        training_data = raw_data.get("training_pairs", raw_data)
       
        # Save fixed JSON to avoid issues
        fixed_json_path = "fixed_fraud_data.json"
        with open(fixed_json_path, "w", encoding="utf-8") as f:
            json.dump(training_data, f, indent=4)
       
        # Load dataset
        dataset = datasets.load_dataset("json", data_files=fixed_json_path)
       
        # Step 2: Tokenize dataset with Llama-compatible context length
        def tokenize_data(example):
            # Format a single example in the Llama 2 instruction-following style
            formatted_text = f"<s>[INST] {example['input']} [/INST] {example['output']}</s>"
            inputs = tokenizer(
                formatted_text,
                padding="max_length",
                truncation=True,
                max_length=2048,  # Llama 2 context length; adjust to 8192 for Llama 3 if needed
                return_tensors="pt",
                add_special_tokens=False  # <s> and </s> are already part of the template
            )
            inputs["labels"] = inputs["input_ids"].clone()
            return {k: v.squeeze(0) for k, v in inputs.items()}

        # tokenize_data handles one example at a time, so map without batched=True
        tokenized_dataset = dataset["train"].map(tokenize_data, remove_columns=dataset["train"].column_names)
       
        # Step 3: Training setup
        training_args = TrainingArguments(
            output_dir="./fine_tuned_llama",
            per_device_train_batch_size=4,
            gradient_accumulation_steps=8,  # Effective batch size of 4 * 8 = 32 per device
            evaluation_strategy="no",
            save_strategy="epoch",
            save_total_limit=2,
            num_train_epochs=3,
            learning_rate=2e-5,
            weight_decay=0.01,
            logging_dir="./logs",
            logging_steps=10,
            bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(),  # Use bfloat16 only on GPUs that support it (e.g. A100)
            gradient_checkpointing=True,  # Memory optimization
            optim="adamw_torch",
            warmup_steps=100,
        )
       
        # Custom data collator for Llama
        def custom_data_collator(features):
            batch = {
                "input_ids": torch.stack([f["input_ids"] for f in features]),
                "attention_mask": torch.stack([f["