# app.py (corrected version)
# os is used in the except branch below, so import it before the try block
import os

# Handle missing dependencies first
try:
    import gradio as gr
    from transformers import LlamaForCausalLM, LlamaTokenizer, Trainer, TrainingArguments
    import datasets
    import torch
    import json
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    from accelerate import Accelerator
    import bitsandbytes
    import sentencepiece  # Required for the Llama tokenizer
except ImportError as e:
    missing_package = str(e).split("'")[-2]  # Extract the missing package name
    if "accelerate" in missing_package:
        os.system('pip install "accelerate>=0.26.0"')
    elif "sentencepiece" in missing_package:
        os.system('pip install "sentencepiece"')
    else:
        os.system(f'pip install "{missing_package}"')
    # Re-import after installation
    import gradio as gr
    from transformers import LlamaForCausalLM, LlamaTokenizer, Trainer, TrainingArguments
    import datasets
    import torch
    import json
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    from accelerate import Accelerator
    import bitsandbytes
    import sentencepiece
# Model setup
MODEL_ID = "meta-llama/Llama-2-7b-hf"  # Use Llama-2-7b; switch to "meta-llama/Meta-Llama-3-8B" for Llama 3
tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID)

# Add a padding token if it doesn't exist (required for Llama models)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# Check if CUDA is available to enable Flash Attention 2
use_flash_attention = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8  # Ampere or newer (e.g., A100)

# Load the model with optimizations for Llama
model = LlamaForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # bfloat16 is well suited to A100 GPUs
    device_map="auto",
    use_flash_attention_2=use_flash_attention,  # Only enable if the GPU supports it
    load_in_8bit=True  # 8-bit quantization for memory efficiency
)

# Resize the embedding matrix to account for the newly added [PAD] token
model.resize_token_embeddings(len(tokenizer))
# Prepare the model for training with LoRA (more memory-efficient)
model = prepare_model_for_kbit_training(model)

# LoRA configuration
peft_config = LoraConfig(
    r=16,  # Rank
    lora_alpha=32,  # Alpha
    lora_dropout=0.05,  # Dropout
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]  # Attention modules for Llama
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()  # Print the percentage of trainable parameters
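# Illustrative example of the expected upload format (field values are hypothetical); the file
# may be either a flat list of input/output pairs or that list wrapped under "training_pairs":
# {
#   "training_pairs": [
#     {"input": "Describe the red flags in this transaction...", "output": "The transaction shows..."},
#     {"input": "...", "output": "..."}
#   ]
# }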
# Function to process the uploaded JSON and train
def train_ui_tars(file):
    try:
        # Step 1: Load and preprocess the uploaded JSON file
        with open(file.name, "r", encoding="utf-8") as f:
            raw_data = json.load(f)

        # Extract training pairs, or use the flat structure as-is
        training_data = raw_data.get("training_pairs", raw_data) if isinstance(raw_data, dict) else raw_data

        # Save the normalized JSON to avoid issues
        fixed_json_path = "fixed_fraud_data.json"
        with open(fixed_json_path, "w", encoding="utf-8") as f:
            json.dump(training_data, f, indent=4)

        # Load dataset
        dataset = datasets.load_dataset("json", data_files=fixed_json_path)

        # Step 2: Tokenize the dataset with a Llama-compatible context length
        def tokenize_data(example):
            # Format the input for Llama (instruction-following style)
            formatted_text = f"<s>[INST] {example['input']} [/INST] {example['output']}</s>"
            inputs = tokenizer(
                formatted_text,
                padding="max_length",
                truncation=True,
                max_length=2048,  # Llama 2 context length; adjust to 8192 for Llama 3 if needed
                return_tensors="pt"
            )
            inputs["labels"] = inputs["input_ids"].clone()
            return {k: v.squeeze(0) for k, v in inputs.items()}

        # tokenize_data handles one example at a time, so map without batching
        tokenized_dataset = dataset["train"].map(tokenize_data, batched=False, remove_columns=dataset["train"].column_names)
        # Return torch tensors so the data collator below can stack them directly
        tokenized_dataset.set_format("torch")
        # Step 3: Training setup
        training_args = TrainingArguments(
            output_dir="./fine_tuned_llama",
            per_device_train_batch_size=4,  # Increased for better efficiency
            gradient_accumulation_steps=8,  # Larger effective batch size
            evaluation_strategy="no",
            save_strategy="epoch",
            save_total_limit=2,
            num_train_epochs=3,
            learning_rate=2e-5,
            weight_decay=0.01,
            logging_dir="./logs",
            logging_steps=10,
            bf16=True,  # bfloat16 mixed precision for A100/Ampere GPUs
            gradient_checkpointing=True,  # Memory optimization
            optim="adamw_torch",
            warmup_steps=100,
        )
        # Custom data collator for Llama
        def custom_data_collator(features):
            batch = {
                "input_ids": torch.stack([f["input_ids"] for f in features]),
                "attention_mask": torch.stack([f["attention_mask"] for f in features]),
                "labels": torch.stack([f["labels"] for f in features]),
            }
            return batch
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_dataset,
            data_collator=custom_data_collator,
        )
        # Step 4: Start training
        trainer.train()

        # Step 5: Save the model
        model.save_pretrained("./fine_tuned_llama")
        tokenizer.save_pretrained("./fine_tuned_llama")
        return "Training completed successfully! Model saved to ./fine_tuned_llama"
    except Exception as e:
        return f"Error: {str(e)}"
# Gradio UI
with gr.Blocks(title="Model Fine-Tuning Interface") as demo:
    gr.Markdown("# Llama Fraud Detection Fine-Tuning UI")
    gr.Markdown("Upload a JSON file with 'input' and 'output' pairs to fine-tune the Llama model on your fraud dataset.")
    file_input = gr.File(label="Upload Fraud Dataset (JSON)")
    train_button = gr.Button("Start Fine-Tuning")
    output = gr.Textbox(label="Training Status")
    train_button.click(fn=train_ui_tars, inputs=file_input, outputs=output)

demo.launch()
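# Illustrative sketch (commented out so it does not run with the app; prompt text is hypothetical)
# of loading the saved LoRA adapter back onto the base model for inference with peft's PeftModel:
#
# from peft import PeftModel
#
# base_model = LlamaForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto")
# inference_tokenizer = LlamaTokenizer.from_pretrained("./fine_tuned_llama")
# base_model.resize_token_embeddings(len(inference_tokenizer))  # Match the added [PAD] token
# inference_model = PeftModel.from_pretrained(base_model, "./fine_tuned_llama")
#
# prompt = "[INST] Describe the red flags in this transaction... [/INST]"
# inputs = inference_tokenizer(prompt, return_tensors="pt").to(inference_model.device)
# outputs = inference_model.generate(**inputs, max_new_tokens=200)
# print(inference_tokenizer.decode(outputs[0], skip_special_tokens=True))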