# app.py
import os
import gradio as gr
import pdfplumber
import nltk
from nltk.tokenize import sent_tokenize
from transformers import AutoTokenizer, Llama4ForConditionalGeneration, BitsAndBytesConfig, Trainer, TrainingArguments
import datasets
import torch
from accelerate import Accelerator
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import huggingface_hub
from document_analyzer import HealthcareFraudAnalyzer
print("Running updated app.py with restricted GPU usage (version: 2025-04-22 v2)")
# — Ensure NLTK punkt tokenizer is available
for resource in ('punkt', 'punkt_tab'):  # newer NLTK releases ship sent_tokenize data as 'punkt_tab'
    try:
        nltk.data.find(f'tokenizers/{resource}')
    except LookupError:
        nltk.download(resource)
# — Authenticate with Hugging Face
LLAMA = os.getenv("LLama")
if not LLAMA:
    raise ValueError("LLama token not found. Please set it as 'LLama' in your environment.")
huggingface_hub.login(token=LLAMA)
# — Model and tokenizer setup
MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# — BitsAndBytes quantization + CPU offload config
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)
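# 8-bit weights keep the 17B base model within a single-GPU budget; modules that
# do not fit under the max_memory cap below stay in fp32 on the CPU, which is
# what llm_int8_enable_fp32_cpu_offload permits.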
print("Loading model with 8-bit quantization, CPU offload, auto device mapping + max_memory cap")
model = Llama4ForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    max_memory={  # cap GPU usage to ~11 GiB
        0: "11GiB",
        "cpu": "200GiB"
    },
    quantization_config=quant_config,
    offload_folder="./offload"
)
# — Resize embeddings if we added a pad token
model.resize_token_embeddings(len(tokenizer))
# — Prepare with Accelerate
accelerator = Accelerator()
model = accelerator.prepare(model)
# — Initialize the fraud analyzer
analyzer = HealthcareFraudAnalyzer(model, tokenizer, accelerator)
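# HealthcareFraudAnalyzer is a local module (document_analyzer.py); it is assumed
# to expose analyze_document(sentences) returning dicts with 'sentence', 'reason',
# and 'confidence' keys, matching how analyze_document() below consumes it.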
# — Fine-tune function
def fine_tune_model(training_data_file, epochs=1, batch_size=2):
    try:
        # gr.File may yield a tempfile-like object (.name) or a plain path string
        data_path = training_data_file.name if hasattr(training_data_file, "name") else training_data_file
        ds = datasets.load_dataset('json', data_files=data_path)['train']
        # Tokenize the raw text so Trainer receives model-ready tensors
        # (assumes each JSON record has a "text" field)
        def tokenize_fn(batch):
            tokens = tokenizer(batch["text"], truncation=True, padding="max_length", max_length=512)
            tokens["labels"] = tokens["input_ids"].copy()  # causal LM: labels mirror inputs
            return tokens
        ds = ds.map(tokenize_fn, batched=True, remove_columns=ds.column_names)
        # LoRA configuration
        lora_cfg = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q_proj", "v_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )
        # Prepare the quantized base model for k-bit training, then attach LoRA adapters
        local_model = prepare_model_for_kbit_training(model)
        local_model = get_peft_model(local_model, lora_cfg)
        # Training arguments (Trainer and TrainingArguments come from transformers, not datasets)
        args = TrainingArguments(
            output_dir="./results",
            num_train_epochs=int(epochs),
            per_device_train_batch_size=int(batch_size),
            gradient_accumulation_steps=8,
            optim="adamw_torch",
            save_steps=500,
            logging_steps=100,
            learning_rate=2e-4,
            fp16=True,
            max_grad_norm=0.3,
            warmup_ratio=0.03,
            lr_scheduler_type="cosine"
        )
        # Trainer drives Accelerate internally, so it is not wrapped in accelerator.prepare()
        trainer = Trainer(
            model=local_model,
            args=args,
            train_dataset=ds
        )
        trainer.train()
        local_model.save_pretrained("./fine_tuned_model")
        return f"Training completed on {len(ds)} examples."
    except Exception as e:
        return f"Training failed: {e}"
# — PDF analysis function
def analyze_document(pdf_file):
    try:
        # gr.File may yield a tempfile-like object (.name) or a plain path string
        path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
        text = ""
        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                text += page.extract_text() or ""
        sentences = sent_tokenize(text)
        results = analyzer.analyze_document(sentences)
        if not results:
            return "No fraud indicators detected."
        report = "Potential Fraud Indicators Detected:\n\n"
        for item in results:
            report += (
                f"- Sentence: {item['sentence']}\n"
                f"  Reason: {item['reason']}\n"
                f"  Confidence: {item['confidence']:.2f}\n\n"
            )
        return report.strip()
    except Exception as e:
        return f"Analysis failed: {e}"
# — Gradio Interface
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# Llama 4 Healthcare Fraud Detection")
    with gr.Tab("Fine-Tune Model"):
        training_data = gr.File(label="Upload Training JSON File")
        epochs = gr.Slider(1, 10, value=1, step=1, label="Epochs")
        batch_size = gr.Slider(1, 4, value=2, step=1, label="Batch Size")
        train_button = gr.Button("Fine-Tune")
        train_output = gr.Textbox(label="Training Output")
        train_button.click(
            fn=fine_tune_model,
            inputs=[training_data, epochs, batch_size],
            outputs=train_output
        )
    with gr.Tab("Analyze Document"):
        pdf_input = gr.File(label="Upload PDF Document")
        analyze_button = gr.Button("Analyze")
        analysis_output = gr.Textbox(label="Analysis Results")
        analyze_button.click(
            fn=analyze_document,
            inputs=pdf_input,
            outputs=analysis_output
        )
demo.launch(server_name="0.0.0.0", server_port=7860)
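# Run with `python app.py`; the Gradio UI is then served at http://0.0.0.0:7860.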