import os

import gradio as gr
import pdfplumber
import nltk
from nltk.tokenize import sent_tokenize
from transformers import (
    AutoTokenizer,
    BitsAndBytesConfig,
    Llama4ForConditionalGeneration,
    Trainer,
    TrainingArguments,
)
import datasets
import torch
from accelerate import Accelerator
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import huggingface_hub

from document_analyzer import HealthcareFraudAnalyzer

print("Running updated app.py with restricted GPU usage (version: 2025-04-22 v2)")

# Make sure the NLTK Punkt sentence tokenizer used by sent_tokenize is available.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Authenticate with the Hugging Face Hub; the gated Llama checkpoint requires a token.
LLAMA = os.getenv("LLama")
if not LLAMA:
    raise ValueError("LLama token not found. Please set it as 'LLama' in your environment.")
huggingface_hub.login(token=LLAMA)

MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
if tokenizer.pad_token is None:
    # Add a dedicated pad token; the embedding matrix is resized after the model loads.
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# 8-bit quantization, with fp32 CPU offload for weights that do not fit on the GPU.
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)

print("Loading model with 8-bit quantization, CPU offload, auto device mapping + max_memory cap") |
|
model = Llama4ForConditionalGeneration.from_pretrained( |
|
MODEL_ID, |
|
torch_dtype=torch.bfloat16, |
|
device_map="auto", |
|
max_memory={ |
|
0: "11GiB", |
|
"cpu": "200GiB" |
|
}, |
|
quantization_config=quant_config, |
|
offload_folder="./offload" |
|
) |
|
|
|
|
|
# Account for the '[PAD]' token added above.
model.resize_token_embeddings(len(tokenizer))

# The quantized, device-mapped model is left as-is: accelerate cannot reliably
# prepare() a model already dispatched across GPU and CPU in 8-bit, so the
# Accelerator is kept only for downstream components.
accelerator = Accelerator()

analyzer = HealthcareFraudAnalyzer(model, tokenizer, accelerator)

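# Based on how its results are consumed in analyze_document() below,
# analyzer.analyze_document(sentences) is expected to return a list of dicts
# with 'sentence', 'reason', and 'confidence' keys.
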
def fine_tune_model(training_data_file, epochs=1, batch_size=2):
    try:
        # Gradio's File component exposes the uploaded file via a temp path.
        ds = datasets.load_dataset('json', data_files=training_data_file.name)['train']

        # Tokenize for causal LM training; this assumes each JSON record has a 'text' field.
        def tokenize_fn(batch):
            tokens = tokenizer(batch['text'], truncation=True, padding='max_length', max_length=512)
            tokens['labels'] = tokens['input_ids'].copy()
            return tokens

        tokenized_ds = ds.map(tokenize_fn, batched=True, remove_columns=ds.column_names)

        lora_cfg = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q_proj", "v_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )

        local_model = prepare_model_for_kbit_training(model)
        local_model = get_peft_model(local_model, lora_cfg)

        # Trainer and TrainingArguments live in transformers, not datasets, and
        # Trainer drives its own accelerator, so no extra prepare() call is needed.
        args = TrainingArguments(
            output_dir="./results",
            num_train_epochs=int(epochs),
            per_device_train_batch_size=int(batch_size),
            gradient_accumulation_steps=8,
            optim="adamw_torch",
            save_steps=500,
            logging_steps=100,
            learning_rate=2e-4,
            fp16=True,
            max_grad_norm=0.3,
            warmup_ratio=0.03,
            lr_scheduler_type="cosine"
        )

        trainer = Trainer(
            model=local_model,
            args=args,
            train_dataset=tokenized_ds
        )

        trainer.train()
        local_model.save_pretrained("./fine_tuned_model")
        return f"Training completed on {len(ds)} examples."
    except Exception as e:
        return f"Training failed: {e}"

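# A minimal sketch of the training-data format assumed by tokenize_fn above:
# one JSON object per line, each with a 'text' field. The records below are
# hypothetical, for illustration only:
#   {"text": "Billed 99215 (40-minute visit) for a 5-minute phone check-in."}
#   {"text": "Progress note dated after the rendering provider's license lapsed."}
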
def analyze_document(pdf_file):
    try:
        text = ""
        with pdfplumber.open(pdf_file.name) as pdf:
            for page in pdf.pages:
                # extract_text() returns None for image-only pages; the added
                # newline keeps sentences from running together across pages.
                text += (page.extract_text() or "") + "\n"

        sentences = sent_tokenize(text)
        results = analyzer.analyze_document(sentences)

        if not results:
            return "No fraud indicators detected."

        report = "Potential Fraud Indicators Detected:\n\n"
        for item in results:
            report += (
                f"- Sentence: {item['sentence']}\n"
                f"  Reason: {item['reason']}\n"
                f"  Confidence: {item['confidence']:.2f}\n\n"
            )
        return report.strip()
    except Exception as e:
        return f"Analysis failed: {e}"

with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# Llama 4 Healthcare Fraud Detection")

    with gr.Tab("Fine-Tune Model"):
        training_data = gr.File(label="Upload Training JSON File")
        epochs = gr.Slider(1, 10, value=1, step=1, label="Epochs")
        batch_size = gr.Slider(1, 4, value=2, step=1, label="Batch Size")
        train_button = gr.Button("Fine-Tune")
        train_output = gr.Textbox(label="Training Output")
        train_button.click(
            fn=fine_tune_model,
            inputs=[training_data, epochs, batch_size],
            outputs=train_output
        )

    with gr.Tab("Analyze Document"):
        pdf_input = gr.File(label="Upload PDF Document")
        analyze_button = gr.Button("Analyze")
        analysis_output = gr.Textbox(label="Analysis Results")
        analyze_button.click(
            fn=analyze_document,
            inputs=pdf_input,
            outputs=analysis_output
        )

demo.launch(server_name="0.0.0.0", server_port=7860)
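
# Typical invocation (token value illustrative):
#   LLama=hf_your_token_here python app.py
# The UI is then served at http://0.0.0.0:7860.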