# app.py
import os
import gradio as gr
import pdfplumber
import nltk
from nltk.tokenize import sent_tokenize
from transformers import AutoTokenizer, Llama4ForConditionalGeneration, BitsAndBytesConfig, Trainer, TrainingArguments
import datasets
import torch
from accelerate import Accelerator
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import huggingface_hub
from document_analyzer import HealthcareFraudAnalyzer
print("Running updated app.py with restricted GPU usage (version: 2025-04-22 v2)")
# — Ensure NLTK punkt tokenizer is available
for resource in ('punkt', 'punkt_tab'):  # newer NLTK releases ship sent_tokenize data as 'punkt_tab'
    try:
        nltk.data.find(f'tokenizers/{resource}')
    except LookupError:
        nltk.download(resource)
# — Authenticate with Hugging Face
LLAMA = os.getenv("LLama")
if not LLAMA:
    raise ValueError("LLama token not found. Please set it as 'LLama' in your environment.")
huggingface_hub.login(token=LLAMA)
# — Model and tokenizer setup
MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# — BitsAndBytes quantization + CPU offload config
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)
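# 8-bit weights keep the 17B base model within a single-GPU budget; modules that
# do not fit under the max_memory cap below stay in fp32 on the CPU, which is
# what llm_int8_enable_fp32_cpu_offload permits.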
print("Loading model with 8-bit quantization, CPU offload, auto device mapping + max_memory cap")
model = Llama4ForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    max_memory={  # cap GPU usage to ~11 GiB
        0: "11GiB",
        "cpu": "200GiB"
    },
    quantization_config=quant_config,
    offload_folder="./offload"
)
# — Resize embeddings if we added a pad token
model.resize_token_embeddings(len(tokenizer))
# — Prepare with Accelerate
accelerator = Accelerator()
model = accelerator.prepare(model)
# — Initialize the fraud analyzer
analyzer = HealthcareFraudAnalyzer(model, tokenizer, accelerator)
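# HealthcareFraudAnalyzer is a local module (document_analyzer.py); it is assumed
# to expose analyze_document(sentences) returning dicts with 'sentence', 'reason',
# and 'confidence' keys, matching how analyze_document() below consumes it.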
# — Fine-tune function
def fine_tune_model(training_data_file, epochs=1, batch_size=2):
    try:
        # gr.File may yield a tempfile-like object (.name) or a plain path string
        data_path = training_data_file.name if hasattr(training_data_file, "name") else training_data_file
        ds = datasets.load_dataset('json', data_files=data_path)['train']
        # Tokenize the raw text so Trainer receives model-ready tensors
        # (assumes each JSON record has a "text" field)
        def tokenize_fn(batch):
            tokens = tokenizer(batch["text"], truncation=True, padding="max_length", max_length=512)
            tokens["labels"] = tokens["input_ids"].copy()  # causal LM: labels mirror inputs
            return tokens
        ds = ds.map(tokenize_fn, batched=True, remove_columns=ds.column_names)
        # LoRA configuration
        lora_cfg = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q_proj", "v_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )
        # Prepare the quantized base model for k-bit training, then attach LoRA adapters
        local_model = prepare_model_for_kbit_training(model)
        local_model = get_peft_model(local_model, lora_cfg)
        # Training arguments (Trainer and TrainingArguments come from transformers, not datasets)
        args = TrainingArguments(
            output_dir="./results",
            num_train_epochs=int(epochs),
            per_device_train_batch_size=int(batch_size),
            gradient_accumulation_steps=8,
            optim="adamw_torch",
            save_steps=500,
            logging_steps=100,
            learning_rate=2e-4,
            fp16=True,
            max_grad_norm=0.3,
            warmup_ratio=0.03,
            lr_scheduler_type="cosine"
        )
        # Trainer drives Accelerate internally, so it is not wrapped in accelerator.prepare()
        trainer = Trainer(
            model=local_model,
            args=args,
            train_dataset=ds
        )
        trainer.train()
        local_model.save_pretrained("./fine_tuned_model")
        return f"Training completed on {len(ds)} examples."
    except Exception as e:
        return f"Training failed: {e}"
# — PDF analysis function
def analyze_document(pdf_file):
    try:
        # gr.File may yield a tempfile-like object (.name) or a plain path string
        path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
        text = ""
        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                text += page.extract_text() or ""
        sentences = sent_tokenize(text)
        results = analyzer.analyze_document(sentences)
        if not results:
            return "No fraud indicators detected."
        report = "Potential Fraud Indicators Detected:\n\n"
        for item in results:
            report += (
                f"- Sentence: {item['sentence']}\n"
                f"  Reason: {item['reason']}\n"
                f"  Confidence: {item['confidence']:.2f}\n\n"
            )
        return report.strip()
    except Exception as e:
        return f"Analysis failed: {e}"
# — Gradio Interface
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# Llama 4 Healthcare Fraud Detection")
    with gr.Tab("Fine-Tune Model"):
        training_data = gr.File(label="Upload Training JSON File")
        epochs = gr.Slider(1, 10, value=1, step=1, label="Epochs")
        batch_size = gr.Slider(1, 4, value=2, step=1, label="Batch Size")
        train_button = gr.Button("Fine-Tune")
        train_output = gr.Textbox(label="Training Output")
        train_button.click(
            fn=fine_tune_model,
            inputs=[training_data, epochs, batch_size],
            outputs=train_output
        )
    with gr.Tab("Analyze Document"):
        pdf_input = gr.File(label="Upload PDF Document")
        analyze_button = gr.Button("Analyze")
        analysis_output = gr.Textbox(label="Analysis Results")
        analyze_button.click(
            fn=analyze_document,
            inputs=pdf_input,
            outputs=analysis_output
        )
demo.launch(server_name="0.0.0.0", server_port=7860)
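# Run with `python app.py`; the Gradio UI is then served at http://0.0.0.0:7860.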