import gradio as gr
from transformers import (
    AutoTokenizer,
    Llama4ForConditionalGeneration,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)
import datasets
import torch
import json
import os
import pdfplumber
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from accelerate import Accelerator
import huggingface_hub
import re
import nltk
from nltk.tokenize import sent_tokenize

# Download the punkt sentence tokenizer on first run if it is missing.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

from document_analyzer import HealthcareFraudAnalyzer

print("Running updated app.py with CPU offloading (version: 2025-04-21 v3)")

# Do not dump os.environ here: it would print the LLama token and every other
# Space secret into the logs.

LLama = os.getenv("LLama")
if not LLama:
    raise ValueError("LLama token not found. Set it in Hugging Face Space secrets as 'LLama'.")

print(f"Retrieved LLama token: {LLama[:5]}...")

huggingface_hub.login(token=LLama)

MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Llama tokenizers ship without a pad token; add one so padded batches work.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Quantize to 8-bit and allow modules that stay on the CPU to remain in FP32.
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)

# device_map keys must name individual modules; accelerate does not expand
# range shorthand such as "model.layers.0-10", so map each layer explicitly:
# layers 0-10 on GPU 0, layers 11-31 offloaded to the CPU.
device_map = {
    "model.embed_tokens": 0,
    "model.norm": 0,
    "lm_head": 0,
}
for i in range(11):
    device_map[f"model.layers.{i}"] = 0
for i in range(11, 32):
    device_map[f"model.layers.{i}"] = "cpu"

print("Loading model with: quantization_config=", quant_config, ", device_map=", device_map)

try:
    model = Llama4ForConditionalGeneration.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map=device_map,
        quantization_config=quant_config,
        attn_implementation="flex_attention"
    )
except Exception as e:
    print(f"Model loading failed: {str(e)}")
    raise

# Resize the embedding matrix to account for the added [PAD] token.
model.resize_token_embeddings(len(tokenizer))

# The quantized model is already dispatched across GPU and CPU by device_map,
# so it is not passed through accelerator.prepare(); the Accelerator is kept
# for components that expect it.
accelerator = Accelerator()

analyzer = HealthcareFraudAnalyzer(model, tokenizer, accelerator)
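
# HealthcareFraudAnalyzer is a local module; based on its use in
# analyze_document() below, analyzer.analyze_document(sentences) is expected
# to return a list of dicts with "sentence", "reason" and "confidence" keys.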

def fine_tune_model(training_data_file, epochs=1, batch_size=2):
    try:
        dataset = datasets.load_dataset('json', data_files=training_data_file)
        dataset = dataset['train']
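
        # Sketch of a tokenization step, assuming each JSON record carries a
        # "text" field (e.g. {"text": "Duplicate claims submitted under two
        # provider NPIs."}); the Trainer needs input_ids and labels, not raw
        # strings. Adjust the field name to match the uploaded data.
        def tokenize_fn(batch):
            tokens = tokenizer(batch["text"], truncation=True, padding="max_length", max_length=512)
            # For causal LM fine-tuning the labels are the input ids themselves.
            tokens["labels"] = [ids.copy() for ids in tokens["input_ids"]]
            return tokens

        dataset = dataset.map(tokenize_fn, batched=True, remove_columns=dataset.column_names)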

        lora_config = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q_proj", "v_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )

        # Work on a local reference: assigning to the name `model` inside this
        # function would make it local and raise UnboundLocalError before the
        # global model is ever read.
        peft_model = prepare_model_for_kbit_training(model)
        peft_model = get_peft_model(peft_model, lora_config)

        training_args = TrainingArguments(
            output_dir="./results",
            num_train_epochs=int(epochs),
            per_device_train_batch_size=int(batch_size),
            gradient_accumulation_steps=8,
            optim="adamw_torch",
            save_steps=500,
            logging_steps=100,
            learning_rate=2e-4,
            fp16=True,
            max_grad_norm=0.3,
            warmup_ratio=0.03,
            lr_scheduler_type="cosine"
        )

        # Trainer and TrainingArguments live in transformers, not datasets,
        # and Trainer drives Accelerate internally, so it is not wrapped in
        # accelerator.prepare().
        trainer = Trainer(
            model=peft_model,
            args=training_args,
            train_dataset=dataset,
        )

        trainer.train()
        peft_model.save_pretrained("./fine_tuned_model")
        return f"Training completed with {len(dataset)} examples!"
    except Exception as e:
        return f"Training failed: {str(e)}"

def analyze_document(pdf_file):
    try:
        # Pull the text out of every page; extract_text() can return None for
        # image-only pages, hence the "or ''" guard.
        with pdfplumber.open(pdf_file) as pdf:
            text = ""
            for page in pdf.pages:
                text += page.extract_text() or ""

        sentences = sent_tokenize(text)
        fraud_indicators = analyzer.analyze_document(sentences)

        if not fraud_indicators:
            return "No fraud indicators detected."

        report = "Potential Fraud Indicators Detected:\n"
        for indicator in fraud_indicators:
            report += f"- {indicator['sentence']}\n  Reason: {indicator['reason']}\n  Confidence: {indicator['confidence']:.2f}\n"
        return report
    except Exception as e:
        return f"Analysis failed: {str(e)}"

with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# Llama 4 Healthcare Fraud Detection")

    with gr.Tab("Fine-Tune Model"):
        training_data = gr.File(label="Upload Training JSON File")
        epochs = gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Epochs")
        batch_size = gr.Slider(minimum=1, maximum=4, value=2, step=1, label="Batch Size")
        train_button = gr.Button("Fine-Tune")
        train_output = gr.Textbox(label="Training Output")
        train_button.click(
            fn=fine_tune_model,
            inputs=[training_data, epochs, batch_size],
            outputs=train_output
        )

    with gr.Tab("Analyze Document"):
        pdf_input = gr.File(label="Upload PDF Document")
        analyze_button = gr.Button("Analyze")
        analysis_output = gr.Textbox(label="Analysis Results")
        analyze_button.click(
            fn=analyze_document,
            inputs=pdf_input,
            outputs=analysis_output
        )

demo.launch(server_name="0.0.0.0", server_port=7860)