Use device_map='auto' + offload_folder to avoid OOM
app.py
CHANGED
@@ -1,21 +1,25 @@
 # app.py
 
-import gradio as gr
-from transformers import AutoTokenizer, Llama4ForConditionalGeneration, BitsAndBytesConfig
-import datasets
-import torch
 import os
+import json
+import re
+
+import gradio as gr
 import pdfplumber
 import nltk
 from nltk.tokenize import sent_tokenize
-from …
+from transformers import AutoTokenizer, Llama4ForConditionalGeneration, BitsAndBytesConfig
+import datasets
+import torch
 from accelerate import Accelerator
+from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
 import huggingface_hub
+
 from document_analyzer import HealthcareFraudAnalyzer
 
-print("Running updated app.py with CPU offloading (version: 2025-04-…
+print("Running updated app.py with CPU offloading (version: 2025-04-22 v1)")
 
-# — Ensure NLTK punkt is available
+# — Ensure NLTK punkt tokenizer is available
 try:
     nltk.data.find('tokenizers/punkt')
 except LookupError:
@@ -24,44 +28,46 @@ except LookupError:
 # — Authenticate with Hugging Face
 LLAMA = os.getenv("LLama")
 if not LLAMA:
-    raise ValueError("LLama token not found. …
+    raise ValueError("LLama token not found. Please set it as 'LLama' in your environment.")
 huggingface_hub.login(token=LLAMA)
 
-# — …
+# — Model and tokenizer setup
 MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 if tokenizer.pad_token is None:
     tokenizer.add_special_tokens({'pad_token': '[PAD]'})
 
-# — …
+# — BitsAndBytes quantization + CPU offload config
 quant_config = BitsAndBytesConfig(
     load_in_8bit=True,
     llm_int8_enable_fp32_cpu_offload=True
 )
 
-print("Loading model with …
+print("Loading model with 8-bit quantization, CPU offload, and automatic device mapping")
 model = Llama4ForConditionalGeneration.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.bfloat16,
-    device_map="auto",
+    device_map="auto",          # let Accelerate decide which layers go to GPU vs. CPU
     quantization_config=quant_config,
-    offload_folder="./offload"
+    offload_folder="./offload"  # spill CPU-offloaded weights here
 )
 
-# — Resize embeddings if pad …
+# — Resize embeddings if we added a pad token
 model.resize_token_embeddings(len(tokenizer))
 
-# — …
+# — Prepare with Accelerate
 accelerator = Accelerator()
 model = accelerator.prepare(model)
 
-# — …
+# — Initialize the fraud analyzer
 analyzer = HealthcareFraudAnalyzer(model, tokenizer, accelerator)
 
 # — Fine-tune function
 def fine_tune_model(training_data_file, epochs=1, batch_size=2):
     try:
-        …
+        ds = datasets.load_dataset('json', data_files=training_data_file)['train']
+
+        # LoRA configuration
         lora_cfg = LoraConfig(
             r=16,
             lora_alpha=32,
@@ -70,9 +76,12 @@ def fine_tune_model(training_data_file, epochs=1, batch_size=2):
             bias="none",
             task_type="CAUSAL_LM"
         )
+
+        # Prepare for k-bit training
         local_model = prepare_model_for_kbit_training(model)
         local_model = get_peft_model(local_model, lora_cfg)
 
+        # Training arguments
        args = {
             "output_dir": "./results",
             "num_train_epochs": int(epochs),
@@ -87,48 +96,55 @@ def fine_tune_model(training_data_file, epochs=1, batch_size=2):
             "warmup_ratio": 0.03,
             "lr_scheduler_type": "cosine"
         }
+
         trainer = accelerator.prepare(
             datasets.Trainer(
                 model=local_model,
                 args=datasets.TrainingArguments(**args),
-                train_dataset=…
+                train_dataset=ds
             )
         )
+
         trainer.train()
         local_model.save_pretrained("./fine_tuned_model")
-        return f"Training completed …
+        return f"Training completed on {len(ds)} examples."
     except Exception as e:
         return f"Training failed: {e}"
 
 # — PDF analysis function
 def analyze_document(pdf_file):
     try:
-        …
-        …
+        text = ""
+        with pdfplumber.open(pdf_file.name) as pdf:
+            for page in pdf.pages:
+                text += page.extract_text() or ""
+
         sentences = sent_tokenize(text)
-        …
-        …
+        results = analyzer.analyze_document(sentences)
+
+        if not results:
             return "No fraud indicators detected."
-        …
-        …
+
+        report = "Potential Fraud Indicators Detected:\n\n"
+        for item in results:
             report += (
-                f"- {…
-                f"  Reason: {…
-                f"  Confidence: {…
+                f"- Sentence: {item['sentence']}\n"
+                f"  Reason: {item['reason']}\n"
+                f"  Confidence: {item['confidence']:.2f}\n\n"
             )
-        return report
+        return report.strip()
     except Exception as e:
         return f"Analysis failed: {e}"
 
-# — Gradio …
+# — Gradio Interface
 with gr.Blocks(theme=gr.themes.Default()) as demo:
     gr.Markdown("# Llama 4 Healthcare Fraud Detection")
 
-    with gr.Tab("Fine…
+    with gr.Tab("Fine-Tune Model"):
         training_data = gr.File(label="Upload Training JSON File")
         epochs = gr.Slider(1, 10, value=1, step=1, label="Epochs")
         batch_size = gr.Slider(1, 4, value=2, step=1, label="Batch Size")
-        train_button = gr.Button("Fine…
+        train_button = gr.Button("Fine-Tune")
         train_output = gr.Textbox(label="Training Output")
         train_button.click(
             fn=fine_tune_model,