# document_analyzer.py
# Analyzer for healthcare fraud detection using Llama 4 Maverick (text-only)
import ast

import torch
import nltk
from nltk.tokenize import sent_tokenize


class HealthcareFraudAnalyzer:
    def __init__(self, model, tokenizer, accelerator):
        self.model = model
        self.tokenizer = tokenizer
        self.accelerator = accelerator
        self.device = self.accelerator.device
        # Ensure the Punkt sentence tokenizer is available for callers that use sent_tokenize.
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

    def analyze_document(self, sentences):
        """Score each sentence for fraud indicators and return those flagged by the model."""
        fraud_indicators = []
        for sentence in sentences:
            prompt = (
                f"Analyze the following sentence for potential healthcare fraud indicators, "
                f"such as consent violations, medication misuse, or billing irregularities. "
                f"Provide a reason and confidence score (0-1). "
                f"Sentence: {sentence}\nOutput format: {{'fraud_detected': bool, 'reason': str, 'confidence': float}}"
            )
            inputs = self.tokenizer(
                prompt, return_tensors="pt", padding=True, truncation=True, max_length=512
            ).to(self.device)
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=256,
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True
                )
            # Decode only the newly generated tokens so the echoed prompt is not parsed as output.
            generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
            response = self.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
            try:
                # Parse the model's dict-formatted answer without eval(); fall back to a
                # "no fraud" result if the response contains no dict literal.
                start, end = response.find("{"), response.rfind("}")
                if start != -1 and end != -1:
                    result = ast.literal_eval(response[start:end + 1])
                else:
                    result = {"fraud_detected": False, "reason": "Invalid response", "confidence": 0.0}
                if result.get("fraud_detected"):
                    fraud_indicators.append({
                        "sentence": sentence,
                        "reason": result.get("reason", ""),
                        "confidence": result.get("confidence", 0.0)
                    })
            except (ValueError, SyntaxError):
                # Skip sentences whose responses cannot be parsed into a dict.
                continue
        return fraud_indicators
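

# Illustrative usage sketch (not part of the class): shows how a caller might split a raw
# document into sentences with sent_tokenize and run the analyzer. The model identifier and
# the Accelerator-based loading below are assumptions for this example only; adjust them to
# however the model and tokenizer are actually loaded in this project.
if __name__ == "__main__":
    from accelerate import Accelerator
    from transformers import AutoModelForCausalLM, AutoTokenizer

    accelerator = Accelerator()
    model_name = "meta-llama/Llama-4-Maverick"  # hypothetical identifier for illustration
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        # Llama-family tokenizers often lack a pad token; reuse EOS so padding=True works.
        tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
    model = accelerator.prepare(model)

    analyzer = HealthcareFraudAnalyzer(model, tokenizer, accelerator)
    document = (
        "The patient was billed twice for the same procedure. "
        "Consent forms were signed after treatment began."
    )
    for finding in analyzer.analyze_document(sent_tokenize(document)):
        print(finding)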