|
|
|
|
|
|
|
import ast

import torch

import nltk

from nltk.tokenize import sent_tokenize
|
|
|
class HealthcareFraudAnalyzer:
    """Screen documents sentence-by-sentence for healthcare-fraud indicators.

    Each sentence is sent to a generative language model with an instruction
    prompt; the model is asked to reply with a Python-literal dict of the
    form ``{'fraud_detected': bool, 'reason': str, 'confidence': float}``.
    Sentences the model flags are collected and returned by
    :meth:`analyze_document`.
    """

    # Fallback result used whenever the model's reply cannot be parsed.
    _INVALID_RESULT = {"fraud_detected": False, "reason": "Invalid response", "confidence": 0.0}

    def __init__(self, model, tokenizer, accelerator):
        """Store the model/tokenizer pair and ensure NLTK sentence data exists.

        Args:
            model: Generative model exposing a Hugging Face-style ``generate``.
            tokenizer: Matching tokenizer (callable, with ``decode``).
            accelerator: ``accelerate.Accelerator`` supplying the target device.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.accelerator = accelerator
        self.device = self.accelerator.device
        # Download the punkt sentence tokenizer only if it is not already
        # installed locally (avoids a network hit on every construction).
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

    @staticmethod
    def _parse_response(response):
        """Safely parse the model's textual reply into a result dict.

        Uses ``ast.literal_eval`` (literals only) instead of ``eval``: the
        reply is untrusted model output, and ``eval`` would execute arbitrary
        code embedded in it.

        Args:
            response: Decoded model output, ideally a Python dict literal.

        Returns:
            dict: The parsed result, or a copy of the fallback "invalid"
            result when the reply is not a well-formed dict literal.
        """
        if not response.startswith("{"):
            return dict(HealthcareFraudAnalyzer._INVALID_RESULT)
        try:
            result = ast.literal_eval(response)
        except (ValueError, SyntaxError):
            # Not a valid Python literal (malformed, or contains calls/names).
            return dict(HealthcareFraudAnalyzer._INVALID_RESULT)
        if not isinstance(result, dict):
            return dict(HealthcareFraudAnalyzer._INVALID_RESULT)
        return result

    def analyze_document(self, sentences):
        """Run the model over each sentence and collect flagged results.

        Args:
            sentences: Iterable of sentence strings to screen.

        Returns:
            list[dict]: One entry per flagged sentence, with keys
            ``'sentence'``, ``'reason'``, and ``'confidence'``.
        """
        fraud_indicators = []
        for sentence in sentences:
            prompt = (
                f"Analyze the following sentence for potential healthcare fraud indicators, "
                f"such as consent violations, medication misuse, or billing irregularities. "
                f"Provide a reason and confidence score (0-1). "
                f"Sentence: {sentence}\nOutput format: {{'fraud_detected': bool, 'reason': str, 'confidence': float}}"
            )
            inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512).to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=256,
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True
                )

            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Decoder-only models echo the prompt before the answer, so the
            # reply could never start with "{" — strip the echoed prompt.
            # Encoder-decoder models return only the answer, making this a
            # no-op. NOTE(review): assumes decode reproduces the prompt text
            # verbatim when echoed — confirm for the tokenizer in use.
            if response.startswith(prompt):
                response = response[len(prompt):].lstrip()

            result = self._parse_response(response)
            try:
                if result["fraud_detected"]:
                    fraud_indicators.append({
                        "sentence": sentence,
                        "reason": result["reason"],
                        "confidence": result["confidence"]
                    })
            except (KeyError, TypeError):
                # Reply parsed to a dict but lacks the expected keys; skip it
                # rather than aborting the whole document scan.
                continue

        return fraud_indicators