Update app.py
app.py CHANGED
@@ -35,11 +35,12 @@ tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 
 # Check CUDA and enable Flash Attention if supported
 use_flash_attention = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
+attn_implementation = "flash_attention_2" if use_flash_attention else "eager"  # Default to eager if no compatible GPU
 model = LlamaForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.bfloat16,
     device_map="auto",
-
+    attn_implementation=attn_implementation,
     load_in_8bit=True
 )
 
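For context, a minimal sketch of the load path as it reads after this change. Only the lines shown in the diff come from app.py; the imports, the MODEL_ID value, and the surrounding setup are assumptions for illustration. Using "flash_attention_2" additionally requires the flash-attn package, and load_in_8bit requires bitsandbytes.

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

# Assumed checkpoint for illustration; the real MODEL_ID is defined earlier in app.py
MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"

tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Flash Attention 2 needs an Ampere-or-newer GPU (compute capability >= 8)
use_flash_attention = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
attn_implementation = "flash_attention_2" if use_flash_attention else "eager"  # Default to eager if no compatible GPU

model = LlamaForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation=attn_implementation,
    load_in_8bit=True
)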