Cylanoid committed on
Commit
e5f8a81
·
verified ·
1 Parent(s): 4329ec1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -4
app.py CHANGED
@@ -33,14 +33,11 @@ huggingface_hub.login(token=LLama)
33
  MODEL_ID = "meta-llama/Llama-2-7b-hf"
34
  tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
35
 
36
- # Check CUDA and enable Flash Attention if supported
37
- use_flash_attention = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
38
- attn_implementation = "flash_attention_2" if use_flash_attention else "eager" # Default to eager if no compatible GPU
39
  model = LlamaForCausalLM.from_pretrained(
40
  MODEL_ID,
41
  torch_dtype=torch.bfloat16,
42
  device_map="auto",
43
- attn_implementation=attn_implementation,
44
  load_in_8bit=True
45
  )
46
 
 
33
  MODEL_ID = "meta-llama/Llama-2-7b-hf"
34
  tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
35
 
36
+ # Load model with default attention mechanism (no Flash Attention)
 
 
37
  model = LlamaForCausalLM.from_pretrained(
38
  MODEL_ID,
39
  torch_dtype=torch.bfloat16,
40
  device_map="auto",
 
41
  load_in_8bit=True
42
  )
43