assentian1970 committed (verified)
Commit 455abc6
Parent(s): b8c4add

Update app.py

Files changed (1)
  1. app.py +2 -3
app.py CHANGED
@@ -36,13 +36,12 @@ parser.add_argument("--port", type=int, default=7860)
 args = parser.parse_args()
 device = args.device
 
-# Load model and tokenizer
-model_path = './iic/mPLUG-Owl3-7B-240728'
+# Replace the model loading section with:
 model = AutoModel.from_pretrained(
     model_path,
     trust_remote_code=True,
     torch_dtype=torch.bfloat16 if 'int4' not in model_path else torch.float32,
-    attn_implementation="flash_attention_2" if device == 'cuda' else None
+    attn_implementation="sdpa"  # Use scaled dot-product attention instead of flash-attn
 ).to(device)
 
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
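For reference, below is a minimal sketch of how the load call reads after this change. It assumes model_path is still defined somewhere in app.py (the commit deletes the model_path = './iic/mPLUG-Owl3-7B-240728' assignment without re-adding it, so from_pretrained would otherwise raise a NameError); the try/except fallback to flash-attn is an optional variation for illustration, not part of the commit.

import torch
from transformers import AutoModel, AutoTokenizer

# Assumption: model_path must still be defined elsewhere in app.py after this
# commit; the deleted assignment is reproduced here to keep the sketch self-contained.
model_path = './iic/mPLUG-Owl3-7B-240728'
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # stands in for args.device

# Optional variation: prefer flash-attn on GPU when the package is installed,
# otherwise fall back to PyTorch's built-in scaled dot-product attention,
# which is the value this commit hardcodes.
try:
    import flash_attn  # noqa: F401
    attn_impl = "flash_attention_2" if device == 'cuda' else "sdpa"
except ImportError:
    attn_impl = "sdpa"

model = AutoModel.from_pretrained(
    model_path,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16 if 'int4' not in model_path else torch.float32,
    attn_implementation=attn_impl,
).to(device)

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

SDPA ships with PyTorch 2.x and runs on both CPU and GPU, so it avoids the separate flash-attn build that the previous flash_attention_2 setting required on CUDA.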