Tonic committed on
Commit
80434d8
·
verified ·
1 Parent(s): 1247004

adds flash attention

Browse files
Files changed (1) hide show
  1. app.py +1 -0
app.py CHANGED
@@ -12,6 +12,7 @@ try:
12
  "openai/gpt-oss-20b",
13
  torch_dtype="auto",
14
  device_map="auto",
 
15
  )
16
  tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")
17
 
 
12
  "openai/gpt-oss-20b",
13
  torch_dtype="auto",
14
  device_map="auto",
15
+ attn_implementation="kernel-community/vllm-flash-attention3"
16
  )
17
  tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")
18