adds flash attention
app.py CHANGED
@@ -12,6 +12,7 @@ try:
         "openai/gpt-oss-20b",
         torch_dtype="auto",
         device_map="auto",
+        attn_implementation="kernel-community/vllm-flash-attention3"
     )
     tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")
 
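The hunk shows only the argument lines, so below is a minimal sketch of what the patched block in app.py plausibly looks like after this commit. The import, the AutoModelForCausalLM call, and the except branch are assumptions inferred from the try: in the hunk context; only the three original arguments, the added attn_implementation line, and the tokenizer line come from the diff itself. Passing a Hub repo id as attn_implementation also assumes a transformers version with kernels-library support for Hub-hosted attention kernels.

from transformers import AutoModelForCausalLM, AutoTokenizer

try:
    model = AutoModelForCausalLM.from_pretrained(
        "openai/gpt-oss-20b",
        torch_dtype="auto",
        device_map="auto",
        # added in this commit: fetch a flash-attention kernel from the Hub
        # instead of using the default attention implementation
        attn_implementation="kernel-community/vllm-flash-attention3",
    )
    tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")
except Exception:
    # assumed fallback (the except body is not shown in the hunk): reload
    # without the Hub kernel if it cannot be fetched or is unsupported
    model = AutoModelForCausalLM.from_pretrained(
        "openai/gpt-oss-20b",
        torch_dtype="auto",
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")

If the kernel repo cannot be downloaded or the hardware does not support it, from_pretrained raises at load time, which is presumably why the call sits inside a try block in the first place.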