Fix eval regression caused by commit 13f7efaf74fcd3c4514277ccb71914c589873f6a
Browse files
src/axolotl/monkeypatch/llama_attn_hijack_flash.py
CHANGED
|
@@ -155,12 +155,10 @@ def flashattn_forward(
|
|
| 155 |
# during training q,k,v always have same seqlen
|
| 156 |
assert key_states.shape == query_states.shape
|
| 157 |
is_causal = True
|
| 158 |
-
elif past_key_value is None:
|
| 159 |
-
is_causal = True
|
| 160 |
else:
|
| 161 |
# turn off FA causal mask after first inference autoregressive iteration
|
| 162 |
# only on first autoregressive step q,k,v have same seqlen
|
| 163 |
-
is_causal = False
|
| 164 |
|
| 165 |
if cu_seqlens is not None and max_seqlen is not None:
|
| 166 |
# special handling using sample packing
|
|
|
|
| 155 |
# during training q,k,v always have same seqlen
|
| 156 |
assert key_states.shape == query_states.shape
|
| 157 |
is_causal = True
|
|
|
|
|
|
|
| 158 |
else:
|
| 159 |
# turn off FA causal mask after first inference autoregressive iteration
|
| 160 |
# only on first autoregressive step q,k,v have same seqlen
|
| 161 |
+
is_causal = key_states.shape == query_states.shape
|
| 162 |
|
| 163 |
if cu_seqlens is not None and max_seqlen is not None:
|
| 164 |
# special handling using sample packing
|