Fix eval regression caused by commit 13f7efaf74fcd3c4514277ccb71914c589873f6a
Browse files
src/axolotl/monkeypatch/llama_attn_hijack_flash.py
CHANGED
|
@@ -155,12 +155,10 @@ def flashattn_forward(
|
|
| 155 |
# during training q,k,v always have same seqlen
|
| 156 |
assert key_states.shape == query_states.shape
|
| 157 |
is_causal = True
|
| 158 |
-
elif past_key_value is None:
|
| 159 |
-
is_causal = True
|
| 160 |
else:
|
| 161 |
# turn off FA causal mask after first inference autoregressive iteration
|
| 162 |
# only on first autoregressive step q,k,v have same seqlen
|
| 163 |
-
is_causal = False
|
| 164 |
|
| 165 |
if cu_seqlens is not None and max_seqlen is not None:
|
| 166 |
# special handling using sample packing
|
|
|
|
| 155 |
# during training q,k,v always have same seqlen
|
| 156 |
assert key_states.shape == query_states.shape
|
| 157 |
is_causal = True
|
|
|
|
|
|
|
| 158 |
else:
|
| 159 |
# turn off FA causal mask after first inference autoregressive iteration
|
| 160 |
# only on first autoregressive step q,k,v have same seqlen
|
| 161 |
+
is_causal = key_states.shape == query_states.shape
|
| 162 |
|
| 163 |
if cu_seqlens is not None and max_seqlen is not None:
|
| 164 |
# special handling using sample packing
|