ejschwartz committed
Commit 6e800de · 1 Parent(s): f81d7bd

Revert modeling_nova

Files changed (1)
  1. modeling_nova.py +3 -6
modeling_nova.py CHANGED
@@ -5,7 +5,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 from typing import Tuple, List, Optional
 from transformers import LlamaModel, LlamaConfig, LlamaForCausalLM
-from transformers.models.llama.modeling_llama import LlamaDecoderLayer, LlamaMLP, LlamaRMSNorm
+from transformers.models.llama.modeling_llama import LlamaDecoderLayer, LLAMA_ATTENTION_CLASSES, LlamaMLP, LlamaRMSNorm
 from transformers.models.llama.modeling_llama import LlamaSdpaAttention, apply_rotary_pos_emb, repeat_kv
 from transformers import logging, Cache, DynamicCache, StaticCache
 from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
@@ -421,10 +421,7 @@ class NovaModel(LlamaModel):
         if position_ids is None:
             position_ids = cache_position.unsqueeze(0)
 
-        if past_seen_tokens == 0:
-            past_seen_tokens = None
-
-        causal_mask = self._update_causal_mask(attention_mask, inputs_embeds, cache_position, past_seen_tokens, output_attentions=False)
+        causal_mask = self._update_causal_mask(attention_mask, inputs_embeds, cache_position, past_seen_tokens)
 
         # apply the nova attention
         if nova_attention_mask is not None:
@@ -667,4 +664,4 @@ class NovaForCausalLM(LlamaForCausalLM, NovaGenerationMixin):
                 "no_mask_idx": kwargs.get("no_mask_idx")
             }
         )
-        return model_inputs
+        return model_inputs
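For context, not part of the commit: the reverted call passed past_seen_tokens together with output_attentions=False to _update_causal_mask, while the restored call passes only four arguments, matching how LlamaModel._update_causal_mask changed its signature across transformers releases. The sketch below is an assumption rather than code from this repository; the helper name call_update_causal_mask is hypothetical, and it shows one way a forward() override could tolerate either signature by inspecting it at runtime.

import inspect

# Hedged sketch (not from this repository): dispatch on the signature of
# LlamaModel._update_causal_mask. Older transformers releases accept four
# positional arguments; newer ones also take a cache object and an
# output_attentions flag. The helper itself is hypothetical.
def call_update_causal_mask(model, attention_mask, inputs_embeds,
                            cache_position, past_seen_tokens, past_key_values):
    params = inspect.signature(model._update_causal_mask).parameters
    if "output_attentions" in params:
        # Newer signature: pass the cache object and output_attentions flag.
        return model._update_causal_mask(
            attention_mask, inputs_embeds, cache_position,
            past_key_values, output_attentions=False,
        )
    # Older signature (the form this commit reverts to): four arguments.
    return model._update_causal_mask(
        attention_mask, inputs_embeds, cache_position, past_seen_tokens
    )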