Spaces:
Running
on
Zero
Use the xformers attention implementation when CUDA is available
Browse files- app.py +3 -2
- bytelatent/entropy_model.py +1 -1
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import spaces
|
|
|
|
| 2 |
import os
|
| 3 |
import gradio as gr
|
| 4 |
import torch
|
|
@@ -30,7 +31,7 @@ class Config:
|
|
| 30 |
|
| 31 |
# Bytelatent Specific
|
| 32 |
BLT_WEIGHTS_DIR: str = "hf-weights"
|
| 33 |
-
BLT_MAX_BYTES_FOR_DEMO:
|
| 34 |
|
| 35 |
# Gradio
|
| 36 |
DEFAULT_PROMPT: str = "Daenerys Targaryen is in Game of Thrones, a fantasy epic by George R.R. Martin."
|
|
@@ -158,7 +159,7 @@ class BytelatentProcessor:
|
|
| 158 |
|
| 159 |
return highlighted_data, patch_count
|
| 160 |
|
| 161 |
-
def process(self, prompt: str, max_bytes:
|
| 162 |
"""Processes the prompt using the loaded Bytelatent model."""
|
| 163 |
status = ""
|
| 164 |
if not self.is_available or self.tokenizer is None or self.patcher is None:
|
|
|
|
| 1 |
import spaces
|
| 2 |
+
import math
|
| 3 |
import os
|
| 4 |
import gradio as gr
|
| 5 |
import torch
|
|
|
|
| 31 |
|
| 32 |
# Bytelatent Specific
|
| 33 |
BLT_WEIGHTS_DIR: str = "hf-weights"
|
| 34 |
+
BLT_MAX_BYTES_FOR_DEMO: float = math.inf # Limit for this specific demo's entropy model
|
| 35 |
|
| 36 |
# Gradio
|
| 37 |
DEFAULT_PROMPT: str = "Daenerys Targaryen is in Game of Thrones, a fantasy epic by George R.R. Martin."
|
|
|
|
| 159 |
|
| 160 |
return highlighted_data, patch_count
|
| 161 |
|
| 162 |
+
def process(self, prompt: str, max_bytes: float) -> Tuple[Optional[matplotlib.figure.Figure], List[Tuple[str, str]], int, str]:
|
| 163 |
"""Processes the prompt using the loaded Bytelatent model."""
|
| 164 |
status = ""
|
| 165 |
if not self.is_available or self.tokenizer is None or self.patcher is None:
|
bytelatent/entropy_model.py
CHANGED
|
@@ -28,7 +28,7 @@ def load_entropy_model(entropy_model_checkpoint_dir, state_dict_path, device="cp
|
|
| 28 |
ffn_dim_multiplier=model_params["ffn_dim_multiplier"],
|
| 29 |
vocab_size=model_params["vocab_size"],
|
| 30 |
attn_bias_type="causal",
|
| 31 |
-
attn_impl="sdpa",
|
| 32 |
sliding_window=512,
|
| 33 |
)
|
| 34 |
)
|
|
|
|
| 28 |
ffn_dim_multiplier=model_params["ffn_dim_multiplier"],
|
| 29 |
vocab_size=model_params["vocab_size"],
|
| 30 |
attn_bias_type="causal",
|
| 31 |
+
attn_impl="xformers" if torch.cuda.is_available() else "sdpa",
|
| 32 |
sliding_window=512,
|
| 33 |
)
|
| 34 |
)
|