⚡️ improve performance, enable longer text
Signed-off-by: peter szemraj <[email protected]>
- app.py +3 -3
- summarize.py +11 -0
- utils.py +12 -3
app.py
CHANGED
```diff
@@ -73,11 +73,11 @@ def predict(
         batch_length=token_batch_length,
         **settings,
     )
-
+
     del model
     del tokenizer
     gc.collect()
-
+
     return summaries
 
 
@@ -89,7 +89,7 @@ def proc_submission(
     length_penalty: float,
     repetition_penalty: float,
     no_repeat_ngram_size: int,
-    max_input_length: int =
+    max_input_length: int = 4096,
 ):
     """
     proc_submission - a helper function for the gradio module to process submissions
```
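The second hunk raises the default `max_input_length` of `proc_submission` to 4096 tokens, which is the "enable longer text" half of the commit. As a rough illustration only (the body of `proc_submission` is not shown in this diff, so the helper name and truncation call below are assumptions, not the app's actual code), such a cap is typically enforced by truncating the tokenized input before summarization:

```python
# Hypothetical sketch: enforcing a token-level input cap with a Hugging Face
# tokenizer. The function name and the example model are assumptions.
from transformers import AutoTokenizer


def truncate_input(text: str, tokenizer, max_input_length: int = 4096) -> str:
    """Trim `text` to at most `max_input_length` tokens."""
    ids = tokenizer(text, truncation=True, max_length=max_input_length)["input_ids"]
    return tokenizer.decode(ids, skip_special_tokens=True)


tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-large-book-summary")
short = truncate_input("some very long document ..." * 1000, tokenizer, 4096)
```

Truncating at the token level (rather than by character count) keeps the cap consistent with the model's context window regardless of how long the raw string is.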
summarize.py
CHANGED
```diff
@@ -6,6 +6,8 @@ import torch
 from tqdm.auto import tqdm
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
+from utils import validate_pytorch2
+
 
 def load_model_and_tokenizer(model_name: str) -> tuple:
     """
@@ -24,6 +26,15 @@ def load_model_and_tokenizer(model_name: str) -> tuple:
 
     logging.info(f"Loaded model {model_name} to {device}")
 
+    if validate_pytorch2():
+        try:
+            logging.info("Compiling model with Torch 2.0")
+            model = torch.compile(model)
+        except Exception as e:
+            logging.warning(f"Could not compile model with Torch 2.0: {e}")
+    else:
+        logging.info("Torch 2.0 not detected, skipping compilation")
+
     return model, tokenizer
 
 
```
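This is the "improve performance" half of the commit: `load_model_and_tokenizer` now opportunistically compiles the model with `torch.compile` when a PyTorch 2.x build is detected, and falls back to eager mode if compilation throws. A minimal, self-contained sketch of the same guard pattern (the `nn.Linear` model and random input are toy stand-ins, not the app's code):

```python
# Sketch of the compile-if-available pattern the commit adds, using a toy model.
import logging

import torch
import torch.nn as nn

logging.basicConfig(level=logging.INFO)

model = nn.Linear(8, 2)  # stand-in for the summarization model

if torch.__version__.startswith("2."):
    try:
        model = torch.compile(model)  # returns an optimized callable wrapper
        logging.info("Compiled model with torch.compile")
    except Exception as e:  # e.g. unsupported platform or backend
        logging.warning(f"Compilation failed, staying in eager mode: {e}")
else:
    logging.info("PyTorch < 2.0, skipping compilation")

out = model(torch.randn(1, 8))  # same call whether or not compilation succeeded
```

Note that `torch.compile` is lazy: the returned wrapper traces and optimizes on the first call, so the first inference pays the compilation cost and subsequent calls reap the speedup.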
utils.py
CHANGED
```diff
@@ -3,10 +3,20 @@
 """
 
 import re
-
+import subprocess
 from datetime import datetime
+from pathlib import Path
+
+import torch
 from natsort import natsorted
-
+
+
+def validate_pytorch2(torch_version: str = None):
+    torch_version = torch.__version__ if torch_version is None else torch_version
+
+    pattern = r"^2\.\d+(\.\d+)*"
+
+    return True if re.match(pattern, torch_version) else False
 
 
 def get_timestamp() -> str:
@@ -114,7 +124,6 @@ def saves_summary(summarize_output, outpath: str or Path = None, add_signature=T
         outpath,
         "a",
     ) as fo:
-
         fo.write("\n" * 3)
         fo.write(f"\n\nSection Scores:\n")
         fo.writelines(scores_text)
```
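The new `validate_pytorch2` helper reduces to a regex match on the version string: `^2\.\d+(\.\d+)*` accepts any `2.x` release and, because `re.match` only anchors at the start, also tolerates build suffixes such as `+cu118`. A standalone check with sample inputs (this variant takes the version string directly instead of defaulting to `torch.__version__`, so it runs without PyTorch installed):

```python
# Standalone version of the commit's validate_pytorch2 regex check.
import re


def validate_pytorch2(torch_version: str) -> bool:
    # True for any 2.x release; trailing local/dev suffixes are ignored.
    return bool(re.match(r"^2\.\d+(\.\d+)*", torch_version))


assert validate_pytorch2("2.0.1")
assert validate_pytorch2("2.1.0+cu118")  # match anchors at the start; suffix is fine
assert not validate_pytorch2("1.13.1")
```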