Spaces:

rodrigomasini
/

rephrase

Paused

rodrigomasini committed on Nov 7, 2023

Commit

fd4c28d

1 Parent(s): 9e7be30

Update app_v3.py

Files changed (1) hide show

app_v3.py CHANGED Viewed

@@ -3,6 +3,7 @@ from transformers import AutoTokenizer, TextStreamer, pipeline
 from auto_gptq import AutoGPTQForCausalLM
 from huggingface_hub import snapshot_download
 import os
 # Define pretrained and quantized model directories
 pretrained_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
@@ -22,6 +23,10 @@ model_basename = "Jackson2-4bit-128g-GPTQ"
 #os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 use_triton = False
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True, legacy=False)

 from auto_gptq import AutoGPTQForCausalLM
 from huggingface_hub import snapshot_download
 import os
+import gc
 # Define pretrained and quantized model directories
 pretrained_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
 #os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+# Before allocating or loading the model, clear up memory
+gc.collect()
+torch.cuda.empty_cache()
 use_triton = False
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True, legacy=False)