orionweller committed
Commit 86087e8 · 1 Parent(s): dc29b43
Files changed (1)
  1. app.py +5 -5
app.py CHANGED
@@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
 login(token=os.environ['HF_TOKEN'])
 
 # Global variables
-CUR_MODEL = "./repllama-instruct-hard-positives-v2-joint-full-weights"
+CUR_MODEL = "./repllama-instruct-hard-positives-v2-joint"
 BASE_MODEL = "meta-llama/Llama-2-7b-hf"
 tokenizer = None
 model = None
@@ -62,15 +62,15 @@ def create_batch_dict(tokenizer, input_texts, max_length=512):
     )
 
 def load_model():
-    global tokenizer, model, CUR_MODEL
+    global tokenizer, model, CUR_MODEL, BASE_MODEL
     tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
     tokenizer.pad_token_id = tokenizer.eos_token_id
     tokenizer.pad_token = tokenizer.eos_token
     tokenizer.padding_side = "right"
 
-    model = AutoModel.from_pretrained(CUR_MODEL, max_memory={"cpu": "12GiB"}, torch_dtype=torch.bfloat16, offload_state_dict=True)
-    # base_model_instance = AutoModel.from_pretrained(BASE_MODEL)
-    # model = PeftModel.from_pretrained(base_model_instance, CUR_MODEL)
+    # model = AutoModel.from_pretrained(CUR_MODEL, max_memory={"cpu": "12GiB"}, torch_dtype=torch.bfloat16, offload_state_dict=True)
+    base_model_instance = AutoModel.from_pretrained(BASE_MODEL)
+    model = PeftModel.from_pretrained(base_model_instance, CUR_MODEL)
     model.eval()
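The effect of this commit is to stop loading a fully merged checkpoint and instead load the Llama-2 base model and attach the RepLLaMA checkpoint as a PEFT adapter. The following is a minimal, self-contained sketch of that loading pattern under the assumption that "./repllama-instruct-hard-positives-v2-joint" contains adapter (LoRA) weights; dtype and memory options from the commented-out line are not carried over here.

# Standalone sketch of the adapter-loading pattern introduced by this commit.
# Assumes the CUR_MODEL directory holds PEFT adapter weights, not full weights.
import torch
from transformers import AutoModel, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "meta-llama/Llama-2-7b-hf"
CUR_MODEL = "./repllama-instruct-hard-positives-v2-joint"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token  # Llama-2 ships without a pad token

base_model_instance = AutoModel.from_pretrained(BASE_MODEL, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base_model_instance, CUR_MODEL)  # attach adapter on top of frozen base
model.eval()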