Spaces:
Sleeping
Sleeping
Commit
·
86087e8
1
Parent(s):
dc29b43
add cuda
Browse files
app.py
CHANGED
@@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
|
|
25 |
login(token=os.environ['HF_TOKEN'])
|
26 |
|
27 |
# Global variables
|
28 |
-
CUR_MODEL = "./repllama-instruct-hard-positives-v2-joint
|
29 |
BASE_MODEL = "meta-llama/Llama-2-7b-hf"
|
30 |
tokenizer = None
|
31 |
model = None
|
@@ -62,15 +62,15 @@ def create_batch_dict(tokenizer, input_texts, max_length=512):
|
|
62 |
)
|
63 |
|
64 |
def load_model():
|
65 |
-
global tokenizer, model, CUR_MODEL
|
66 |
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
|
67 |
tokenizer.pad_token_id = tokenizer.eos_token_id
|
68 |
tokenizer.pad_token = tokenizer.eos_token
|
69 |
tokenizer.padding_side = "right"
|
70 |
|
71 |
-
model = AutoModel.from_pretrained(CUR_MODEL, max_memory={"cpu": "12GiB"}, torch_dtype=torch.bfloat16, offload_state_dict=True)
|
72 |
-
|
73 |
-
|
74 |
model.eval()
|
75 |
|
76 |
|
|
|
# Authenticate with the Hugging Face Hub so gated models (Llama-2) can be
# downloaded. HF_TOKEN must be provided in the environment (Spaces secret);
# a missing variable raises KeyError early, which is the desired fail-fast.
login(token=os.environ['HF_TOKEN'])

# Global variables
# Local path to the fine-tuned PEFT/LoRA adapter checkpoint.
CUR_MODEL = "./repllama-instruct-hard-positives-v2-joint"
# Base model the adapter was trained on top of.
BASE_MODEL = "meta-llama/Llama-2-7b-hf"
# Populated lazily by load_model(); None until then.
tokenizer = None
model = None
|
|
62 |
)
|
63 |
|
def load_model():
    """Load the tokenizer and the PEFT-adapted model into module globals.

    Builds the tokenizer from BASE_MODEL, loads the frozen base model, then
    attaches the fine-tuned adapter from CUR_MODEL via PeftModel and puts
    the result in eval mode. Assigns the module-level ``tokenizer`` and
    ``model`` globals; returns nothing.

    NOTE(review): loads the full 7B base model with default settings —
    confirm the target Space has enough memory / an appropriate device map.
    """
    # Only `tokenizer` and `model` are assigned here; CUR_MODEL and
    # BASE_MODEL are read-only, so they need no `global` declaration.
    global tokenizer, model

    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    # Llama-2 ships with no pad token; reuse EOS for padding so batched
    # encoding works.
    tokenizer.pad_token_id = tokenizer.eos_token_id
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # Load the base weights, then layer the fine-tuned PEFT adapter on top.
    base_model_instance = AutoModel.from_pretrained(BASE_MODEL)
    model = PeftModel.from_pretrained(base_model_instance, CUR_MODEL)
    model.eval()