ssaroya
/

gptq_model

Model card | Files and versions | Community

ssaroya committed on May 22, 2023

Commit

8083870

·

1 Parent(s): 66acbfe

Update handler.py

Files changed (1) hide show

handler.py +6 -0

handler.py CHANGED Viewed

@@ -5,9 +5,11 @@ from typing import Dict, Any
 from gptq import GPTQ
 from utils import find_layers, DEV
 from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM
 class EndpointHandler:
     def __init__(self,
                  model_name="Wizard-Vicuna-13B-Uncensored-GPTQ",
                  checkpoint_path="Wizard-Vicuna-13B-Uncensored-GPTQ/Wizard-Vicuna-13B-Uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors",
                  wbits = 4,
@@ -16,6 +18,10 @@ class EndpointHandler:
                  eval=True,
                  warmup_autotune=True):
         self.model = self.load_quant(model_name, checkpoint_path, wbits, groupsize, fused_mlp, eval, warmup_autotune)
         self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
         self.model.to(DEV)

 from gptq import GPTQ
 from utils import find_layers, DEV
 from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM
+import os
 class EndpointHandler:
     def __init__(self,
+                 path="",
                  model_name="Wizard-Vicuna-13B-Uncensored-GPTQ",
                  checkpoint_path="Wizard-Vicuna-13B-Uncensored-GPTQ/Wizard-Vicuna-13B-Uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors",
                  wbits = 4,
                  eval=True,
                  warmup_autotune=True):
+        model_name = os.path.join(path, model_name)
+        checkpoint_path = os.path.join(path, checkpoint_path)
         self.model = self.load_quant(model_name, checkpoint_path, wbits, groupsize, fused_mlp, eval, warmup_autotune)
         self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
         self.model.to(DEV)