handraise-dev
/

gguf-inference

Text Generation

Model card · Files and versions · Community

syberWolf committed on Jul 4, 2024

Commit

7e91a22

·

1 Parent(s): f96aa72

updates for phi

Files changed (2) hide show

handler.py +17 -8
requirements.txt +4 -1

handler.py CHANGED Viewed

@@ -6,22 +6,31 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
-        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct", trust_remote_code=True)
         model = AutoModelForCausalLM.from_pretrained(
             "microsoft/Phi-3-mini-128k-instruct",
-            trust_remote_code=True
-        )
         # create inference pipeline
         self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         inputs = data.pop("inputs", data)
-        for key in ['stop_sequences', 'watermark', 'stop']:
-            if key in inputs:
-                del inputs[key]
         parameters = data.pop("parameters", None)
         # pass inputs with all kwargs in data
         if parameters is not None:

 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
+        tokenizer = tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")
         model = AutoModelForCausalLM.from_pretrained(
             "microsoft/Phi-3-mini-128k-instruct",
+            device_map="cuda",
+            torch_dtype="auto",
+            trust_remote_code=True,
+        )
         # create inference pipeline
         self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", None)
+        # Print parameters for debugging
+        print("Parameters before cleaning:", parameters)
+        # Remove unwanted keys from parameters
+        if parameters is not None:
+            for key in ['stop_sequences', 'watermark', 'stop']:
+                if key in parameters:
+                    del parameters[key]
+        # Print parameters after cleaning
+        print("Parameters after cleaning:", parameters)
         # pass inputs with all kwargs in data
         if parameters is not None:

requirements.txt CHANGED Viewed

@@ -1 +1,4 @@
-flash-attn==latest

+flash_attn==2.5.8
+torch==2.3.1
+accelerate==0.31.0
+transformers==4.41.2