multimodalart (HF Staff) committed
Commit 6402797 · 1 Parent(s): b7c78c8

Update app.py

Files changed (1):
  app.py +1 -1
app.py CHANGED
@@ -9,7 +9,7 @@ from threading import Thread
 
 print(f"Starting to load the model to memory")
 m = AutoModelForCausalLM.from_pretrained(
- "stabilityai/stablelm-tuned-alpha-7b", device_map="auto", load_in_8bit=True).cuda()
+ "stabilityai/stablelm-tuned-alpha-7b", torch_dtype=torch.float16).cuda()
 tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")
 generator = pipeline('text-generation', model=m, tokenizer=tok, device=0)
 print(f"Sucessfully loaded the model to the memory")
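For context, the change swaps 8-bit quantized loading (load_in_8bit=True with device_map="auto") for half-precision loading followed by an explicit move to the GPU via .cuda(). The snippet below is a minimal sketch of the loading block as it stands after this commit, assuming the standard torch/transformers imports used elsewhere in app.py; it is not a verbatim copy of the file.

# Sketch of the post-commit loading block (assumed imports, not part of the diff).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

print("Starting to load the model to memory")
# Load the model in float16 and place it on the GPU, replacing the previous
# 8-bit (load_in_8bit=True, device_map="auto") path.
m = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-tuned-alpha-7b", torch_dtype=torch.float16).cuda()
tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")
# Build a text-generation pipeline on GPU 0 with the already-loaded model.
generator = pipeline('text-generation', model=m, tokenizer=tok, device=0)
print("Successfully loaded the model to memory")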