multimodalart (HF Staff) committed
Commit 6402797 · 1 Parent(s): b7c78c8

Update app.py

Files changed (1):
  app.py +1 -1
app.py CHANGED
@@ -9,7 +9,7 @@ from threading import Thread
 
 print(f"Starting to load the model to memory")
 m = AutoModelForCausalLM.from_pretrained(
- "stabilityai/stablelm-tuned-alpha-7b", device_map="auto", load_in_8bit=True).cuda()
+ "stabilityai/stablelm-tuned-alpha-7b", torch_dtype=torch.float16).cuda()
 tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")
 generator = pipeline('text-generation', model=m, tokenizer=tok, device=0)
 print(f"Sucessfully loaded the model to the memory")
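For context, the change swaps 8-bit quantized loading (load_in_8bit=True with device_map="auto") for half-precision loading followed by an explicit move to the GPU via .cuda(). The snippet below is a minimal sketch of the loading block as it stands after this commit, assuming the standard torch/transformers imports used elsewhere in app.py; it is not a verbatim copy of the file.

# Sketch of the post-commit loading block (assumed imports, not part of the diff).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

print("Starting to load the model to memory")
# Load the model in float16 and place it on the GPU, replacing the previous
# 8-bit (load_in_8bit=True, device_map="auto") path.
m = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-tuned-alpha-7b", torch_dtype=torch.float16).cuda()
tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")
# Build a text-generation pipeline on GPU 0 with the already-loaded model.
generator = pipeline('text-generation', model=m, tokenizer=tok, device=0)
print("Successfully loaded the model to memory")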