Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,9 +9,11 @@ from threading import Thread
|
|
| 9 |
|
| 10 |
print(f"Starting to load the model to memory")
|
| 11 |
m = AutoModelForCausalLM.from_pretrained(
|
| 12 |
-
"stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.
|
| 13 |
tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
|
| 14 |
-
|
|
|
|
|
|
|
| 15 |
print(f"Sucessfully loaded the model to the memory")
|
| 16 |
|
| 17 |
|
|
@@ -31,7 +33,7 @@ def chat(message, history):
|
|
| 31 |
chat.append({"role": "user", "content": message})
|
| 32 |
messages = tok.apply_chat_template(chat, tokenize=False)
|
| 33 |
# Tokenize the messages string
|
| 34 |
-
model_inputs = tok([messages], return_tensors="pt")
|
| 35 |
streamer = TextIteratorStreamer(
|
| 36 |
tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
|
| 37 |
generate_kwargs = dict(
|
|
|
|
| 9 |
|
| 10 |
print(f"Starting to load the model to memory")
|
| 11 |
m = AutoModelForCausalLM.from_pretrained(
|
| 12 |
+
"stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.float16, trust_remote_code=True)
|
| 13 |
tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
|
| 14 |
+
# Run on CUDA when a GPU is available; otherwise fall back to the CPU
|
| 15 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 16 |
+
m = m.to(device)
|
| 17 |
print(f"Sucessfully loaded the model to the memory")
|
| 18 |
|
| 19 |
|
|
|
|
| 33 |
chat.append({"role": "user", "content": message})
|
| 34 |
messages = tok.apply_chat_template(chat, tokenize=False)
|
| 35 |
# Tokenize the messages string
|
| 36 |
+
model_inputs = tok([messages], return_tensors="pt").to(device)
|
| 37 |
streamer = TextIteratorStreamer(
|
| 38 |
tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
|
| 39 |
generate_kwargs = dict(
|