Spaces:
Running
on
Zero
Running
on
Zero
Update llm.py
Browse files
llm.py
CHANGED
@@ -9,6 +9,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
9 |
from transformers import TextIteratorStreamer
|
10 |
import threading
|
11 |
import torch
|
|
|
12 |
|
13 |
import logging
|
14 |
logger = logging.getLogger(__name__)
|
@@ -79,6 +80,7 @@ def build_messages(
|
|
79 |
#
|
80 |
# Stream response
|
81 |
#
|
|
|
82 |
@torch.inference_mode()
|
83 |
def stream_response(
|
84 |
messages: list[dict],
|
|
|
9 |
from transformers import TextIteratorStreamer
|
10 |
import threading
|
11 |
import torch
|
12 |
+
import spaces
|
13 |
|
14 |
import logging
|
15 |
logger = logging.getLogger(__name__)
|
|
|
80 |
#
|
81 |
# Stream response
|
82 |
#
|
83 |
+
@spaces.GPU
|
84 |
@torch.inference_mode()
|
85 |
def stream_response(
|
86 |
messages: list[dict],
|