Update app.py
app.py CHANGED
@@ -9,8 +9,7 @@ import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, AutoModelForImageTextToText
 
-DESCRIPTION = """#
-開頭有<think>"""
+DESCRIPTION = """# 測試"""
 
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
@@ -34,7 +33,7 @@ CHAT_TEMPLATE="""{%- set default_system_message = "A user will ask you to solve
 
 {%- for message in loop_messages %}
     {%- if message['role'] == 'user' %}
-        {{- '[INST]' + message['content'] + '[/INST]
+        {{- '[INST]' + message['content'] + '[/INST]' }}
     {%- elif message['role'] == 'system' %}
         {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
     {%- elif message['role'] == 'assistant' %}
@@ -47,7 +46,7 @@ CHAT_TEMPLATE="""{%- set default_system_message = "A user will ask you to solve
 if torch.cuda.is_available():
     model_id = "mistralai/Mistral-Small-24B-Instruct-2501"
     model = AutoModelForImageTextToText.from_pretrained("AlexHung29629/add_vision_3", torch_dtype=torch.bfloat16, device_map="auto")
-    tokenizer = AutoTokenizer.from_pretrained("
+    tokenizer = AutoTokenizer.from_pretrained("AlexHung29629/add_vision_3")
 
 
 @spaces.GPU
@@ -62,7 +61,7 @@ def generate(
 ) -> Iterator[str]:
     conversation = [*chat_history, {"role": "user", "content": message}]
 
-    input_ids = tokenizer.apply_chat_template(conversation,
+    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
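For reference, a minimal sketch of how the updated tokenizer and apply_chat_template lines behave, assuming the standard transformers API. The sample conversation and the MAX_INPUT_TOKEN_LENGTH value are placeholders for illustration; app.py defines its own constant and builds the conversation from the Gradio chat history.

# Sketch only: mirrors the edited lines above under assumed values.
from transformers import AutoTokenizer

MAX_INPUT_TOKEN_LENGTH = 4096  # placeholder; app.py defines the real constant

# Same repository that the model weights are loaded from in app.py.
tokenizer = AutoTokenizer.from_pretrained("AlexHung29629/add_vision_3")

# Stand-in for [*chat_history, {"role": "user", "content": message}].
conversation = [{"role": "user", "content": "Hello"}]

# With return_tensors="pt", apply_chat_template renders the conversation
# through the chat template and returns a tensor of token ids (shape [1, n]).
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")

# Keep only the most recent MAX_INPUT_TOKEN_LENGTH tokens, as the Space does.
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
    input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]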